diff --git data/files/alltypes.txt data/files/alltypes.txt new file mode 100644 index 0000000..594b299 --- /dev/null +++ data/files/alltypes.txt @@ -0,0 +1,2 @@ +true|10|100|1000|10000|4.0|20.0|2.2222|1969-12-31 15:59:58.174|1970-01-01 00:00:00|hello|k1:v1,k2:v2|100,200|{10, "foo"} +false|20|200|2000|20000|8.0|40.0|4.2222|1970-12-31 15:59:58.174|1971-01-01 00:00:00||k3:v3,k4:v4|200,300|{20, "bar"} diff --git data/files/dept.txt data/files/dept.txt new file mode 100644 index 0000000..292bee6 --- /dev/null +++ data/files/dept.txt @@ -0,0 +1,4 @@ +31|sales +33|engineering +34|clerical +35|marketing diff --git data/files/emp.txt data/files/emp.txt new file mode 100644 index 0000000..a0e76b9 --- /dev/null +++ data/files/emp.txt @@ -0,0 +1,6 @@ +Rafferty|31 +Jones|33 +Steinberg|33 +Robinson|34 +Smith|34 +John| diff --git data/files/loc.txt data/files/loc.txt new file mode 100644 index 0000000..69910b7 --- /dev/null +++ data/files/loc.txt @@ -0,0 +1,8 @@ +OH|31|43201|2001 +IO|32|43202|2001 +CA|35|43809|2001 +FL|33|54342|2001 +UT|35||2001 +CA|35|43809|2001 +|34|40000| +FL|33|54342|2001 diff --git ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java index d22009a..4761bdb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java +++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java @@ -406,6 +406,8 @@ DROP_COMMAND_NOT_ALLOWED_FOR_PARTITION(30011, "Partition protected from being dropped"), COLUMNSTATSCOLLECTOR_INVALID_COLUMN(30012, "Column statistics are not supported " + "for partition columns"), + + STATISTICS_CLONING_FAILED(30013, "Cloning of statistics failed"), ; private int errorCode; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java index d320b47..ee3ce33 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java @@ -21,6 +21,7 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Set; @@ -32,9 +33,18 @@ import org.apache.hadoop.hive.ql.exec.persistence.AbstractRowContainer; import org.apache.hadoop.hive.ql.exec.persistence.RowContainer; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.DBStatistics; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.JoinCondDesc; import org.apache.hadoop.hive.ql.plan.JoinDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; +import org.apache.hadoop.hive.ql.plan.Statistics; +import org.apache.hadoop.hive.ql.plan.TabStatistics; import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.ql.stats.StatsUtils; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -42,6 +52,9 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; + /** * Join operator implementation. 
*/ @@ -825,4 +838,210 @@ public boolean opAllowedBeforeMapJoin() { public boolean opAllowedAfterMapJoin() { return false; } + + @Override + public Statistics getStatistics(HiveConf hiveconf) throws HiveException { + // There are three cases + // 1: The values of the join keys are disjoint in the two relations, in which case + // T(RXS) = 0 (we need histograms for this) + // 2: The join key is a primary key on relation R and a foreign key on relation S, + // in which case every tuple in S will match a tuple in R + // T(RXS) = T(S) (we need histograms for this) + // 3: Both the R & S relations have the same value for the join key. Ex: a bool column with + // all true values + // T(RXS) = T(R)*T(S) (we need histograms for this: countDistinct = 1 and the same value) + + // Since we don't know how the values of the join columns relate, we will use the following + // general case + // T(RXS) = (T(R)*T(S))/max(V(R,Y), V(S,Y)) where Y is the join attribute + + // in case of joining on multiple attributes + // T(RXS) = T(R)*T(S) / (max(V(R,y1), V(S,y1)) * max(V(R,y2), V(S,y2))) + Statistics stats = conf.getStatistics(); + if (stats == null) { + stats = super.getStatistics(hiveconf); + + // populate the original column alias to internal name mapping + StatsUtils.populateAliasToIntColNameMappings(stats, this); + + List<TableScanOperator> tsops = StatsUtils.getRoots(this); + + for (TableScanOperator tsop : tsops) { + String dbName = tsop.getConf().getTable().getDbName(); + String tabName = tsop.getConf().getTable().getTableName(); + DBStatistics dbs = stats.getDBStatisticsForDB(dbName); + + if (dbs != null) { + TabStatistics ts = dbs.getTableStatsForTable(tabName); + + // without column stats do not proceed + if (ts != null && !ts.getColStatState().equals(TabStatistics.State.NONE)) { + + Map<String, ExprNodeDesc> outColExprMap = ts.getOutColExprMap(); + Map tableAliasMap = StatsUtils.getTableAliasMap(this, dbName, tabName, stats); + + // The column expression map at this point will have a mapping between + // each column name and its internal name. The internal names for each table's + // columns will start from _col0. We need to update the internal + // column mapping to monotonically increasing column numbers, which + // is similar to the output column names of the join operator + + // Example: + // Input = {tabName1 => {_col0 => column1, _col1 => column2}, tabName2 => {_col0 => + // attr1, _col1 => attr2}} + // Output = {tabName1 => {_col0 => column1, _col1 => column2}, tabName2 => {_col2 => + // attr1, _col3 => attr2}} + updateOutputColumnExprMap(this, outColExprMap, tableAliasMap); + + long prodRows = 1; + List<Long> distinctVals = Lists.newArrayList(); + boolean multiAttr = false; + + // get the join keys from the parent ReduceSink operators + List<Operator<? extends OperatorDesc>> parents = this.getParentOperators(); + for (int pos = 0; pos < parents.size(); pos++) { + ReduceSinkOperator parent = null; + if (parents.get(pos) instanceof ReduceSinkOperator) { + parent = (ReduceSinkOperator) this.getParentOperators().get(pos); + } else { + // must be a DEMUX operator. RS will be the grandparent + parent = (ReduceSinkOperator) this.getParentOperators().get(0).getParentOperators().get(pos); + } + ReduceSinkDesc rsconf = parent.getConf(); + List<ExprNodeDesc> keys = rsconf.getKeyCols(); + List<Long> dvs = Lists.newArrayList(); + + if (keys.size() > 1) { + multiAttr = true; + } + + // get the table scan operator, i.e., the root of each parent, to get + // column statistics corresponding to each table + List<TableScanOperator> roots = StatsUtils.getRoots(parent); + + // for each of the join keys get the distinct counts and use the + // max of them when applying the rule (a worked sketch follows).
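As a worked illustration of the general rule quoted in the comment above, here is a minimal, self-contained sketch; all row counts and distinct-value counts are invented, and nothing in it is code from the patch:

```java
// Illustrative sketch only (not part of the patch): the general rule
// T(RxS) = T(R) * T(S) / (max(V(R,y1), V(S,y1)) * max(V(R,y2), V(S,y2)))
// evaluated with made-up statistics for a two-key join.
public class JoinCardinalitySketch {
  public static void main(String[] args) {
    long tR = 10_000L;                 // T(R): hypothetical row count of R
    long tS = 2_000L;                  // T(S): hypothetical row count of S
    long[] vR = {500L, 20L};           // V(R,y1), V(R,y2): hypothetical distinct counts
    long[] vS = {400L, 50L};           // V(S,y1), V(S,y2)

    long denom = 1L;
    for (int i = 0; i < vR.length; i++) {
      denom *= Math.max(vR[i], vS[i]); // take the larger distinct count per join key
    }
    long estimate = (tR * tS) / denom; // 20000000 / (500 * 50) = 800
    System.out.println("Estimated join cardinality: " + estimate);
  }
}
```

With these numbers the estimate is 10000 * 2000 / 25000 = 800 rows.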
+ for (ExprNodeDesc end : keys) { + String joinCol = ""; + if (end instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc encd = (ExprNodeColumnDesc) end; + joinCol = encd.getColumn(); + } else if (end instanceof ExprNodeGenericFuncDesc) { + ExprNodeGenericFuncDesc engfd = (ExprNodeGenericFuncDesc) end; + ExprNodeColumnDesc encd = (ExprNodeColumnDesc) engfd.getChildExprs().get(0); + joinCol = encd.getColumn(); + } + + for (TableScanOperator root : roots) { + String rootDBName = root.getConf().getTable().getDbName(); + String rootTabName = root.getConf().getTable().getTableName(); + + Statistics parentStats = parent.getStatistics(hiveconf); + long dv = StatsUtils.getDistinctCountOfColumnFromTable(parentStats, rootDBName, + rootTabName, joinCol); + dvs.add(dv); + prodRows = prodRows + * parentStats.getDBStatisticsForDB(rootDBName) + .getTableStatsForTable(rootTabName).getNumRows(); + } + + long maxDV = Collections.max(dvs); + // preserve the max DV for each join key. This is useful for the + // multi-attribute join case + distinctVals.add(maxDV); + } + } + + // compute the denominator for single-key and multi-key joins + // using the rule from the description above + long denom = 1; + if (multiAttr) { + // multiply the max distinct values of the multiple keys + for (Long dv : distinctVals) { + denom = denom * dv; + } + } else { + denom = Collections.max(distinctVals); + } + + // compute the new rowCount + long newRowCount = prodRows / denom; + + // update the stats + StatsUtils.updateStats(stats, dbName, tabName, newRowCount); + } + } + } + } + + return stats; + } + + private void updateOutputColumnExprMap( + CommonJoinOperator commonJoinOperator, + Map<String, ExprNodeDesc> outColExprMap, Map tableAliasMap) { + + Map<String, Byte> revExprMap = commonJoinOperator.getConf().getReversedExprs(); + List<String> outColNames = commonJoinOperator.getConf().getOutputColumnNames(); + + // mapping from output column names to input column names. Both output and + // input column names are internal names. + Map<String, String> outColToInColMap = Maps.newHashMap(); + + int prevTablePos = -1; + int idx = 0; + + // outColNames will be _col0, _col1, _col2, _col5, _col6 + // using the reversed expression map, get the table position corresponding to these + // columns and create a map between outColName and the current internal colName + // like below + // _col0 => _col0 + // _col1 => _col1 + // _col2 => _col2 + // _col5 => _col0 + // _col6 => _col1 + for (String outColName : outColNames) { + + // get the table position from the reversed expression map + int currTablePos = revExprMap.get(outColName); + + if (prevTablePos == -1) { + prevTablePos = currTablePos; + } + + // check if the table position is the same as the previous one;
if it is the same then we are + // seeing the same table, in which case we don't have to change the col index; + // otherwise reset it to 0 + if (prevTablePos != currTablePos) { + idx = 0; + prevTablePos = currTablePos; + } + + outColToInColMap.put(outColName, "_col" + idx); + idx++; + } + + // rename the input column names from the different tables into a monotonically + // increasing sequence that matches the output names + + // Input: + // table1: _col0, _col1, _col2 + // table2: _col0, _col1 + + // Output: + // table1: _col0, _col1, _col2 + // table2: _col5, _col6 + for (Map.Entry<String, Byte> entry : revExprMap.entrySet()) { + String col = entry.getKey(); + if (!outColExprMap.containsKey(col)) { + String mappedKey = outColToInColMap.get(col); + if (outColExprMap.containsKey(mappedKey)) { + outColExprMap.put(col, outColExprMap.get(mappedKey)); + outColExprMap.remove(mappedKey); + } + } + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/DemuxOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/DemuxOperator.java index 945186d..462a328 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/DemuxOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DemuxOperator.java @@ -27,11 +27,16 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.DBStatistics; import org.apache.hadoop.hive.ql.plan.DemuxDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.Statistics; +import org.apache.hadoop.hive.ql.plan.TabStatistics; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.hive.ql.stats.StatsUtils; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; @@ -363,4 +368,30 @@ static public String getOperatorName() { public OperatorType getType() { return OperatorType.DEMUX; } + + @Override + public Statistics getStatistics(HiveConf hiveconf) throws HiveException { + // The DEMUX operator duplicates each input row once per child,
so multiply + // the basic statistics by number of children + int numChilds = this.getChildOperators().size(); + Statistics stats = conf.getStatistics(); + if (stats == null) { + stats = super.getStatistics(hiveconf); + List tsops = StatsUtils.getRoots(this); + + for (TableScanOperator tsop : tsops) { + String dbName = tsop.getConf().getTable().getDbName(); + String tabName = tsop.getConf().getTable().getTableName(); + DBStatistics dbs = stats.getDBStatisticsForDB(dbName); + if (dbs != null) { + TabStatistics ts = dbs.getTableStatsForTable(tabName); + if (ts != null) { + ts.setNumRows(ts.getNumRows() * numChilds); + ts.setRawDataSize(ts.getRawDataSize() * numChilds); + } + } + } + } + return stats; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FilterOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/FilterOperator.java index 516ba42..31d0700 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FilterOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FilterOperator.java @@ -19,13 +19,36 @@ package org.apache.hadoop.hive.ql.exec; import java.io.Serializable; +import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.io.IOContext; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.DBStatistics; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.FilterDesc; +import org.apache.hadoop.hive.ql.plan.Statistics; +import org.apache.hadoop.hive.ql.plan.TabStatistics; import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.hive.ql.stats.StatsUtils; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.io.LongWritable; @@ -96,16 +119,16 @@ public void processOp(Object row, int tag) throws HiveException { if (conf.isSortedFilter() && ioContext.useSorted()) { if (!(conditionEvaluator instanceof ExprNodeGenericFuncEvaluator)) { LOG.error("Attempted to use the fact data is sorted when the conditionEvaluator is not " + - "of type ExprNodeGenericFuncEvaluator"); + "of type ExprNodeGenericFuncEvaluator"); ioContext.setUseSorted(false); return; } else { - ioContext.setComparison(((ExprNodeGenericFuncEvaluator)conditionEvaluator).compare(row)); + 
ioContext.setComparison(((ExprNodeGenericFuncEvaluator) conditionEvaluator).compare(row)); } if (ioContext.getGenericUDFClassName() == null) { ioContext.setGenericUDFClassName( - ((ExprNodeGenericFuncEvaluator)conditionEvaluator).genericUDF.getClass().getName()); + ((ExprNodeGenericFuncEvaluator) conditionEvaluator).genericUDF.getClass().getName()); } // If we are currently searching the data for a place to begin, do not return data yet @@ -123,8 +146,8 @@ public void processOp(Object row, int tag) throws HiveException { Object condition = conditionEvaluator.evaluate(row); // If we are currently performing a binary search on the input, don't forward the results - // Currently this value is set when a query is optimized using a compact index. The map reduce - // job responsible for scanning and filtering the index sets this value. It remains set + // Currently this value is set when a query is optimized using a compact index. The map reduce + // job responsible for scanning and filtering the index sets this value. It remains set // throughout the binary search executed by the HiveBinarySearchRecordReader until a starting // point for a linear scan has been identified, at which point this value is unset. if (ioContext.isBinarySearching()) { @@ -185,4 +208,241 @@ public boolean supportAutomaticSortMergeJoin() { public boolean supportUnionRemoveOptimization() { return true; } + + @Override + public Statistics getStatistics(HiveConf hiveconf) throws HiveException { + // The Filter operator doesn't change the average row size, but it does change + // the number of rows emitted. The reduction in the number of rows emitted + // depends on the filter expression. + + // Notation: + // T(S) - Number of tuples in relation S + // V(S,A) - Number of distinct values of attribute A in relation S + + // Rules: + // 1 - Column equals a constant + // T(S) = T(R) / V(R,A) + + // 2 - Inequality conditions + // T(S) = T(R) / 3 + + // 3 - Not-equals comparison + // T(S) = T(R) + // (or) + // T(S) = T(R) * (V(R,A) - 1) / V(R,A) + + // 4 - NOT condition + // T(S) = T(R) - T(S'), where T(S') is the number of tuples that satisfy + // the condition being negated + + // 5 - Multiple AND conditions + // Apply rules 1 to 3 in cascade (order doesn't matter) + + // 6 - Multiple OR conditions + // The simple case is to evaluate the conditions independently and sum the results + // T(S) = m1 + m2 + // (or) + // T(S) = T(R) * ( 1 - ( 1 - m1/T(R) ) * ( 1 - m2/T(R) )) + // where m1 is the number of tuples that satisfy condition1 and + // m2 is the number of tuples that satisfy condition2 + + // For more information, refer to the 'Estimating The Cost Of Operations' chapter + // in "Database Systems: The Complete Book" by Garcia-Molina et al. + // (A worked sketch of these rules follows.)
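A small runnable sketch of rules 1, 2, 4 and 6 above, again with invented statistics rather than anything computed by the patch:

```java
// Illustrative sketch only (not part of the patch): filter selectivity
// rules 1, 2, 4 and 6 applied to made-up statistics.
public class FilterSelectivitySketch {
  public static void main(String[] args) {
    long tR = 9_000L;  // T(R): hypothetical row count
    long vRA = 30L;    // V(R,A): hypothetical distinct count of column A

    long eq = tR / vRA;    // Rule 1: A = const       -> 9000 / 30 = 300
    long ineq = tR / 3;    // Rule 2: A < const       -> 9000 / 3  = 3000
    long not = tR - eq;    // Rule 4: NOT (A = const) -> 9000 - 300 = 8700

    // Rule 6 (independence form) for the OR of the first two conditions
    double or = tR * (1.0 - (1.0 - (double) eq / tR) * (1.0 - (double) ineq / tR));

    System.out.println(eq + " " + ineq + " " + not + " " + Math.round(or)); // 300 3000 8700 3200
  }
}
```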
+ Statistics stats = conf.getStatistics(); + if (stats == null) { + stats = super.getStatistics(hiveconf); + List<TableScanOperator> tsops = StatsUtils.getRoots(this); + + for (TableScanOperator tsop : tsops) { + String dbName = tsop.getConf().getTable().getDbName(); + String tabName = tsop.getConf().getTable().getTableName(); + DBStatistics dbs = stats.getDBStatisticsForDB(dbName); + if (dbs != null) { + TabStatistics ts = dbs.getTableStatsForTable(tabName); + if (ts != null) { + // proceed only if column statistics are available + if (ts.getColStatState().equals(TabStatistics.State.COMPLETE)) { + ExprNodeDesc root = conf.getPredicate(); + long newRowCount = evaluateExpression(stats, dbName, tabName, root); + StatsUtils.updateStats(stats, dbName, tabName, newRowCount); + } + } + } + } + } + return stats; + } + + private long evaluateExpression(Statistics stats, String dbName, + String tabName, ExprNodeDesc pred) throws HiveException { + long newNumRows = 0; + Statistics andStats = null; + if (pred instanceof ExprNodeGenericFuncDesc) { + ExprNodeGenericFuncDesc genFunc = (ExprNodeGenericFuncDesc) pred; + GenericUDF udf = genFunc.getGenericUDF(); + + // for an AND condition, update the stats in cascade + if (udf instanceof GenericUDFOPAnd) { + try { + andStats = stats.clone(); + stats.setAndExprStats(andStats); + } catch (CloneNotSupportedException e) { + throw new HiveException(ErrorMsg.STATISTICS_CLONING_FAILED); + } + + // evaluate the children + for (ExprNodeDesc child : genFunc.getChildExprs()) { + newNumRows = evaluateChildExpr(stats.getAndExprStats(), dbName, tabName, child); + StatsUtils.updateStats(stats.getAndExprStats(), dbName, tabName, newNumRows); + } + + } else { + // for an OR condition, compute the stats independently and sum them + if (udf instanceof GenericUDFOPOr) { + for (ExprNodeDesc child : genFunc.getChildExprs()) { + newNumRows += evaluateChildExpr(stats, dbName, tabName, child); + if (stats.getAndExprStats() != null) { + stats.setAndExprStats(null); + } + } + } else if (udf instanceof GenericUDFOPNot) { + newNumRows = evaluateNotExpr(stats, dbName, tabName, pred); + } else if (udf instanceof GenericUDFOPNotNull) { + newNumRows = evaluateColEqualsNullExpr(stats, dbName, tabName, pred); + newNumRows = stats.getDBStatisticsForDB(dbName).getTableStatsForTable(tabName) + .getNumRows() + - newNumRows; + } else if (udf instanceof GenericUDFOPNull) { + newNumRows = evaluateColEqualsNullExpr(stats, dbName, tabName, pred); + } else { + // single predicate condition + newNumRows = evaluateChildExpr(stats, dbName, tabName, pred); + } + } + } + + return newNumRows; + } + + private long evaluateNotExpr(Statistics stats, String dbName, + String tabName, ExprNodeDesc pred) throws HiveException { + + TabStatistics ts = stats.getDBStatisticsForDB(dbName).getTableStatsForTable(tabName); + long numRows = ts.getNumRows(); + + // for NOT over a constant: if the constant evaluates to true then pass 0 rows, + // else pass all rows + if (pred instanceof ExprNodeGenericFuncDesc) { + ExprNodeGenericFuncDesc genFunc = (ExprNodeGenericFuncDesc) pred; + for (ExprNodeDesc leaf : genFunc.getChildExprs()) { + if (leaf instanceof ExprNodeGenericFuncDesc) { + // GenericUDF child: apply rule 4, T(S) = T(R) - T(S') + long newNumRows = 0; + for (ExprNodeDesc child : ((ExprNodeGenericFuncDesc) pred).getChildExprs()) { + newNumRows = evaluateChildExpr(stats, dbName, tabName, child); + } + return numRows - newNumRows; + } else if (leaf instanceof ExprNodeConstantDesc) { + ExprNodeConstantDesc encd = (ExprNodeConstantDesc) leaf; + if (encd.getValue().equals(true)) { + return 0; + } else { + return numRows; + } + } else {
+ // NOT on columns not possible + } + } + } + return 0; + } + + private long evaluateColEqualsNullExpr(Statistics stats, String dbName, String tabName, + ExprNodeDesc pred) { + + TabStatistics ts = stats.getDBStatisticsForDB(dbName).getTableStatsForTable(tabName); + long numRows = ts.getNumRows(); + + // evaluate similar to "col = constant" expr + if (pred instanceof ExprNodeGenericFuncDesc) { + + ExprNodeGenericFuncDesc genFunc = (ExprNodeGenericFuncDesc) pred; + for (ExprNodeDesc leaf : genFunc.getChildExprs()) { + + if (leaf instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) leaf; + String colName = colDesc.getColumn(); + long dvs = StatsUtils.getDistinctCountOfColumnFromTable(stats, dbName, tabName, colName); + if (dvs != 0) { + return numRows / dvs; + } else { + return numRows; + } + } + } + } + return 0; + } + + private long evaluateChildExpr(Statistics stats, String dbName, + String tabName, ExprNodeDesc child) throws HiveException { + + TabStatistics ts = stats.getDBStatisticsForDB(dbName).getTableStatsForTable(tabName); + long numRows = ts.getNumRows(); + + if (child instanceof ExprNodeGenericFuncDesc) { + + ExprNodeGenericFuncDesc genFunc = (ExprNodeGenericFuncDesc) child; + GenericUDF udf = genFunc.getGenericUDF(); + + if (udf instanceof GenericUDFOPEqual || udf instanceof GenericUDFOPEqualNS) { + String colName = null; + boolean isConst = false; + + for (ExprNodeDesc leaf : genFunc.getChildExprs()) { + + if (leaf instanceof ExprNodeConstantDesc) { + // if the first argument is const then just set the flag and continue + if (colName == null) { + isConst = true; + continue; + } + long dvs = StatsUtils + .getDistinctCountOfColumnFromTable(stats, dbName, tabName, colName); + if (dvs != 0) { + return numRows / dvs; + } else { + return numRows; + } + + } else if (leaf instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) leaf; + colName = colDesc.getColumn(); + + // if const is first argument then evaluate the result + if (isConst) { + long dvs = StatsUtils.getDistinctCountOfColumnFromTable(stats, dbName, tabName, + colName); + if (dvs != 0) { + return numRows / dvs; + } else { + return numRows; + } + } + } + } + } else if (udf instanceof GenericUDFOPNotEqual) { + + return numRows; + } else if (udf instanceof GenericUDFOPEqualOrGreaterThan || + udf instanceof GenericUDFOPEqualOrLessThan || + udf instanceof GenericUDFOPGreaterThan || + udf instanceof GenericUDFOPLessThan) { + + return numRows / 3; + } else { + + return evaluateExpression(stats, dbName, tabName, genFunc); + } + } + return 0; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java index 7d05982..6c6db3d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java @@ -41,12 +41,16 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.OpParseContext; import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.plan.DBStatistics; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.Statistics; +import org.apache.hadoop.hive.ql.plan.TabStatistics; import 
org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.hive.ql.stats.StatsUtils; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer; import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; @@ -69,6 +73,9 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Text; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; + /** * GroupBy operator implementation. */ @@ -1207,4 +1214,215 @@ public boolean acceptLimitPushdown() { return getConf().getMode() == GroupByDesc.Mode.MERGEPARTIAL || getConf().getMode() == GroupByDesc.Mode.COMPLETE; } + + @Override + public Statistics getStatistics(HiveConf hiveconf) throws HiveException { + // The Group By operator changes the number of rows. The number of rows emitted + // by the GBY operator will be at least 1 and at most T(R), i.e., the number of rows in + // relation R. A better estimate can be found if we have column statistics + // on the columns that we are grouping on. + // If we are grouping by attributes A,B,C and statistics for + // columns A,B,C are available, then a better estimate is the smaller of + // V(R,A)*V(R,B)*V(R,C) (the product of the distinct counts of A,B,C) + // and T(R)/2. + // For more information, refer to the 'Estimating The Cost Of Operations' chapter + // in "Database Systems: The Complete Book" by Garcia-Molina et al. + Statistics stats = this.getConf().getStatistics(); + long newNumRows = 0; + + if (stats == null) { + stats = super.getStatistics(hiveconf); + StatsUtils.populateAliasToIntColNameMappings(stats, this); + List<TableScanOperator> tsops = StatsUtils.getRoots(this); + + for (TableScanOperator tsop : tsops) { + String dbName = tsop.getConf().getTable().getDbName(); + String tabName = tsop.getConf().getTable().getTableName(); + DBStatistics dbs = stats.getDBStatisticsForDB(dbName); + if (dbs != null) { + TabStatistics ts = dbs.getTableStatsForTable(tabName); + if (ts != null) { + Map<String, ExprNodeDesc> outColExprMap = ts.getOutColExprMap(); + long dvProd = 1; + // get the grouping attributes and compute the product of their distinct cardinalities + for (String attr : getGroupByAttributes(stats, outColExprMap, conf.getKeys())) { + dvProd *= StatsUtils.getDistinctCountOfColumnFromTable(stats, dbName, tabName, attr); + } + + // a reducer-side GBY with GROUPING_SETS will reduce the number of rows + // because of aggregation. For example, for GBY(A,B) WITH CUBE the mapper will emit + // 4 rows for each input row. The reducer-side GBY will aggregate the + // rows and thereby decrease the number of rows. The number of rows + // can be estimated as follows + // T(R) = min(T(R)/2, T(R, GBY(A,B)) + T(R, GBY(A)) + T(R, GBY(B)) + 1) + if (this.parentOperators.get(0) instanceof ReduceSinkOperator) { + + // check if the map-side GBY has a grouping set defined + if (isParentGBYContainsGroupingSet(this)) { + dvProd = 1; + // grouping sets are a set of integers. The position of a set bit + // in the integer corresponds to a grouping level (see the sketch below).
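The bit manipulation described here is easiest to see in isolation. The following sketch mirrors the logic of the getRowCountsForGroupingSet() helper shown further down, with invented distinct counts:

```java
// Illustrative sketch only (not part of the patch): how a grouping-set
// integer zeroes out distinct-count factors. Distinct counts are made up.
import java.util.Arrays;
import java.util.List;

public class GroupingSetSketch {
  static long rowsForGroupingSet(int gs, List<Long> dvs) {
    long[] factors = new long[dvs.size()];
    for (int i = 0; i < dvs.size(); i++) {
      factors[i] = dvs.get(i);
    }
    int idx = 0;
    while (gs != 0) {
      if ((gs & 1) == 1) {
        // a set bit means the attribute is aggregated away: its factor becomes 1
        factors[factors.length - idx - 1] = 1L;
      }
      gs >>>= 1;
      idx++;
    }
    long result = 1L;
    for (long f : factors) {
      result *= f;
    }
    return result;
  }

  public static void main(String[] args) {
    List<Long> dvs = Arrays.asList(10L, 20L, 30L); // V(R,A), V(R,B), V(R,C)
    System.out.println(rowsForGroupingSet(3, dvs)); // 011 -> (A, *, *) -> 10
    System.out.println(rowsForGroupingSet(0, dvs)); // 000 -> (A, B, C) -> 6000
  }
}
```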
+ // Ex: For grouping attributes A, B, C, + // if the grouping set integer is 3, the grouping levels will be + // (A, *, *) corresponding to set bit positions 011 + Set<Integer> gs = getGroupingSet(this); + + if (gs != null) { + List<String> dims = getGroupByAttributes(stats, outColExprMap, conf.getKeys()); + List<Long> dvs = Lists.newArrayList(); + + // create a list of distinct counts corresponding to the grouping attrs + for (String dim : dims) { + dvs.add(StatsUtils.getDistinctCountOfColumnFromTable(stats, dbName, tabName, + dim)); + } + + for (Integer gsIdx : gs) { + // based on the grouping set integer, compute the product + // of distinct counts, which corresponds to the expected number + // of rows + dvProd += getRowCountsForGroupingSet(gsIdx, dvs); + + // if the estimated number of rows already exceeds half the number of rows + // then we can exit early to avoid expensive computation + if (dvProd > (ts.getNumRows() / 2)) { + break; + } + } + } + + newNumRows = applyGBYRule(ts.getNumRows(), dvProd); + StatsUtils.updateStats(stats, dbName, tabName, newNumRows); + return stats; + } else { + // the parent doesn't contain grouping sets. Since this is the reducer side + // we don't have to apply the GBY rule again; we have already done + // it on the map side + return stats; + } + } + + // apply the GBY rule on the map side + newNumRows = applyGBYRule(ts.getNumRows(), dvProd); + + // if a grouping set is present then it means a CUBE/ROLLUP/GROUPING_SET + // operation. In any of these cases, each row will be duplicated once for + // each element of the grouping set. For example: GBY(A,B,C) WITH CUBE will + // have GROUPING_SET(0, 1, 2, 3, 4, 5, 6, 7), which implies that each row + // will be duplicated 8 times for the different combinations of aggregations. + // The number of rows/data size will be increased by a factor of the + // number of elements in the grouping set + if (this.getConf().isGroupingSetsPresent()) { + int multiplier = this.getConf().getListGroupingSets().size(); + newNumRows = multiplier * newNumRows; + } + StatsUtils.updateStats(stats, dbName, tabName, newNumRows); + } + } + } + + } + return stats; + } + + private long applyGBYRule(long numRows, long dvProd) { + long newNumRows = numRows; + // avoid the divide by 2 yielding 0 + if (numRows > 1) { + if (dvProd != 0) { + newNumRows = Math.min(numRows / 2, dvProd); + } else { + newNumRows = numRows / 2; + } + } + return newNumRows; + } + + private long getRowCountsForGroupingSet(Integer gsIdx, List<Long> dvs) { + int val = gsIdx.intValue(); + int idx = 0; + int size = dvs.size(); + List<Long> cloneDVS = Lists.newArrayList(); + cloneDVS.addAll(dvs); + while (val != 0) { + if ((val & 1) == 1) { + // set in the reverse order + cloneDVS.set(size - idx - 1, 1L); + } + val = val >>> 1; + idx++; + } + + long result = 1; + for (Long l : cloneDVS) { + result *= l; + } + return result; + } + + private List<String> getGroupByAttributes(Statistics stats, + Map<String, ExprNodeDesc> outColExprMap, + ArrayList<ExprNodeDesc> keys) { + List<String> attrs = Lists.newArrayList(); + for (ExprNodeDesc end : keys) { + if (end instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc encd = (ExprNodeColumnDesc) end; + String colName = encd.getColumn(); + if ((colName.startsWith("_col") || colName.startsWith("KEY._col")) && outColExprMap != null) { + if (colName.startsWith("KEY._col")) { + // strip off KEY.
from column name + colName = colName.split("\\.")[1]; + } + ExprNodeDesc desc = outColExprMap.get(colName); + if (desc instanceof ExprNodeColumnDesc) { + colName = ((ExprNodeColumnDesc) desc).getColumn(); + } + } + // if the colName still starts with _ then it might be constant projection + if (!colName.startsWith("_")) { + attrs.add(colName); + } + } + } + return attrs; + } + + private Set getGroupingSet(Operator currOp) { + Set result = Sets.newHashSet(); + getGroupingSetImpl(currOp, result); + return result; + } + + private void getGroupingSetImpl(Operator currOp, Set gs) { + if (currOp == null) { + return; + } + for (Operator op : currOp.getParentOperators()) { + if (op instanceof GroupByOperator) { + GroupByOperator gby = (GroupByOperator) op; + if (gby.getConf().isGroupingSetsPresent()) { + gs.addAll(gby.getConf().getListGroupingSets()); + } + } else { + getGroupingSetImpl(op, gs); + } + } + } + + private boolean isParentGBYContainsGroupingSet(Operator currOp) { + if (currOp == null) { + return false; + } + for (Operator op : currOp.getParentOperators()) { + if (op instanceof GroupByOperator) { + GroupByOperator gby = (GroupByOperator) op; + if (gby.getConf().isGroupingSetsPresent()) { + return true; + } + } else { + return isParentGBYContainsGroupingSet(op); + } + } + return false; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java index 276902a..66779eb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java @@ -19,11 +19,17 @@ package org.apache.hadoop.hive.ql.exec; import java.io.Serializable; +import java.util.List; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.DBStatistics; import org.apache.hadoop.hive.ql.plan.LimitDesc; +import org.apache.hadoop.hive.ql.plan.Statistics; +import org.apache.hadoop.hive.ql.plan.TabStatistics; import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.hive.ql.stats.StatsUtils; /** * Limit operator implementation Limits the number of rows to be passed on. 
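The hunk that follows estimates the LIMIT output as, in effect, min(limit, T(R)), falling back to the "least rows" value when the limit is -1. A tiny sketch of that rule with invented numbers:

```java
// Illustrative sketch only (not part of the patch): the LIMIT cardinality
// rule used by the getStatistics() hunk below.
public class LimitStatsSketch {
  static long estimateRows(long numRows, int limit, int leastRows) {
    int globalLimit = (limit == -1) ? leastRows : limit; // fall back when limit is unset
    return Math.min(globalLimit, numRows);               // a LIMIT can only reduce rows
  }

  public static void main(String[] args) {
    System.out.println(estimateRows(1_000_000L, 100, 0)); // 100
    System.out.println(estimateRows(50L, 200, 0));        // 50: the limit exceeds T(R)
  }
}
```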
@@ -76,4 +82,35 @@ public void closeOp(boolean abort) throws HiveException { } } + @Override + public Statistics getStatistics(HiveConf hiveconf) throws HiveException { + Statistics stats = this.getConf().getStatistics(); + if (stats == null) { + stats = super.getStatistics(hiveconf); + + List tsops = StatsUtils.getRoots(this); + + for (TableScanOperator tsop : tsops) { + String dbName = tsop.getConf().getTable().getDbName(); + String tabName = tsop.getConf().getTable().getTableName(); + DBStatistics dbs = stats.getDBStatisticsForDB(dbName); + if (dbs != null) { + TabStatistics ts = dbs.getTableStatsForTable(tabName); + // get the limit rows and set it as new number of rows + if (ts != null) { + int globalLimit = this.getConf().getLimit(); + if (globalLimit == -1) { + globalLimit = this.getConf().getLeastRows(); + } + + if(globalLimit <= ts.getNumRows()) { + StatsUtils.updateStats(stats, dbName, tabName, globalLimit); + } + } + } + } + + } + return stats; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MuxOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/MuxOperator.java index d5989ef..bcca2e5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/MuxOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MuxOperator.java @@ -26,10 +26,12 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.MuxDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.Statistics; import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; @@ -323,4 +325,10 @@ static public String getOperatorName() { public OperatorType getType() { return OperatorType.MUX; } + + @Override + public Statistics getStatistics(HiveConf hiveconf) throws HiveException { + // passthrough + return super.getStatistics(hiveconf); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java index 9fc7afa..c008c37 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java @@ -32,6 +32,8 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -39,6 +41,7 @@ import org.apache.hadoop.hive.ql.plan.Explain; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.Statistics; import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; @@ -1569,6 +1572,39 @@ public boolean acceptLimitPushdown() { return false; } + /** + * Computes and retrieves the stats for this operator. Default implementation + * just passes the statistics from its parent. 
If there are multiple parents + * then the statistics from all the parents are merged. If any of the parents have partial + * stats then the current operator will also have partial stats. + * + * @param hiveconf the Hive configuration + * @return Statistics for this operator + */ + public Statistics getStatistics(HiveConf hiveconf) throws HiveException { + Statistics stats = this.getConf().getStatistics(); + + if (stats == null) { + stats = new Statistics(); + int numParents = 0; + if (this.getParentOperators() != null) { + numParents = this.getParentOperators().size(); + } + + if (numParents > 0) { + try { + stats = this.getParentOperators().get(0).getStatistics(hiveconf).clone(); + } catch (CloneNotSupportedException e) { + throw new HiveException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); + } + } + if (numParents > 1) { + for (int i = 1; i < numParents; i++) { + stats.merge(this.getParentOperators().get(i).getStatistics(hiveconf)); + } + } + this.getConf().setStatistics(stats); + } + return stats; + } + @Override public String toString() { return getName() + "[" + getIdentifier() + "]"; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/PTFOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/PTFOperator.java index a249d74..cc36d26 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/PTFOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/PTFOperator.java @@ -22,20 +22,28 @@ import java.util.ArrayDeque; import java.util.Deque; import java.util.List; +import java.util.Map; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.PTFPartition.PTFPartitionIterator; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.plan.DBStatistics; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PTFDesc; import org.apache.hadoop.hive.ql.plan.PTFDeserializer; +import org.apache.hadoop.hive.ql.plan.Statistics; +import org.apache.hadoop.hive.ql.plan.TabStatistics; import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef; import org.apache.hadoop.hive.ql.plan.ptf.PTFInputDef; import org.apache.hadoop.hive.ql.plan.ptf.PartitionDef; import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef; +import org.apache.hadoop.hive.ql.stats.StatsUtils; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLeadLag; import org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator; import org.apache.hadoop.hive.serde2.SerDe; @@ -289,4 +297,82 @@ public static void connectLeadLagFunctionsToPartition(PTFDesc ptfDesc, } } + @Override + public Statistics getStatistics(HiveConf hiveconf) throws HiveException { + // The PTF operator doesn't change the input rows. However, it adds a new column + // for each aggregation. The following rules are exactly the same as for the SELECT operator. + // A PTF operator will have a SEL operator as its child, and the following code is the same + // as in the SELECT operator; the
SELECT operator will act as a pass through if its + // parent is a PTF operator + + Statistics stats = conf.getStatistics(); + SelectOperator childSEL = (SelectOperator) this.getChildOperators().get(0); + List colList = childSEL.getConf().getColList(); + boolean isConstPresent = StatsUtils.checkForConstOrUDFProjection(colList); + + if (stats == null) { + stats = super.getStatistics(hiveconf); + + // save the mapping between column aliases to the internal aliases. + // This is required for computing the size of projected column with alias + // renaming. Ex: select col1 as col_a, col2 as col_b from table; + StatsUtils.populateAliasToIntColNameMappings(stats, this); + + List tsops = StatsUtils.getRoots(this); + + for (TableScanOperator tsop : tsops) { + String dbName = tsop.getConf().getTable().getDbName(); + String tabName = tsop.getConf().getTable().getTableName(); + DBStatistics dbs = stats.getDBStatisticsForDB(dbName); + if (dbs != null) { + TabStatistics ts = dbs.getTableStatsForTable(tabName); + if (ts != null) { + Map outColExprMap = ts.getOutColExprMap(); + if (ts.getBasicStatState().equals(TabStatistics.State.COMPLETE)) { + List projColAliases = StatsUtils.getProjectedColumns(colList); + + boolean[] colSelections = new boolean[ts.getNeededCols().size()]; + + for (String col : projColAliases) { + String actualColName = col; + if (col.startsWith("_") && outColExprMap != null) { + ExprNodeDesc desc = outColExprMap.get(col); + if (desc instanceof ExprNodeColumnDesc) { + actualColName = ((ExprNodeColumnDesc) desc).getColumn(); + } + } + int colIdx = ts.getNeededCols().indexOf(actualColName); + + // this could be a constant projection + if (colIdx == -1 && col.startsWith("_")) { + continue; + } + + colSelections[colIdx] = true; + } + + List colStats = ts.getColumnStats(); + if (colStats == null && !isConstPresent) { + return stats; + } + long reducedRawDataSize = StatsUtils.getRawDataSizeForSelectedColumns(stats, dbName, + tabName, colStats, colSelections); + + // constant projection adds to the data size + if (isConstPresent) { + reducedRawDataSize += StatsUtils.getConstOrUDFProjectionSize(ts.getNumRows(), + colList); + } + ts.setRawDataSize(reducedRawDataSize); + } + + } + } + + } + } + + return stats; + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java index 025bf9e..6e12ea3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java @@ -20,13 +20,20 @@ import java.io.Serializable; import java.util.List; +import java.util.Map; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.plan.DBStatistics; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.SelectDesc; +import org.apache.hadoop.hive.ql.plan.Statistics; +import org.apache.hadoop.hive.ql.plan.TabStatistics; import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.hive.ql.stats.StatsUtils; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; /** @@ -129,4 +136,116 @@ public boolean supportUnionRemoveOptimization() { public boolean acceptLimitPushdown() { return true; } + + @Override + public Statistics getStatistics(HiveConf hiveconf) 
throws HiveException { + // Projection doesn't change the number of rows emitted from the parent + // operator. It changes the size of each tuple emitted. In the typical case, + // where only a subset of the columns is selected, the average row size will + // shrink as some of the columns are pruned. In order to accurately + // compute the average row size, column-level statistics are required. + // Column-level statistics store the average size of the values in a column, which + // can be used to more reliably estimate the reduction in the size of each + // tuple. + + // For more information, refer to the 'Estimating The Cost Of Operations' chapter + // in "Database Systems: The Complete Book" by Garcia-Molina et al. + + // If column-level statistics are not available then we can use the standard + // fixed sizes for primitive data types. But we cannot handle variable-length types + // in the absence of column statistics, so we will skip applying the rules in + // that case. + Statistics stats = conf.getStatistics(); + List<ExprNodeDesc> colList = conf.getColList(); + + // if there are constant or UDF projections then they need to be handled in a + // special way based on the output schema + boolean isConstPresent = StatsUtils.checkForConstOrUDFProjection(colList); + + if (stats == null) { + stats = super.getStatistics(hiveconf); + + // A PTFOperator does not affect the input number of rows, and the statistics rules + // for PTFOperator are the same as for the SELECT operator. So, if a PTFOperator has + // already applied the rules, don't apply them again here. + // selectStar projects all columns, so we can skip it as well. + if (conf.isSelectStar() || this.getParentOperators().get(0) instanceof PTFOperator) { + return stats; + } + + // save the mapping from column aliases to the internal aliases. + // This is required for computing the size of a projected column with alias + // renaming. Ex: select col1 as col_a, col2 as col_b from table; + StatsUtils.populateAliasToIntColNameMappings(stats, this); + + List<TableScanOperator> tsops = StatsUtils.getRoots(this); + + for (TableScanOperator tsop : tsops) { + String dbName = tsop.getConf().getTable().getDbName(); + String tabName = tsop.getConf().getTable().getTableName(); + DBStatistics dbs = stats.getDBStatisticsForDB(dbName); + if (dbs != null) { + TabStatistics ts = dbs.getTableStatsForTable(tabName); + // if basic statistics are not available, skip + if (ts != null && ts.getBasicStatState().equals(TabStatistics.State.COMPLETE)) { + + Map<String, ExprNodeDesc> outColExprMap = ts.getOutColExprMap(); + + if (!conf.isSelStarNoCompute()) { + List<String> projColAliases = StatsUtils.getProjectedColumns(colList); + boolean[] colSelections = new boolean[ts.getNeededCols().size()]; + + for (String col : projColAliases) { + String actualColName = col; + + // internal column names start with '_'. get the actual column name + if (col.startsWith("_") && outColExprMap != null) { + ExprNodeDesc desc = outColExprMap.get(col); + if (desc instanceof ExprNodeColumnDesc) { + actualColName = ((ExprNodeColumnDesc) desc).getColumn(); + } + } + + // the projected columns are different from the needed columns: the projected + // columns will be a subset of the needed columns. The projected columns + // are the columns in the SEL operator, but some needed columns might + // be used by downstream operators. + // NOTE: the projected columns will also include the virtual columns. + int colIdx = ts.getNeededCols().indexOf(actualColName); + + // this could be a constant projection or a virtual column;
it + // is handled below + if (colIdx == -1 && col.startsWith("_")) { + continue; + } + + // boolean vector with true values are actual columns whereas + // the remaining columns are virtual columns + colSelections[colIdx] = true; + } + + List colStats = ts.getColumnStats(); + if (colStats == null && !isConstPresent) { + return stats; + } + + long reducedRawDataSize = StatsUtils.getRawDataSizeForSelectedColumns(stats, dbName, + tabName, colStats, colSelections); + + // constant projection adds to the data size + if (isConstPresent) { + reducedRawDataSize += StatsUtils.getConstOrUDFProjectionSize(ts.getNumRows(), + conf.getColList()); + } + ts.setRawDataSize(reducedRawDataSize); + } + + } + } + + } + } + + return stats; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java index e538092..21d30ea 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java @@ -27,15 +27,18 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.Statistics; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.TableScanDesc; import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.ql.stats.StatsPublisher; import org.apache.hadoop.hive.ql.stats.StatsSetupConst; +import org.apache.hadoop.hive.ql.stats.StatsUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; @@ -320,6 +323,17 @@ private void publishStats() throws HiveException { } @Override + public Statistics getStatistics(HiveConf hiveconf) throws HiveException { + Statistics stats = this.getConf().getStatistics(); + if (stats == null) { + stats = StatsUtils.collectStatistics(alias, getConf().getTable(), hiveconf, this, getConf() + .getPruningPredicate()); + this.getConf().setStatistics(stats); + } + return stats; + } + + @Override public boolean supportSkewJoinOptimization() { return true; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java index 59c07c3..246f9ba 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java @@ -23,7 +23,9 @@ import java.util.List; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.Statistics; import org.apache.hadoop.hive.ql.plan.UnionDesc; import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils.ReturnObjectInspectorResolver; @@ -176,4 +178,10 @@ public boolean opAllowedBeforeSortMergeJoin() { // it would be difficult to figure out the big table for the mapjoin. 
return false; } + + @Override + public Statistics getStatistics(HiveConf hiveconf) throws HiveException { + // passthrough + return super.getStatistics(hiveconf); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/AnnotateStatsProc.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/AnnotateStatsProc.java new file mode 100644 index 0000000..053988d --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/AnnotateStatsProc.java @@ -0,0 +1,27 @@ +package org.apache.hadoop.hive.ql.optimizer; + +import java.util.Stack; + +import org.apache.hadoop.hive.ql.exec.FileSinkOperator; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.parse.SemanticException; + +public class AnnotateStatsProc implements NodeProcessor { + + @Override + public Object + process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { + AnnotateStatsProcCtx context = (AnnotateStatsProcCtx) procCtx; + FileSinkOperator sink = (FileSinkOperator) nd; + try { + sink.getStatistics(context.getConf()); + } catch (HiveException e) { + throw new SemanticException(e); + } + return false; + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/AnnotateStatsProcCtx.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/AnnotateStatsProcCtx.java new file mode 100644 index 0000000..f6751f1 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/AnnotateStatsProcCtx.java @@ -0,0 +1,58 @@ +package org.apache.hadoop.hive.ql.optimizer; + +import java.util.Map; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.Statistics; + +public class AnnotateStatsProcCtx implements NodeProcessorCtx { + + private ParseContext pctx; + private HiveConf conf; + private Map> outColExprMap = null; + private Statistics andExprStats = null; + + public AnnotateStatsProcCtx(ParseContext pctx) { + this.setParseContext(pctx); + if(pctx != null) { + this.setConf(pctx.getConf()); + } else { + this.setConf(null); + } + } + + public HiveConf getConf() { + return conf; + } + + public void setConf(HiveConf conf) { + this.conf = conf; + } + + public ParseContext getParseContext() { + return pctx; + } + + public void setParseContext(ParseContext pctx) { + this.pctx = pctx; + } + + public Map> getOutColExprMap() { + return outColExprMap; + } + + public void setOutColExprMap(Map> outColExprMap) { + this.outColExprMap = outColExprMap; + } + + public Statistics getAndExprStats() { + return andExprStats; + } + + public void setAndExprStats(Statistics andExprStats) { + this.andExprStats = andExprStats; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/AnnotateWithStatistics.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/AnnotateWithStatistics.java new file mode 100644 index 0000000..b2269f6 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/AnnotateWithStatistics.java @@ -0,0 +1,64 @@ +package org.apache.hadoop.hive.ql.optimizer; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Stack; + +import org.apache.hadoop.hive.ql.exec.FileSinkOperator; +import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; +import 
org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; +import org.apache.hadoop.hive.ql.lib.Dispatcher; +import org.apache.hadoop.hive.ql.lib.GraphWalker; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.lib.Rule; +import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.parse.SemanticException; + +public class AnnotateWithStatistics implements Transform { + + @Override + public ParseContext transform(ParseContext pctx) throws SemanticException { + + AnnotateStatsProcCtx aspCtx = new AnnotateStatsProcCtx(pctx); + + // create a walker which walks the tree in a DFS manner while maintaining + // the operator stack. The dispatcher generates the plan from the operator + // tree + Map opRules = new LinkedHashMap(); + opRules.put( + new RuleRegExp(new String("Set statistics - FileSink"), FileSinkOperator.getOperatorName() + + "%"), getAnnotateStatsProc()); + + // The dispatcher fires the processor corresponding to the closest matching + // rule and passes the context along + Dispatcher disp = new DefaultRuleDispatcher(getDefaultProc(), opRules, aspCtx); + GraphWalker ogw = new DefaultGraphWalker(disp); + + // Create a list of topop nodes + ArrayList topNodes = new ArrayList(); + topNodes.addAll(pctx.getTopOps().values()); + ogw.startWalking(topNodes, null); + + return pctx; + } + + private NodeProcessor getAnnotateStatsProc() { + return new AnnotateStatsProc(); + } + + private NodeProcessor getDefaultProc() { + return new NodeProcessor() { + @Override + public Object process(Node nd, + Stack stack, + NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + return null; + } + }; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java index 3a76bfc..b7a7685 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java @@ -111,6 +111,10 @@ public void initialize(HiveConf hiveConf) { if (HiveConf.getFloatVar(hiveConf, HiveConf.ConfVars.HIVELIMITPUSHDOWNMEMORYUSAGE) > 0) { transformations.add(new LimitPushdownOptimizer()); } + // for now we will do annotation only in explain + if (pctx.getContext().getExplain()) { + transformations.add(new AnnotateWithStatistics()); + } transformations.add(new SimpleFetchOptimizer()); // must be called last if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEFETCHTASKAGGR)) { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerOperatorFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerOperatorFactory.java index 51464e5..0004a0e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerOperatorFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerOperatorFactory.java @@ -125,6 +125,9 @@ protected void addPruningPred(Map opToPrunner, // Put the mapping from table scan operator to pruner_pred opToPrunner.put(top, pruner_pred); + // Set the predicate in the table directly + top.getConf().setPruningPredicate(pruner_pred); + return; } @@ -165,6 +168,9 @@ protected void addPruningPred(Map> // Put the mapping from table scan operator to part-pruner map opToPrunner.put(top, partToPruner); + // Set the predicate in the table directly + top.getConf().setPruningPredicate(pruner_pred); + return; } } diff --git 
ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
index 5412373..99f9345 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
@@ -157,7 +157,7 @@ public static PrunedPartitionList prune(TableScanOperator ts, ParseContext parse
    * pruner condition.
    * @throws HiveException
    */
-  private static PrunedPartitionList prune(Table tab, ExprNodeDesc prunerExpr,
+  public static PrunedPartitionList prune(Table tab, ExprNodeDesc prunerExpr,
       HiveConf conf, String alias,
       Map<String, PrunedPartitionList> prunedPartitionsMap) throws HiveException {
     LOG.trace("Started pruning partition");
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index c34b261..6aa543d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -7810,6 +7810,10 @@ private Operator genTablePlan(String alias, QB qb) throws SemanticException {
 
     // Add a mapping from the table scan operator to Table
     topToTable.put((TableScanOperator) top, tab);
+
+    // set the table in the table scan descriptor directly
+    ((TableScanOperator) top).getConf().setTable(tab);
+
     Map<String, String> props = qb.getTabPropsForAlias(alias);
     if (props != null) {
       topToTableProps.put((TableScanOperator) top, props);
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java
index c096a65..24694ef 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java
@@ -21,6 +21,18 @@ public class AbstractOperatorDesc implements OperatorDesc {
 
   private boolean vectorMode = false;
 
+  protected transient Statistics statistics;
+
+  @Override
+  @Explain(displayName = "Statistics", normalExplain = false)
+  public Statistics getStatistics() {
+    return statistics;
+  }
+
+  @Override
+  public void setStatistics(Statistics statistics) {
+    this.statistics = statistics;
+  }
 
   @Override
   public Object clone() throws CloneNotSupportedException {
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
new file mode 100644
index 0000000..5f34393
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
@@ -0,0 +1,120 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hadoop.hive.ql.plan; + + +public class ColStatistics { + + private String colName; + private String colType; + private long countDistint; + private long numNulls; + private int avgColLen; + + public ColStatistics(String colName, String colType) { + this.setColumnName(colName); + this.setColumnType(colType); + } + + public ColStatistics() { + this(null, null); + } + + public String getColumnName() { + return colName; + } + + public void setColumnName(String colName) { + this.colName = colName; + } + + public String getColumnType() { + return colType; + } + + public void setColumnType(String colType) { + this.colType = colType; + } + + public long getCountDistint() { + return countDistint; + } + + public void setCountDistint(long countDistint) { + this.countDistint = countDistint; + } + + public long getNumNulls() { + return numNulls; + } + + public void setNumNulls(long numNulls) { + this.numNulls = numNulls; + } + + public int getAvgColLen() { + return avgColLen; + } + + public void setAvgColLen(int avgColLen) { + this.avgColLen = avgColLen; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(" colName: "); + sb.append(colName); + sb.append(" colType: "); + sb.append(colType); + sb.append(" countDistincts: "); + sb.append(countDistint); + sb.append(" numNulls: "); + sb.append(numNulls); + sb.append(" avgColLen: "); + sb.append(avgColLen); + return sb.toString(); + } + + @Override + protected ColStatistics clone() throws CloneNotSupportedException { + ColStatistics clone = new ColStatistics(colName, colType); + clone.setAvgColLen(avgColLen); + clone.setCountDistint(countDistint); + clone.setNumNulls(numNulls); + return clone; + } + + @Override + public boolean equals(Object obj) { + if (obj instanceof ColStatistics) { + ColStatistics cs = (ColStatistics) obj; + if (!colName.equalsIgnoreCase(cs.colName) || + !colType.equalsIgnoreCase(cs.colType) || + countDistint != cs.countDistint || + numNulls != cs.numNulls || + avgColLen != cs.avgColLen) { + return false; + } + } else { + return false; + } + return true; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/DBStatistics.java ql/src/java/org/apache/hadoop/hive/ql/plan/DBStatistics.java new file mode 100644 index 0000000..d6f594a --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/DBStatistics.java @@ -0,0 +1,170 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.hive.ql.plan;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.plan.TabStatistics.Level;
+
+import com.google.common.collect.Lists;
+
+public class DBStatistics implements Mergeable {
+
+  private String dbname;
+  private List<TabStatistics> tabStats;
+
+  public DBStatistics() {
+    this("default");
+  }
+
+  public DBStatistics(String dbname) {
+    this.setDBname(dbname);
+    this.setTableStats(null);
+  }
+
+  public String getDBname() {
+    return dbname;
+  }
+
+  public void setDBname(String dbname) {
+    this.dbname = dbname;
+  }
+
+  public List<TabStatistics> getTableStats() {
+    return tabStats;
+  }
+
+  public void setTableStats(List<TabStatistics> tabStats) {
+    this.tabStats = tabStats;
+  }
+
+  public void addToTableStats(TabStatistics ts) {
+    if (tabStats == null) {
+      tabStats = Lists.newArrayList();
+    }
+    tabStats.add(ts);
+  }
+
+  public TabStatistics getTableStatsForTable(String tableName) {
+    for (TabStatistics ts : tabStats) {
+      if (ts.getTableName().equalsIgnoreCase(tableName)) {
+        return ts;
+      }
+    }
+
+    return null;
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append(" dbName: ");
+    sb.append(dbname);
+    sb.append(" tabStats: ");
+    sb.append(tabStats.toString());
+    return sb.toString();
+  }
+
+  public String explainString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append(" dbName: ");
+    sb.append(dbname);
+    sb.append(" tabStats: [");
+    for (TabStatistics ts : tabStats) {
+      sb.append(ts.explainString());
+      if (tabStats.size() > 1 && (tabStats.indexOf(ts) != tabStats.size() - 1)) {
+        sb.append(", ");
+      }
+    }
+    sb.append("]");
+    return sb.toString();
+  }
+
+  @Override
+  protected DBStatistics clone() throws CloneNotSupportedException {
+    DBStatistics clone = new DBStatistics();
+    clone.setDBname(dbname);
+    for (TabStatistics ts : tabStats) {
+      clone.addToTableStats(ts.clone());
+    }
+    return clone;
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (obj instanceof DBStatistics) {
+      DBStatistics ds = (DBStatistics) obj;
+      if (!dbname.equalsIgnoreCase(ds.dbname) ||
+          !tabStats.equals(ds.tabStats)) {
+        return false;
+      }
+    } else {
+      return false;
+    }
+    return true;
+  }
+
+  @Override
+  public void merge(Object obj) {
+    if (obj instanceof DBStatistics) {
+      DBStatistics dbs = (DBStatistics) obj;
+      for (TabStatistics ts : dbs.getTableStats()) {
+        if (get(ts) != null) {
+          TabStatistics tbmerge = (TabStatistics) get(ts);
+
+          if (tbmerge.getLevel().equals(Level.TABLE)) {
+            // table level merge
+            // merge table levels or partition levels. cannot mix both
+            if (tbmerge.getLevel().equals(ts.getLevel())) {
+              tbmerge.merge(ts);
+            }
+          } else {
+            // partition level merge. the partition must be looked up in the
+            // matching table statistics (this DBStatistics only knows tables)
+            for (PartStatistics ps : ts.getPartitionStats()) {
+              if (tbmerge.get(ps) != null) {
+                PartStatistics psmerge = (PartStatistics) tbmerge.get(ps);
+                psmerge.merge(ps);
+              }
+            }
+          }
+        } else {
+          tabStats.add(ts);
+        }
+      }
+    }
+  }
+
+  @Override
+  public Object get(Object obj) {
+    // checks for the existence of TabStatistics
+    if (obj instanceof TabStatistics) {
+      TabStatistics tbs = (TabStatistics) obj;
+      for (TabStatistics tbstat : tabStats) {
+        // we deliberately avoid equals() here: equals() does a deep comparison,
+        // and for merging we only need to match on the table name
+        if (tbstat.getTableName().equalsIgnoreCase(tbs.getTableName())) {
+          return tbstat;
+        }
+      }
+    }
+    return null;
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/Mergeable.java ql/src/java/org/apache/hadoop/hive/ql/plan/Mergeable.java
new file mode 100644
index 0000000..b468f16
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/Mergeable.java
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.plan;
+
+public interface Mergeable {
+  /**
+   * Multiple objects of a class implementing this interface can be merged into
+   * a single object of the same type.
+   *
+   * @param obj the object to merge into this one
+   */
+  public void merge(Object obj);
+
+  /**
+   * Check whether a matching mergeable object already exists and, if so,
+   * return the existing object that obj should be merged into.
+   *
+   * @param obj the candidate object
+   * @return the existing object to merge into, or null if there is none
+   */
+  public Object get(Object obj);
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java
index 36757e8..6c2efaf 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java
@@ -22,4 +22,6 @@
 public interface OperatorDesc extends Serializable, Cloneable {
   public Object clone() throws CloneNotSupportedException;
+  public Statistics getStatistics();
+  public void setStatistics(Statistics statistics);
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/PartStatistics.java ql/src/java/org/apache/hadoop/hive/ql/plan/PartStatistics.java
new file mode 100644
index 0000000..105c013
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/PartStatistics.java
@@ -0,0 +1,268 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hadoop.hive.ql.plan; + +import java.util.List; + +import org.apache.hadoop.hive.ql.plan.TabStatistics.State; + +import com.google.common.collect.Lists; + +public class PartStatistics implements Mergeable { + + private String partName; + private List colStats; + private long numRows; + private long rawDataSize; + private State basicStatState; + private State colStatState; + + public PartStatistics(String partName) { + this.setPartitionName(partName); + this.setColumnStats(null); + this.setBasicStatState(State.NONE); + this.setColStatState(State.NONE); + } + + public String getPartitionName() { + return partName; + } + + public void setPartitionName(String partName) { + this.partName = partName; + } + + public List getColumnStats() { + return colStats; + } + + public void setColumnStats(List colStats) { + this.colStats = colStats; + } + + public void addToColumnStats(ColStatistics colStat) { + if (colStats == null) { + colStats = Lists.newArrayList(); + } + colStats.add(colStat); + } + + public ColStatistics getColumnStatisticsForColumn(String colName) { + for (ColStatistics cs : colStats) { + if (cs.getColumnName().equalsIgnoreCase(colName)) { + return cs; + } + } + + return null; + } + + public long getNumRows() { + return numRows; + } + + public void setNumRows(long numRows) { + this.numRows = numRows; + } + + public long getRawDataSize() { + return rawDataSize; + } + + public void setRawDataSize(long rawDataSize) { + this.rawDataSize = rawDataSize; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(" partName: "); + sb.append(partName); + sb.append(" numRows: "); + sb.append(numRows); + sb.append(" rawDataSize: "); + sb.append(rawDataSize); + sb.append(" partStatState: "); + sb.append(basicStatState); + sb.append(" colStatState: "); + sb.append(colStatState); + sb.append(" colStats: "); + sb.append(colStats.toString()); + return sb.toString(); + } + + @Explain(displayName = "Partition Statistics") + public String explainString() { + StringBuilder sb = new StringBuilder(); + sb.append(" partName: "); + sb.append(partName); + sb.append(" numRows: "); + sb.append(numRows); + sb.append(" rawDataSize: "); + sb.append(rawDataSize); + sb.append(" partStatState: "); + sb.append(basicStatState); + sb.append(" colStatState: "); + sb.append(colStatState); + return sb.toString(); + } + + @Override + protected PartStatistics clone() throws CloneNotSupportedException { + PartStatistics clone = new PartStatistics(partName); + for (ColStatistics cs : colStats) { + clone.addToColumnStats(cs.clone()); + } + clone.setNumRows(numRows); + clone.setRawDataSize(rawDataSize); + clone.setBasicStatState(basicStatState); + clone.setColStatState(colStatState); + return clone; + } + + @Override + public boolean equals(Object obj) { + if (obj instanceof PartStatistics) { + PartStatistics ps = (PartStatistics) obj; + if (!partName.equalsIgnoreCase(ps.partName) || + !colStats.equals(ps.colStats) || + numRows != ps.numRows || + rawDataSize != ps.rawDataSize || + !basicStatState.equals(ps.basicStatState) || + !colStatState.equals(ps.colStatState)) { + return false; + } + } else { + return false; + } + return true; + } + + public State getBasicStatState() { + return basicStatState; + } + + public void setBasicStatState(State partStatState) { + this.basicStatState = partStatState; + } + + public State getColStatState() { + return colStatState; + } + + public void setColStatState(State colStatState) { + this.colStatState = colStatState; + } + + 
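A minimal standalone sketch of the state-combination rule that the merge()/updateBasicStatsState/updateColStatsState methods below implement (the helper name is hypothetical; the rule is read off the transition table further down): NONE behaves as the initial state and adopts whatever arrives first, after which anything short of COMPLETE degrades the result to PARTIAL.

    enum State { COMPLETE, PARTIAL, NONE }

    static State accumulate(State current, State incoming) {
      if (current == State.NONE) {
        return incoming;      // first observation is adopted as-is
      }
      if (incoming == State.COMPLETE) {
        return current;       // COMPLETE never upgrades an existing state
      }
      return State.PARTIAL;   // incoming PARTIAL or NONE degrades to PARTIAL
    }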
@Override
+  public void merge(Object obj) {
+    if (obj instanceof PartStatistics) {
+      PartStatistics ps = (PartStatistics) obj;
+      numRows += ps.numRows;
+      rawDataSize += ps.rawDataSize;
+      updateBasicStatsState(ps.basicStatState);
+      updateColStatsState(ps.colStatState);
+
+      for (ColStatistics cs : ps.getColumnStats()) {
+        if (get(cs) != null) {
+          // FIXME: how to merge column stats? esp. countDistincts?
+        } else {
+          colStats.add(cs);
+        }
+      }
+    }
+  }
+
+  @Override
+  public Object get(Object obj) {
+    // checks for the existence of ColStatistics
+    if (obj instanceof ColStatistics) {
+      ColStatistics cs = (ColStatistics) obj;
+      for (ColStatistics cstat : colStats) {
+        // we deliberately avoid equals() here: equals() does a deep comparison,
+        // and for merging we only need to match on the column name
+        if (cstat.getColumnName().equalsIgnoreCase(cs.getColumnName())) {
+          return cstat;
+        }
+      }
+    }
+    return null;
+  }
+
+  // basic stats state transition when merging another partition's statistics
+  // into this one (rows: current state, columns: incoming partition's state)
+  //
+  //              incoming partition
+  //            ----------------------------------------
+  //    current | COMPLETE   PARTIAL    NONE           |
+  //            |______________________________________|
+  //   COMPLETE | COMPLETE   PARTIAL    PARTIAL        |
+  //   PARTIAL  | PARTIAL    PARTIAL    PARTIAL        |
+  //   NONE     | COMPLETE   PARTIAL    NONE           |
+  //            ----------------------------------------
+
+  // For example: if the current state is COMPLETE and the incoming partition's
+  // state is PARTIAL, the merged state becomes PARTIAL
+  private void updateBasicStatsState(State partStatState) {
+    if (partStatState.equals(State.PARTIAL)) {
+      basicStatState = State.PARTIAL;
+    }
+
+    if (partStatState.equals(State.NONE)) {
+      if (basicStatState.equals(State.NONE)) {
+        basicStatState = State.NONE;
+      } else {
+        basicStatState = State.PARTIAL;
+      }
+    }
+
+    if (partStatState.equals(State.COMPLETE)) {
+      if (basicStatState.equals(State.PARTIAL)) {
+        basicStatState = State.PARTIAL;
+      } else {
+        basicStatState = State.COMPLETE;
+      }
+    }
+  }
+
+  // similar to the table above for basic stats
+  private void updateColStatsState(State partColState) {
+    if (partColState.equals(State.PARTIAL)) {
+      colStatState = State.PARTIAL;
+    }
+
+    if (partColState.equals(State.NONE)) {
+      if (colStatState.equals(State.NONE)) {
+        colStatState = State.NONE;
+      } else {
+        colStatState = State.PARTIAL;
+      }
+    }
+
+    if (partColState.equals(State.COMPLETE)) {
+      if (colStatState.equals(State.PARTIAL)) {
+        colStatState = State.PARTIAL;
+      } else {
+        colStatState = State.COMPLETE;
+      }
+    }
+  }
+
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
new file mode 100644
index 0000000..461ce42
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
@@ -0,0 +1,139 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.plan;
+
+import java.io.Serializable;
+import java.util.List;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Statistics. Describes the output of an operator in terms of size, rows, etc.,
+ * based on estimates.
+ */
+@SuppressWarnings("serial")
+public class Statistics implements Serializable, Mergeable {
+
+  private List<DBStatistics> dbStats;
+
+  // andExprStats is a clone of the original statistics object and is used by
+  // the filter operator. When applying statistics rules to an AND expression,
+  // each child's update must cascade into the next, so we need a separate
+  // clone whose stats the AND operator can update cumulatively. All other
+  // expressions (OR, NOT, etc.) apply their rules independently of each
+  // other, so they can reuse the original stats object.
+  private Statistics andExprStats;
+
+  public Statistics() {
+    this.setDBStats(null);
+  }
+
+  public void addToDBStatistics(DBStatistics dbstat) {
+    if (dbStats == null) {
+      dbStats = Lists.newArrayList();
+    }
+    dbStats.add(dbstat);
+  }
+
+  public DBStatistics getDBStatisticsForDB(String dbname) {
+    for (DBStatistics ds : dbStats) {
+      if (ds.getDBname().equalsIgnoreCase(dbname)) {
+        return ds;
+      }
+    }
+    return null;
+  }
+
+  public List<DBStatistics> getDBStats() {
+    return dbStats;
+  }
+
+  public void setDBStats(List<DBStatistics> dbStats) {
+    this.dbStats = dbStats;
+  }
+
+  public Statistics getAndExprStats() {
+    return andExprStats;
+  }
+
+  public void setAndExprStats(Statistics andExprStats) {
+    this.andExprStats = andExprStats;
+  }
+
+  @Override
+  @Explain(displayName = "")
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append("dbStats: [");
+    for (DBStatistics dbs : dbStats) {
+      sb.append(dbs.explainString());
+      if (dbStats.size() > 1 && (dbStats.indexOf(dbs) != dbStats.size() - 1)) {
+        sb.append(", ");
+      }
+    }
+    sb.append("]");
+    return sb.toString();
+  }
+
+  @Override
+  public Statistics clone() throws CloneNotSupportedException {
+    if (dbStats == null) {
+      return null;
+    }
+    Statistics clone = new Statistics();
+    for (DBStatistics ds : dbStats) {
+      clone.addToDBStatistics(ds.clone());
+    }
+    return clone;
+  }
+
+  @Override
+  public void merge(Object obj) {
+    if (obj instanceof Statistics) {
+      Statistics st = (Statistics) obj;
+      // only merge DBStatistics; we will not merge andExprStats as it is temporary
+      for (DBStatistics dbs : st.getDBStats()) {
+        if (get(dbs) != null) {
+          DBStatistics dbmerge = (DBStatistics) get(dbs);
+          // there is nothing to merge at the DBStatistics level itself, since
+          // the dbnames already match; delegate to the table level merge
+          dbmerge.merge(dbs);
+        } else {
+          dbStats.add(dbs);
+        }
+      }
+    }
+  }
+
+  @Override
+  public Object get(Object obj) {
+    if (obj instanceof DBStatistics) {
+      DBStatistics dbs = (DBStatistics) obj;
+      for (DBStatistics dbstat : dbStats) {
+        // we deliberately avoid equals() here: equals() does a deep comparison,
+        // and for merging we only need to match on the dbname
+        if (dbstat.getDBname().equalsIgnoreCase(dbs.getDBname())) {
+          return dbstat;
+        }
+      }
+    }
+    return null;
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/TabStatistics.java ql/src/java/org/apache/hadoop/hive/ql/plan/TabStatistics.java
new file mode 100644
index 0000000..aaca321
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/TabStatistics.java
@@ -0,0 +1,418 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.plan;
+
+import java.util.List;
+import java.util.Map;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+
+public class TabStatistics implements Mergeable {
+
+  // A note on reliability: the COMPLETE/PARTIAL/NONE states below are not a
+  // hard guarantee, because Hive offers several ways to perform DDL that
+  // bypass stats collection. If a table is externally managed, or if a
+  // partition is added with hive.stats.autogather disabled, the stats
+  // reported by the metastore may be stale. Reliability should ultimately be
+  // tracked by Hive DDL itself, by setting and maintaining flags in the
+  // metastore.
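To make the intended use of this hierarchy concrete, a minimal assembly sketch (hypothetical usage mirroring StatsUtils.collectStatistics later in this patch; the table name and numbers are invented):

    Statistics stats = new Statistics();
    DBStatistics db = new DBStatistics("default");
    TabStatistics tab = new TabStatistics("emp");
    tab.setNumRows(6);
    tab.setRawDataSize(66);
    tab.setBasicStatState(TabStatistics.State.COMPLETE);
    db.addToTableStats(tab);
    stats.addToDBStatistics(db);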
+ public enum State { + COMPLETE, PARTIAL, NONE + } + + public enum Level { + TABLE, PARTITION + } + + private String tabName; + + // basic stats + private long numRows; + private long rawDataSize; + private State basicStatState; + + private Level level; + private List partStats; + + // column stats + private List colStats; + private List neededCols; + private State colStatState; + + // output column expression map is required by some operators to find mapping + // between column aliases vs internal column names + private Map outColExprMap; + + // maintains the mapping between the original table name and its alias + private Map tableAliasMap; + + public TabStatistics(String tableName) { + this.setTableName(tableName); + this.setColumnStats(null); + this.setLevel(Level.TABLE); + this.setPartitionStats(null); + this.setNeededCols(null); + this.setOutColExprMap(null); + this.setTableAliasMap(null); + this.setBasicStatState(State.NONE); + this.setColStatState(State.NONE); + } + + public String getTableName() { + return tabName; + } + + public void setTableName(String tabName) { + this.tabName = tabName; + } + + public List getColumnStats() { + return colStats; + } + + public void setColumnStats(List colStats) { + this.colStats = colStats; + } + + public void addToColumnStats(ColStatistics colStat) { + if (colStats == null) { + this.colStats = Lists.newArrayList(); + } + colStats.add(colStat); + } + + public Level getLevel() { + return level; + } + + public void setLevel(Level level) { + this.level = level; + } + + public List getPartitionStats() { + return partStats; + } + + public void setPartitionStats(List partStats) { + this.partStats = partStats; + } + + public void addToPartitionStats(PartStatistics partStat) { + if (partStats == null) { + partStats = Lists.newArrayList(); + } + // merge basic stats + addToNumRows(partStat.getNumRows()); + addToRawDataSize(partStat.getRawDataSize()); + updateBasicStatsState(partStat.getBasicStatState()); + + // TODO: do we need to merge column stats from partitions to table? 
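(A note on why the TODO above is non-trivial: numNulls could simply be summed across partitions, but distinct-value counts cannot be. For two partitions with NDVs d1 and d2, the table-level NDV is only bounded, max(d1, d2) <= NDV <= d1 + d2, and cannot be pinned down without histograms.)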
+    // For now, update only the state
+    updateColStatsState(partStat.getColStatState());
+    partStats.add(partStat);
+  }
+
+  // basic stats state while adding new partition statistics to table statistics
+  // (rows: current table-level state, columns: incoming partition's state)
+  //
+  //                  partition
+  //            ----------------------------------------
+  //   table    | COMPLETE   PARTIAL    NONE           |
+  //            |______________________________________|
+  //   COMPLETE | COMPLETE   PARTIAL    PARTIAL        |
+  //   PARTIAL  | PARTIAL    PARTIAL    PARTIAL        |
+  //   NONE     | COMPLETE   PARTIAL    NONE           |
+  //            ----------------------------------------
+
+  // For example: if the table stats state is COMPLETE and the partition stats
+  // state is PARTIAL, then the table stats state is updated to PARTIAL
+  private void updateBasicStatsState(State partStatState) {
+    if (partStatState.equals(State.PARTIAL)) {
+      basicStatState = State.PARTIAL;
+    }
+
+    if (partStatState.equals(State.NONE)) {
+      if (basicStatState.equals(State.NONE)) {
+        basicStatState = State.NONE;
+      } else {
+        basicStatState = State.PARTIAL;
+      }
+    }
+
+    if (partStatState.equals(State.COMPLETE)) {
+      if (basicStatState.equals(State.PARTIAL)) {
+        basicStatState = State.PARTIAL;
+      } else {
+        basicStatState = State.COMPLETE;
+      }
+    }
+  }
+
+  // similar to the table above for basic stats
+  private void updateColStatsState(State partColState) {
+    if (partColState.equals(State.PARTIAL)) {
+      colStatState = State.PARTIAL;
+    }
+
+    if (partColState.equals(State.NONE)) {
+      if (colStatState.equals(State.NONE)) {
+        colStatState = State.NONE;
+      } else {
+        colStatState = State.PARTIAL;
+      }
+    }
+
+    if (partColState.equals(State.COMPLETE)) {
+      if (colStatState.equals(State.PARTIAL)) {
+        colStatState = State.PARTIAL;
+      } else {
+        colStatState = State.COMPLETE;
+      }
+    }
+  }
+
+  public ColStatistics getColumnStatisticsForColumn(String colName) {
+    for (ColStatistics cs : colStats) {
+      if (cs.getColumnName().equalsIgnoreCase(colName)) {
+        return cs;
+      }
+    }
+
+    return null;
+  }
+
+  public PartStatistics getPartitionStatisticsForPartition(String partName) {
+    for (PartStatistics ps : partStats) {
+      if (ps.getPartitionName().equalsIgnoreCase(partName)) {
+        return ps;
+      }
+    }
+
+    return null;
+  }
+
+  public List<String> getNeededCols() {
+    return neededCols;
+  }
+
+  public void setNeededCols(List<String> neededCols) {
+    this.neededCols = neededCols;
+  }
+
+  public long getNumRows() {
+    return numRows;
+  }
+
+  public void setNumRows(long numRows) {
+    this.numRows = numRows;
+  }
+
+  public void addToNumRows(long nr) {
+    this.numRows += nr;
+  }
+
+  public long getRawDataSize() {
+    return rawDataSize;
+  }
+
+  public void setRawDataSize(long rawDataSize) {
+    this.rawDataSize = rawDataSize;
+  }
+
+  public void addToRawDataSize(long rds) {
+    this.rawDataSize += rds;
+  }
+
+  @Override
+  protected TabStatistics clone() throws CloneNotSupportedException {
+    TabStatistics clone = new TabStatistics(tabName);
+    if (colStats != null) {
+      for (ColStatistics cs : colStats) {
+        if (cs == null) {
+          clone.addToColumnStats(null);
+        } else {
+          clone.addToColumnStats(cs.clone());
+        }
+      }
+    }
+    clone.setBasicStatState(basicStatState);
+    clone.setLevel(level);
+    // guard against a null neededCols list (same as the colStats guard above)
+    List<String> needCols = Lists.newArrayList();
+    if (neededCols != null) {
+      needCols.addAll(neededCols);
+    }
+    clone.setColStatState(colStatState);
+    clone.setNeededCols(needCols);
+    clone.setNumRows(numRows);
+    clone.setRawDataSize(rawDataSize);
+    return clone;
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append(" tabName: ");
+    sb.append(tabName);
+    sb.append(" basicStatState: ");
+    sb.append(basicStatState);
+    sb.append(" level: ");
+    sb.append(level);
+
sb.append(" numRows: "); + sb.append(numRows); + sb.append(" rawDataSize: "); + sb.append(rawDataSize); + sb.append(" colStatState: "); + sb.append(colStatState); + sb.append(" colStats: "); + sb.append(colStats.toString()); + return sb.toString(); + } + + public String explainString() { + StringBuilder sb = new StringBuilder(); + sb.append(" tabName: "); + sb.append(tabName); + sb.append(" numRows: "); + sb.append(numRows); + sb.append(" rawDataSize: "); + sb.append(rawDataSize); + sb.append(" basicStatState: "); + sb.append(basicStatState); + sb.append(" level: "); + sb.append(level); + sb.append(" colStatState: "); + sb.append(colStatState); + return sb.toString(); + } + + @Override + public boolean equals(Object obj) { + if (obj instanceof TabStatistics) { + TabStatistics ts = (TabStatistics) obj; + if (!tabName.equalsIgnoreCase(ts.tabName) || + !basicStatState.equals(ts.basicStatState) || + !level.equals(ts.level) || + !colStats.equals(ts.colStats) || + !neededCols.equals(ts.neededCols) || + numRows != ts.numRows || + rawDataSize != ts.rawDataSize || + !colStatState.equals(ts.colStatState)) { + return false; + } + } else { + return false; + } + return true; + } + + public Map getOutColExprMap() { + return outColExprMap; + } + + public void setOutColExprMap(Map outColExprMap) { + this.outColExprMap = outColExprMap; + } + + public Map getTableAliasMap() { + return tableAliasMap; + } + + public void setTableAliasMap(Map tableAliasMap) { + this.tableAliasMap = tableAliasMap; + } + + public State getBasicStatState() { + return basicStatState; + } + + public void setBasicStatState(State basicStatState) { + this.basicStatState = basicStatState; + } + + public long getAvgRowSize() { + if (numRows != 0) { + return rawDataSize / numRows; + } else { + return rawDataSize; + } + } + + public State getColStatState() { + return colStatState; + } + + public void setColStatState(State colStatState) { + this.colStatState = colStatState; + } + + @Override + public void merge(Object obj) { + if (obj instanceof TabStatistics) { + TabStatistics ts = (TabStatistics) obj; + numRows += ts.numRows; + rawDataSize += ts.rawDataSize; + updateBasicStatsState(ts.basicStatState); + updateColStatsState(ts.colStatState); + + for (String nc : ts.getNeededCols()) { + if (!neededCols.contains(nc)) { + neededCols.add(nc); + } + } + + for (ColStatistics cs : ts.getColumnStats()) { + if (get(cs) != null) { + // FIXME: how to merge columns stats? esp. countDistincts? + } else { + colStats.add(cs); + } + } + + if (ts.getOutColExprMap() != null) { + if (outColExprMap == null) { + outColExprMap = Maps.newHashMap(); + } + outColExprMap.putAll(ts.getOutColExprMap()); + } + + if (ts.getTableAliasMap() != null) { + if (tableAliasMap == null) { + tableAliasMap = Maps.newHashMap(); + } + tableAliasMap.putAll(ts.getTableAliasMap()); + } + } + } + + @Override + public Object get(Object obj) { + // checks for the existence of ColStatistics + if (obj instanceof ColStatistics) { + ColStatistics cs = (ColStatistics) obj; + for (ColStatistics cstat : colStats) { + // the reason why we are not using equals() comparison here is, equals() + // make a deep comparison. 
we are only interested in comparing the column name
+        // for merging the column statistics
+        if (cstat.getColumnName().equalsIgnoreCase(cs.getColumnName())) {
+          return cstat;
+        }
+      }
+    }
+    return null;
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
index ec2f8f2..04f5475 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
@@ -22,6 +22,7 @@
 import java.util.List;
 import java.util.Map;
 
+import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 
 /**
@@ -33,6 +34,9 @@ public class TableScanDesc extends AbstractOperatorDesc {
 
   private static final long serialVersionUID = 1L;
 
+  private transient Table table;
+  private transient ExprNodeDesc pruningPredicate;
+
   private String alias;
 
   private List<VirtualColumn> virtualCols;
@@ -75,6 +79,22 @@
 
   public TableScanDesc() {
   }
 
+  public Table getTable() {
+    return table;
+  }
+
+  public void setTable(Table t) {
+    table = t;
+  }
+
+  public ExprNodeDesc getPruningPredicate() {
+    return pruningPredicate;
+  }
+
+  public void setPruningPredicate(ExprNodeDesc expr) {
+    pruningPredicate = expr;
+  }
+
   public TableScanDesc(final String alias) {
     this.alias = alias;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
new file mode 100644
index 0000000..fdf6205
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -0,0 +1,1176 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hadoop.hive.ql.stats; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.SelectOperator; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; +import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; +import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.plan.DBStatistics; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.PartStatistics; +import org.apache.hadoop.hive.ql.plan.Statistics; +import org.apache.hadoop.hive.ql.plan.TabStatistics; +import org.apache.hadoop.hive.ql.plan.TabStatistics.Level; +import org.apache.hadoop.hive.ql.plan.TabStatistics.State; +import org.apache.hadoop.hive.ql.util.JavaDataModel; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantMapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantBinaryObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantStringObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector; +import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector;
+import org.apache.hadoop.io.BytesWritable;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+
+public class StatsUtils {
+
+  /**
+   * Collect table, partition and column level statistics
+   *
+   * @param alias
+   * @param table
+   * @param conf
+   * @param tableScanOperator
+   * @param expr
+   * @return Statistics
+   */
+  public static Statistics collectStatistics(String alias,
+      Table table,
+      HiveConf conf,
+      TableScanOperator tableScanOperator,
+      ExprNodeDesc expr) {
+
+    String dbname = table.getDbName();
+    String tableName = table.getTableName();
+    Statistics stats = new Statistics();
+    DBStatistics dbStat = new DBStatistics(dbname);
+    TabStatistics tabStat = new TabStatistics(tableName);
+    Map<String, PrunedPartitionList> prunedPartitionsMap =
+        new HashMap<String, PrunedPartitionList>();
+    // column level statistics are required only for the columns that are needed
+    List<String> neededColumns = tableScanOperator.getNeededColumns();
+    tabStat.setNeededCols(neededColumns);
+
+    if (!table.isPartitioned()) {
+      tabStat.setLevel(Level.TABLE);
+
+      long nr = getTableStats(conf, table, StatsSetupConst.ROW_COUNT);
+      long rds = getTableStats(conf, table, StatsSetupConst.RAW_DATA_SIZE);
+      // if no basic stats are available then return
+      if (nr <= 0 && rds <= 0) {
+        tabStat.setBasicStatState(State.NONE);
+        dbStat.addToTableStats(tabStat);
+        stats.addToDBStatistics(dbStat);
+        return stats;
+      }
+
+      // if either basic stat is missing, mark the stats as partial
+      if (nr <= 0 || rds <= 0) {
+        tabStat.setBasicStatState(State.PARTIAL);
+      }
+
+      // if both are available then we have complete basic stats
+      if (nr > 0 && rds > 0) {
+        tabStat.setBasicStatState(State.COMPLETE);
+      }
+      tabStat.setNumRows(nr);
+      tabStat.setRawDataSize(rds);
+
+      // get table level column statistics
+      List<ColStatistics> colStats = getTableColumnStats(table, neededColumns);
+
+      // if column stats are available but at least one column has no stats,
+      // mark the column stats as partial
+      if (checkIfColStatsAvailable(colStats) && colStats.contains(null)) {
+        tabStat.setColStatState(State.PARTIAL);
+      }
+
+      // if column stats are available and all columns have stats, mark them
+      // as complete
+      if (checkIfColStatsAvailable(colStats) && !colStats.contains(null)) {
+        tabStat.setColStatState(State.COMPLETE);
+      }
+
+      if (!checkIfColStatsAvailable(colStats)) {
+        tabStat.setColStatState(State.NONE);
+        tabStat.setColumnStats(null);
+      } else {
+        // set col stats and mark it as table level col stats
+        tabStat.setColumnStats(colStats);
+      }
+    } else {
+      tabStat.setLevel(Level.PARTITION);
+
+      // For partitioned tables, get the size of all the partitions after pruning
+      // the partitions that are not required
+      PrunedPartitionList partsList;
+      try {
+        partsList = PartitionPruner.prune(table, expr, conf, alias, prunedPartitionsMap);
+      } catch (HiveException e) {
+        tabStat.setBasicStatState(State.NONE);
+        dbStat.addToTableStats(tabStat);
+        stats.addToDBStatistics(dbStat);
+        return stats;
+      }
+
+      for (Partition part : partsList.getNotDeniedPartns()) {
+        PartStatistics partStat = new PartStatistics(part.getName());
+        long nr = getPartitionStats(conf, part, StatsSetupConst.ROW_COUNT);
+        long rds = getPartitionStats(conf, part, StatsSetupConst.RAW_DATA_SIZE);
+        // if both basic stats are not available then mark it as stats not available
+
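For reference, the three-way classification applied to each partition below (and, in longer form, to the unpartitioned case above) collapses to a single conditional; a hypothetical rewrite with the same logic:

    State s = (nr <= 0 && rds <= 0) ? State.NONE      // both stats missing
            : (nr <= 0 || rds <= 0) ? State.PARTIAL   // exactly one missing
            : State.COMPLETE;                         // both present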
if (nr <= 0 && rds <= 0) {
+          partStat.setBasicStatState(State.NONE);
+        } else if (nr <= 0 || rds <= 0) {
+          partStat.setBasicStatState(State.PARTIAL);
+        } else {
+          partStat.setBasicStatState(State.COMPLETE);
+        }
+        partStat.setNumRows(nr);
+        partStat.setRawDataSize(rds);
+
+        // get partition level column statistics
+        List<ColStatistics> colStats = getPartitionColumnStats(table, part, neededColumns);
+        if (checkIfColStatsAvailable(colStats) && colStats.contains(null)) {
+          partStat.setColStatState(State.PARTIAL);
+        } else if (checkIfColStatsAvailable(colStats) && !colStats.contains(null)) {
+          partStat.setColStatState(State.COMPLETE);
+        } else {
+          partStat.setColStatState(State.NONE);
+        }
+
+        partStat.setColumnStats(colStats);
+
+        // adding the partition will merge/accumulate partition level basic
+        // statistics to compute table level basic statistics
+        tabStat.addToPartitionStats(partStat);
+      }
+    }
+
+    // add the table statistics to db statistics
+    dbStat.addToTableStats(tabStat);
+
+    // add db statistics to statistics object
+    stats.addToDBStatistics(dbStat);
+    return stats;
+  }
+
+  /**
+   * Get the partition level column statistics from the metastore for all the
+   * needed columns
+   *
+   * @param table
+   * @param part
+   * @param neededColumns
+   * @return column statistics
+   */
+  private static List<ColStatistics> getPartitionColumnStats(Table table, Partition part,
+      List<String> neededColumns) {
+
+    List<ColStatistics> colStatistics = Lists.newArrayList();
+    for (String col : neededColumns) {
+      ColumnStatistics colStats = null;
+      try {
+        colStats = Hive.get().getPartitionColumnStatistics(table.getDbName(), table.getTableName(),
+            part.getName(), col);
+        ColStatistics cs = new ColStatistics();
+        for (ColumnStatisticsObj cso : colStats.getStatsObj()) {
+          cs.setColumnName(cso.getColName());
+          cs.setColumnType(cso.getColType());
+          ColumnStatisticsData csd = cso.getStatsData();
+          if (csd.isSetBinaryStats()) {
+            cs.setAvgColLen((int) csd.getBinaryStats().getAvgColLen());
+            cs.setNumNulls(csd.getBinaryStats().getNumNulls());
+          } else if (cso.getStatsData().isSetBooleanStats()) {
+            cs.setCountDistint(2);
+            cs.setNumNulls(csd.getBooleanStats().getNumNulls());
+            cs.setAvgColLen(JavaDataModel.get().primitive1());
+          } else if (cso.getStatsData().isSetDoubleStats()) {
+            cs.setCountDistint(csd.getDoubleStats().getNumDVs());
+            cs.setNumNulls(csd.getDoubleStats().getNumNulls());
+            cs.setAvgColLen(JavaDataModel.get().primitive2());
+          } else if (cso.getStatsData().isSetLongStats()) {
+            cs.setCountDistint(csd.getLongStats().getNumDVs());
+            cs.setNumNulls(csd.getLongStats().getNumNulls());
+            cs.setAvgColLen(JavaDataModel.get().primitive2());
+          } else if (cso.getStatsData().isSetStringStats()) {
+            cs.setCountDistint(csd.getStringStats().getNumDVs());
+            cs.setNumNulls(csd.getStringStats().getNumNulls());
+            cs.setAvgColLen((int) csd.getStringStats().getAvgColLen());
+          }
+        }
+        colStatistics.add(cs);
+      } catch (HiveException e) {
+        // if we cannot get column statistics for a specific column, ignore
+        // the exception and add a null to indicate partial column statistics
+        colStatistics.add(null);
+      }
+    }
+
+    return colStatistics;
+  }
+
+  /**
+   * Returns true if column statistics are available for at least one column
+   *
+   * @param colStats
+   * @return
+   */
+  private static boolean checkIfColStatsAvailable(List<ColStatistics> colStats) {
+    for (ColStatistics cs : colStats) {
+      if (cs != null) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Get table level column statistics from the metastore for needed columns
+   *
+   * @param table
+   * @param neededColumns
+   * @return column statistics
+ */ + private static List getTableColumnStats(Table table, List neededColumns) { + + List colStatistics = Lists.newArrayList(); + for (String col : neededColumns) { + ColumnStatistics colStats = null; + try { + colStats = Hive.get().getTableColumnStatistics(table.getDbName(), table.getTableName(), + col); + ColStatistics cs = new ColStatistics(); + for (ColumnStatisticsObj cso : colStats.getStatsObj()) { + cs.setColumnName(cso.getColName()); + cs.setColumnType(cso.getColType()); + String colType = cso.getColType(); + ColumnStatisticsData csd = cso.getStatsData(); + if (colType.equalsIgnoreCase("tinyint") || + colType.equalsIgnoreCase("smallint") || + colType.equalsIgnoreCase("int") || + colType.equalsIgnoreCase("bigint")) { + cs.setCountDistint(csd.getLongStats().getNumDVs()); + cs.setNumNulls(csd.getLongStats().getNumNulls()); + if (colType.equalsIgnoreCase("bigint")) { + cs.setAvgColLen(JavaDataModel.get().primitive2()); + } else { + cs.setAvgColLen(JavaDataModel.get().primitive1()); + } + } else if (colType.equalsIgnoreCase("double") || + colType.equalsIgnoreCase("float")) { + cs.setCountDistint(csd.getDoubleStats().getNumDVs()); + cs.setNumNulls(csd.getDoubleStats().getNumNulls()); + if (colType.equalsIgnoreCase("double")) { + cs.setAvgColLen(JavaDataModel.get().primitive2()); + } else { + cs.setAvgColLen(JavaDataModel.get().primitive1()); + } + } else if (colType.equalsIgnoreCase("string")) { + cs.setCountDistint(csd.getStringStats().getNumDVs()); + cs.setNumNulls(csd.getStringStats().getNumNulls()); + cs.setAvgColLen((int) csd.getStringStats().getAvgColLen()); + } else if (colType.equalsIgnoreCase("boolean")) { + cs.setCountDistint(2); + cs.setNumNulls(csd.getBooleanStats().getNumNulls()); + cs.setAvgColLen(JavaDataModel.get().primitive1()); + } else if (colType.equalsIgnoreCase("binary")) { + cs.setAvgColLen((int) csd.getBinaryStats().getAvgColLen()); + cs.setNumNulls(csd.getBinaryStats().getNumNulls()); + } else if (colType.equalsIgnoreCase("timestamp")) { + cs.setAvgColLen(JavaDataModel.get().lengthOfTimestamp()); + } else if (colType.equalsIgnoreCase("decimal")) { + cs.setAvgColLen(JavaDataModel.get().lengthOfDecimal()); + } else if (colType.equalsIgnoreCase("date")) { + cs.setAvgColLen(JavaDataModel.get().lengthOfDate()); + } else { + // Statistics for complex datatypes are not supported + } + } + colStatistics.add(cs); + } catch (HiveException e) { + // if we cannot get column statistics for specific columns then ignore + // the exception and add nulls to indicate its partial column statistics + colStatistics.add(null); + } + } + + return colStatistics; + } + + /** + * Get partition level basic statistics + * + * @param conf + * @param part + * @param type + * @return + */ + private static long getPartitionStats(HiveConf conf, Partition part, String type) { + Path path = part.getPartitionPath(); + + if (StatsSetupConst.ROW_COUNT.equals(type)) { + return getNumRows(part.getParameters().get(type), path); + } else if (StatsSetupConst.RAW_DATA_SIZE.equals(type)) { + return getRawDataSize(conf, part, path); + } + + return 0; + } + + /** + * Get table level basic statistics + * + * @param conf + * @param table + * @param type + * @return + */ + private static long getTableStats(HiveConf conf, Table table, String type) { + Path path = table.getPath(); + + if (StatsSetupConst.ROW_COUNT.equals(type)) { + return getNumRows(table.getProperty(type), path); + } else if (StatsSetupConst.RAW_DATA_SIZE.equals(type)) { + return getRawDataSize(conf, table, path); + } + + return 0; + } + + /** + 
* Get raw data size (uncompressed size) from table/part params. If raw data size is + * not available then get total file size from table/part params. If total file size + * is also not available then get content summary of the file and read the + * file length (Calls FileSystem). + * + * @param conf + * @param object + * @param path + * @return raw data size + */ + private static long getRawDataSize(HiveConf conf, Object object, Path path) { + long size = 0; + Table table = null; + Partition part = null; + String rds = null; + String ts = null; + if (object instanceof Table) { + table = (Table) object; + rds = table.getProperty(StatsSetupConst.RAW_DATA_SIZE); + ts = table.getProperty(StatsSetupConst.TOTAL_SIZE); + } + + if (object instanceof Partition) { + part = (Partition) object; + rds = part.getParameters().get(StatsSetupConst.RAW_DATA_SIZE); + ts = part.getParameters().get(StatsSetupConst.TOTAL_SIZE); + } + + if (rds != null) { + try { + size = Long.valueOf(rds); + } catch (NumberFormatException e) { + size = 0; + } + } + + // check for total file size + if (size == 0 && ts != null) { + try { + size = Long.valueOf(ts); + } catch (NumberFormatException e) { + size = 0; + } + } + + // make file system call to get file length + if (size == 0) { + try { + FileSystem fs = path.getFileSystem(conf); + size = fs.getContentSummary(path).getLength(); + } catch (Exception e) { + size = 0; + } + } + return size; + } + + private static long getNumRows(String nr, Path path) { + // If the size is present in the metastore, use it + if (nr != null) { + try { + return Long.valueOf(nr); + } catch (NumberFormatException e) { + return 0; + } + } + + return 0; + } + + /** + * Given a list of selected columns and column statistics, this method will + * return the overall raw data size for the columns that are selected + * + * @param stats + * @param dbName + * @param tabName + * @param selColStats + * @param colSelections + * @return raw data size + */ + public static long getRawDataSizeForSelectedColumns(Statistics stats, String dbName, + String tabName, + List selColStats, boolean[] colSelections) { + long result = 0; + + if (selColStats == null) { + return result; + } + + long nonNullCount = 0; + DBStatistics dbs = stats.getDBStatisticsForDB(dbName); + if (dbs != null) { + TabStatistics ts = dbs.getTableStatsForTable(tabName); + if (ts != null) { + nonNullCount = ts.getNumRows(); + } + } + + int idx = 0; + for (ColStatistics cs : selColStats) { + String colType = cs.getColumnType(); + + // if the column is selected then get stats else skip it + if (colSelections[idx]) { + nonNullCount = nonNullCount - cs.getNumNulls(); + + // FIXME: since we are not updating the colStats along with basicStats, there + // might be cases where nonNullCount become <=0. To avoid that we will not subtract + // the numNulls. 
This is a hack to avoid negative row counts / raw data sizes
+        if (nonNullCount <= 0) {
+          nonNullCount += cs.getNumNulls();
+        }
+
+        if (colType.equalsIgnoreCase("tinyint") || colType.equalsIgnoreCase("smallint") ||
+            colType.equalsIgnoreCase("int") || colType.equalsIgnoreCase("bigint") ||
+            colType.equalsIgnoreCase("boolean") || colType.equalsIgnoreCase("float") ||
+            colType.equalsIgnoreCase("double")) {
+          result += nonNullCount * cs.getAvgColLen();
+        } else if (colType.equalsIgnoreCase("string")) {
+          result += nonNullCount * JavaDataModel.get().lengthForStringOfLength(cs.getAvgColLen());
+        } else if (colType.equalsIgnoreCase("binary")) {
+          result += nonNullCount * JavaDataModel.get().lengthForByteArrayOfSize(cs.getAvgColLen());
+        } else if (colType.equalsIgnoreCase("timestamp")) {
+          result += nonNullCount * JavaDataModel.get().lengthOfTimestamp();
+        } else if (colType.equalsIgnoreCase("decimal")) {
+          result += nonNullCount * JavaDataModel.get().lengthOfDecimal();
+        } else if (colType.equalsIgnoreCase("date")) {
+          result += nonNullCount * JavaDataModel.get().lengthOfDate();
+        } else {
+          result += nonNullCount * cs.getAvgColLen();
+        }
+      }
+      idx++;
+    }
+    return result;
+  }
+
+  /**
+   * Get the raw data size of variable length data types
+   *
+   * @param oi
+   * @param colType
+   * @return raw data size
+   */
+  public static long getSizeOfVariableLengthTypes(ObjectInspector oi, String colType) {
+    if (colType.equalsIgnoreCase("string")) {
+
+      // constant string projection Ex: select "hello" from table
+      if (oi instanceof ConstantObjectInspector) {
+        ConstantObjectInspector coi = (ConstantObjectInspector) oi;
+        if (coi.getWritableConstantValue() == null) {
+          return 0;
+        }
+        return JavaDataModel.get().lengthForStringOfLength(
+            coi.getWritableConstantValue().toString().length());
+      } else if (oi instanceof WritableConstantStringObjectInspector) {
+        // some UDFs return writable constant strings (fixed width)
+        // Ex: select upper("hello") from table
+        WritableConstantStringObjectInspector wcsoi = (WritableConstantStringObjectInspector) oi;
+        return JavaDataModel.get().lengthForStringOfLength(
+            wcsoi.getWritableConstantValue().toString().length());
+      } else if (oi instanceof WritableStringObjectInspector) {
+        // some UDFs may emit strings of variable length (e.g. pattern matching
+        // UDFs); it's hard to determine the output length for such UDFs
+        return 0;
+      }
+    } else if (colType.equalsIgnoreCase("binary")) {
+      // constant byte arrays
+      if (oi instanceof ConstantObjectInspector) {
+        ConstantObjectInspector coi = (ConstantObjectInspector) oi;
+        if (coi.getWritableConstantValue() == null) {
+          return 0;
+        }
+        BytesWritable bw = ((BytesWritable) coi.getWritableConstantValue());
+        return JavaDataModel.get().lengthForByteArrayOfSize(bw.getLength());
+      } else if (oi instanceof WritableConstantBinaryObjectInspector) {
+        // writable constant byte arrays
+        WritableConstantBinaryObjectInspector wcboi = (WritableConstantBinaryObjectInspector) oi;
+        return JavaDataModel.get().lengthForByteArrayOfSize(
+            wcboi.getWritableConstantValue().getLength());
+      } else if (oi instanceof WritableBinaryObjectInspector) {
+        // variable byte arrays.
it's hard to find the length + return 0; + } + } else { + // complex types (map, list, struct, union) + return getSizeOfComplexTypes(oi); + } + + return 0; + } + + /** + * get the size of complex data types + * + * @param oi + * @return raw data size + */ + private static long getSizeOfComplexTypes(ObjectInspector oi) { + long result = 0; + int length = 0; + switch (oi.getCategory()) { + case PRIMITIVE: + String colType = oi.getTypeName(); + if (colType.equalsIgnoreCase("string") || colType.equalsIgnoreCase("binary")) { + result += getSizeOfVariableLengthTypes(oi, colType); + } else { + result += getSizeOfFixedLengthPrimitivesFromType(colType); + } + break; + case LIST: + if (oi instanceof StandardConstantListObjectInspector) { + // constant list projection of known length + StandardConstantListObjectInspector scloi = (StandardConstantListObjectInspector) oi; + length = scloi.getWritableConstantValue().size(); + // check if list elements are primitive or Objects + ObjectInspector leoi = scloi.getListElementObjectInspector(); + if (leoi.getCategory().equals(ObjectInspector.Category.PRIMITIVE)) { + result += getSizeOfPrimitiveTypeArraysFromType(leoi.getTypeName(), length); + } else { + result += JavaDataModel.get().lengthForObjectArrayOfSize(length); + } + } else { + StandardListObjectInspector sloi = (StandardListObjectInspector) oi; + result += getSizeOfComplexTypes(sloi.getListElementObjectInspector()); + } + break; + case MAP: + if (oi instanceof StandardConstantMapObjectInspector) { + // constant map projection of known length + StandardConstantMapObjectInspector scmoi = (StandardConstantMapObjectInspector) oi; + result += getSizeOfMap(scmoi); + } else { + StandardMapObjectInspector smoi = (StandardMapObjectInspector) oi; + result += getSizeOfComplexTypes(smoi.getMapKeyObjectInspector()); + result += getSizeOfComplexTypes(smoi.getMapValueObjectInspector()); + } + break; + case STRUCT: + StructObjectInspector soi = (StructObjectInspector) oi; + // add constant object overhead for struct + result += JavaDataModel.get().object(); + // add constant struct field names references overhead + result += soi.getAllStructFieldRefs().size() * JavaDataModel.get().ref(); + for (StructField field : soi.getAllStructFieldRefs()) { + result += getSizeOfComplexTypes(field.getFieldObjectInspector()); + } + break; + case UNION: + UnionObjectInspector uoi = (UnionObjectInspector) oi; + // add constant object overhead for union + result += JavaDataModel.get().object(); + // add constant size for unions tags + result += uoi.getObjectInspectors().size() * JavaDataModel.get().primitive1(); + for (ObjectInspector foi : uoi.getObjectInspectors()) { + result += getSizeOfComplexTypes(foi); + } + break; + default: + break; + } + return result; + } + + /** + * get size of fixed length primitives + * + * @param colType + * @return raw data size + */ + public static long getSizeOfFixedLengthPrimitivesFromType(String colType) { + if (colType.equalsIgnoreCase("tinyint") || + colType.equalsIgnoreCase("smallint") || + colType.equalsIgnoreCase("int") || + colType.equalsIgnoreCase("boolean") || + colType.equalsIgnoreCase("byte") || + colType.equalsIgnoreCase("float")) { + return JavaDataModel.get().primitive1(); + } else if (colType.equalsIgnoreCase("double") || + colType.equalsIgnoreCase("bigint")) { + return JavaDataModel.get().primitive2(); + } else if (colType.equalsIgnoreCase("timestamp")) { + return JavaDataModel.get().lengthOfTimestamp(); + } else if (colType.equalsIgnoreCase("date")) { + return 
JavaDataModel.get().lengthOfDate();
+ } else if (colType.equalsIgnoreCase("decimal")) {
+ return JavaDataModel.get().lengthOfDecimal();
+ } else {
+ return 0;
+ }
+ }
+
+ /**
+ * get the size of arrays of primitive types
+ *
+ * @param colType
+ * @param length
+ * @return raw data size
+ */
+ public static long getSizeOfPrimitiveTypeArraysFromType(String colType, int length) {
+ if (colType.equalsIgnoreCase("tinyint") ||
+ colType.equalsIgnoreCase("smallint") ||
+ colType.equalsIgnoreCase("int") ||
+ colType.equalsIgnoreCase("float")) {
+ return JavaDataModel.get().lengthForIntArrayOfSize(length);
+ } else if (colType.equalsIgnoreCase("double")) {
+ return JavaDataModel.get().lengthForDoubleArrayOfSize(length);
+ } else if (colType.equalsIgnoreCase("bigint")) {
+ return JavaDataModel.get().lengthForLongArrayOfSize(length);
+ } else if (colType.equalsIgnoreCase("binary")) {
+ return JavaDataModel.get().lengthForByteArrayOfSize(length);
+ } else if (colType.equalsIgnoreCase("boolean")) {
+ return JavaDataModel.get().lengthForBooleanArrayOfSize(length);
+ } else if (colType.equalsIgnoreCase("timestamp")) {
+ return JavaDataModel.get().lengthForTimestampArrayOfSize(length);
+ } else if (colType.equalsIgnoreCase("date")) {
+ return JavaDataModel.get().lengthForDateArrayOfSize(length);
+ } else if (colType.equalsIgnoreCase("decimal")) {
+ return JavaDataModel.get().lengthForDecimalArrayOfSize(length);
+ } else {
+ return 0;
+ }
+ }
+
+ /**
+ * Estimate the size of map object
+ *
+ * @param scmoi
+ * @return size of map
+ */
+ private static long getSizeOfMap(StandardConstantMapObjectInspector scmoi) {
+ Map map = scmoi.getWritableConstantValue();
+ ObjectInspector koi = scmoi.getMapKeyObjectInspector();
+ ObjectInspector voi = scmoi.getMapValueObjectInspector();
+ long result = 0;
+ for (Map.Entry entry : map.entrySet()) {
+ result += getWritableSize(koi, entry.getKey());
+ result += getWritableSize(voi, entry.getValue());
+ }
+
+ // add the additional overhead of each map entry
+ result += JavaDataModel.get().hashMap(map.entrySet().size());
+ return result;
+ }
+
+ /**
+ * get size of primitive data types based on their respective writable
+ * object inspector
+ *
+ * @param oi
+ * @param value
+ * @return raw data size
+ */
+ public static long getWritableSize(ObjectInspector oi, Object value) {
+ if (oi instanceof WritableStringObjectInspector) {
+ WritableStringObjectInspector woi = (WritableStringObjectInspector) oi;
+ return JavaDataModel.get().lengthForStringOfLength(
+ woi.getPrimitiveWritableObject(value).getLength());
+ } else if (oi instanceof WritableBinaryObjectInspector) {
+ WritableBinaryObjectInspector woi = (WritableBinaryObjectInspector) oi;
+ return JavaDataModel.get().lengthForByteArrayOfSize(
+ woi.getPrimitiveWritableObject(value).getLength());
+ } else if (oi instanceof WritableBooleanObjectInspector) {
+ return JavaDataModel.get().primitive1();
+ } else if (oi instanceof WritableByteObjectInspector) {
+ return JavaDataModel.get().primitive1();
+ } else if (oi instanceof WritableDateObjectInspector) {
+ return JavaDataModel.get().lengthOfDate();
+ } else if (oi instanceof WritableDoubleObjectInspector) {
+ return JavaDataModel.get().primitive2();
+ } else if (oi instanceof WritableFloatObjectInspector) {
+ return JavaDataModel.get().primitive1();
+ } else if (oi instanceof WritableHiveDecimalObjectInspector) {
+ return JavaDataModel.get().lengthOfDecimal();
+ } else if (oi instanceof WritableIntObjectInspector) {
+ return JavaDataModel.get().primitive1();
+ } else if (oi
instanceof WritableLongObjectInspector) {
+ return JavaDataModel.get().primitive2();
+ } else if (oi instanceof WritableShortObjectInspector) {
+ return JavaDataModel.get().primitive1();
+ } else if (oi instanceof WritableTimestampObjectInspector) {
+ return JavaDataModel.get().lengthOfTimestamp();
+ }
+
+ return 0;
+ }
+
+ /**
+ * Update the basic statistics of the statistics object based on the row number
+ *
+ * @param stats
+ * @param dbName
+ * @param tableName
+ * @param newNumRows
+ */
+ public static void updateStats(Statistics stats, String dbName,
+ String tableName, long newNumRows) {
+ DBStatistics dbs = stats.getDBStatisticsForDB(dbName);
+ if (dbs != null) {
+ TabStatistics ts = dbs.getTableStatsForTable(tableName);
+ if (ts != null) {
+ long avgRowSize = ts.getAvgRowSize();
+ ts.setNumRows(newNumRows);
+ ts.setRawDataSize(newNumRows * avgRowSize);
+ }
+ }
+ }
+
+ /**
+ * For a given column return the distinct cardinality from a table
+ *
+ * @param stats
+ * @param dbName
+ * @param tabName
+ * @param colName
+ * @return distinct value count of the column, 0 if unavailable
+ */
+ public static long getDistinctCountOfColumnFromTable(Statistics stats, String dbName,
+ String tabName, String colName) {
+ DBStatistics dbs = stats.getDBStatisticsForDB(dbName);
+ if (dbs != null) {
+ TabStatistics ts = dbs.getTableStatsForTable(tabName);
+ if (ts != null && !ts.getColStatState().equals(State.NONE)) {
+ ColStatistics cs = ts.getColumnStatisticsForColumn(colName);
+ if (cs != null) {
+ if (cs.getNumNulls() > 0) {
+ // consider NULL as another distinct value
+ return 1 + cs.getCountDistint();
+ }
+ return cs.getCountDistint();
+ }
+ }
+ }
+ return 0;
+ }
+
+ /**
+ * For a given column return the distinct cardinality from a partition
+ *
+ * @param stats
+ * @param dbName
+ * @param tabName
+ * @param partName
+ * @param colName
+ * @return distinct value count of the column, 0 if unavailable
+ */
+ public static long getDistinctCountOfColumnFromPartition(Statistics stats, String dbName,
+ String tabName, String partName, String colName) {
+ DBStatistics dbs = stats.getDBStatisticsForDB(dbName);
+ if (dbs != null) {
+ TabStatistics ts = dbs.getTableStatsForTable(tabName);
+ if (ts != null) {
+ PartStatistics ps = ts.getPartitionStatisticsForPartition(partName);
+ if (ps != null) {
+ // look up the column statistics at the partition level (assumes
+ // PartStatistics exposes getColumnStatisticsForColumn, mirroring
+ // TabStatistics)
+ ColStatistics cs = ps.getColumnStatisticsForColumn(colName);
+ if (cs != null) {
+ if (cs.getNumNulls() > 0) {
+ // consider NULL as another distinct value
+ return 1 + cs.getCountDistint();
+ }
+ return cs.getCountDistint();
+ }
+ }
+ }
+ }
+ return 0;
+ }
+
+ /**
+ * Walk the operator tree and find the spot where original column alias to
+ * internal mapping is done. Update the mappings in the stats object.
+ *
+ * @param stats
+ * @param curOp
+ */
+ public static void populateAliasToIntColNameMappings(Statistics stats,
+ Operator curOp) {
+ if (curOp == null) {
+ return;
+ }
+
+ for (Operator op : curOp.getParentOperators()) {
+ if (op instanceof TableScanOperator) {
+ TableScanOperator tsop = (TableScanOperator) op;
+ String dbName = tsop.getConf().getTable().getDbName();
+ String tabName = tsop.getConf().getTable().getTableName();
+
+ DBStatistics dbs = stats.getDBStatisticsForDB(dbName);
+ if (dbs != null) {
+ TabStatistics ts = dbs.getTableStatsForTable(tabName);
+ if (ts != null) {
+ Map outColExprMap = ts.getOutColExprMap();
+ if (outColExprMap == null) {
+ outColExprMap = Maps.newHashMap();
+ ts.setOutColExprMap(outColExprMap);
+ }
+
+ // Special case: no projection on map side. If there is a projection
+ // on the map side then the column mapping is done in the reduce sink.
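+ // Hypothetical illustration (query and internal names assumed): with a
+ // map-side projection such as "select state, locid from loc_orc" the SELECT
+ // supplies {_col0 -> state, _col1 -> locid}; with "select * from loc_orc"
+ // there is no map-side SELECT and the mapping is taken from the ReduceSink
+ // value columns below.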
+ if (!checkForMapSideSelectOp(tsop)) {
+ ReduceSinkOperator rso = getReduceSinkOperator(tsop);
+ // if we couldn't find an RS then it might be an FS and a map-only job,
+ // in which case we don't need the column mapping anymore
+ if (rso == null) {
+ return;
+ }
+ ArrayList outValColNames = rso.getConf().getOutputValueColumnNames();
+ ArrayList exprNode = rso.getConf().getValueCols();
+ for (int i = 0; i < outValColNames.size(); i++) {
+ outColExprMap.put(outValColNames.get(i), exprNode.get(i));
+ }
+ } else {
+ // TS operator should have SEL as its children. The output expression
+ // map will contain mapping between table column aliases and internal
+ // names. We need this information to handle the case when projection
+ // uses alias renaming
+ getInternalColumnMapping(tsop, outColExprMap);
+ }
+ }
+ }
+ } else {
+ populateAliasToIntColNameMappings(stats, op);
+ }
+ }
+ }
+
+ private static ReduceSinkOperator getReduceSinkOperator(TableScanOperator tsop) {
+ Operator op = tsop;
+ while (op.getChildOperators() != null) {
+ for (Operator child : op.getChildOperators()) {
+ // we have hit RS
+ if (child instanceof ReduceSinkOperator) {
+ return (ReduceSinkOperator) child;
+ }
+ op = child;
+ }
+ }
+ return null;
+ }
+
+ private static boolean checkForMapSideSelectOp(TableScanOperator tsop) {
+ Operator op = tsop;
+ while (op.getChildOperators() != null) {
+ for (Operator child : op.getChildOperators()) {
+ // we hit an RS before any SELECT, so there is no map-side SEL
+ if (child instanceof ReduceSinkOperator) {
+ return false;
+ }
+
+ if (child instanceof SelectOperator) {
+ return true;
+ }
+ op = child;
+ }
+ }
+ return false;
+ }
+
+ private static void getInternalColumnMapping(Operator curOp,
+ Map colMapping) {
+ if (curOp == null) {
+ return;
+ }
+ for (Operator child : curOp.getChildOperators()) {
+ Map result = child.getColumnExprMap();
+ if (result != null) {
+ for (String key : result.keySet()) {
+ if (key.startsWith("_col")) {
+ // we found the internal column mappings
+ colMapping.putAll(result);
+ }
+ }
+
+ // we further need to descend down
+ if (colMapping.size() == 0) {
+ getInternalColumnMapping(child, colMapping);
+ }
+ } else {
+ getInternalColumnMapping(child, colMapping);
+ }
+ }
+ }
+
+ /**
+ * If column expression map is available return it else compute it
+ *
+ * @param op
+ * @param dbName
+ * @param tabName
+ * @param stats
+ * @return column expression map
+ */
+ public static Map getOuputColumnExprMap(
+ Operator op, String dbName, String tabName,
+ Statistics stats) {
+ Map outColExprMap = null;
+
+ DBStatistics dbs = stats.getDBStatisticsForDB(dbName);
+ if (dbs != null) {
+ TabStatistics ts = dbs.getTableStatsForTable(tabName);
+ if (ts != null) {
+ outColExprMap = ts.getOutColExprMap();
+ if (outColExprMap == null) {
+ // populateAliasToIntColNameMappings computes the mapping and caches
+ // it on the table statistics, so re-read it after the call
+ populateAliasToIntColNameMappings(stats, op);
+ outColExprMap = ts.getOutColExprMap();
+ }
+ }
+ }
+
+ return outColExprMap;
+ }
+
+ /**
+ * If table alias map is available return it else compute it
+ *
+ * @param op
+ * @param dbName
+ * @param tabName
+ * @param stats
+ * @return table alias map
+ */
+ public static Map getTableAliasMap(Operator op,
+ String dbName, String tabName, Statistics stats) {
+ Map aliasMap = null;
+ DBStatistics dbs = stats.getDBStatisticsForDB(dbName);
+ if (dbs != null) {
+ TabStatistics ts = dbs.getTableStatsForTable(tabName);
+ if (ts != null) {
+ aliasMap = ts.getTableAliasMap();
+ if (aliasMap == null) {
+ aliasMap = Maps.newHashMap();
+ getAliasToTableNameMapping(op, aliasMap);
+ }
+
} + } + + return aliasMap; + } + + private static void getAliasToTableNameMapping(Operator curOp, + Map aliasMap) { + if (curOp == null) { + return; + } + + for (Operator parent : curOp.getParentOperators()) { + if (parent instanceof TableScanOperator) { + TableScanOperator tsop = (TableScanOperator) parent; + String alias = tsop.getConf().getAlias(); + String tabName = tsop.getConf().getTable().getTableName(); + aliasMap.put(alias, tabName); + } else { + getAliasToTableNameMapping(parent, aliasMap); + } + } + } + + /** + * Get roots of a give operator + * + * @param curOp + * @return + */ + public static List getRoots(Operator curOp) { + List result = Lists.newArrayList(); + getRootsImpl(curOp, result); + return result; + } + + private static void getRootsImpl(Operator curOp, + List result) { + + for (Operator parent : curOp.getParentOperators()) { + if (parent instanceof TableScanOperator) { + TableScanOperator tsop = (TableScanOperator) parent; + result.add(tsop); + } else { + getRootsImpl(parent, result); + } + } + } + + /** + * Estimate the size of constant or UDF projections based on the output + * schema of the columns + * + * @param numRows + * @param colList + * @return raw data size + */ + public static long getConstOrUDFProjectionSize(long numRows, List colList) { + long result = 0; + for (ExprNodeDesc end : colList) { + String colType = null; + ObjectInspector oi = null; + + // constant projection + if (end instanceof ExprNodeConstantDesc) { + ExprNodeConstantDesc encd = (ExprNodeConstantDesc) end; + colType = encd.getTypeString(); + oi = encd.getWritableObjectInspector(); + } else if (end instanceof ExprNodeGenericFuncDesc) { + // udf projection + ExprNodeGenericFuncDesc engfd = (ExprNodeGenericFuncDesc) end; + colType = engfd.getTypeString(); + oi = engfd.getWritableObjectInspector(); + } else if (end instanceof ExprNodeColumnDesc) { + // virtual columns + ExprNodeColumnDesc encd = (ExprNodeColumnDesc) end; + if (encd.getIsPartitionColOrVirtualCol()) { + colType = encd.getTypeInfo().getTypeName(); + oi = encd.getWritableObjectInspector(); + } else { + // else it is column projection. + continue; + } + } else { + continue; + } + + if (colType.equalsIgnoreCase("string") || + colType.equalsIgnoreCase("binary") || + colType.startsWith("array") || + colType.startsWith("map") || + colType.startsWith("struct") || + colType.startsWith("union")) { + // if constants are immutable then there will be only one copy until + // a reduce or file sink reached. 
for now we will assume that for each + // row there will be a copy of the constants that is projected + result += numRows * StatsUtils.getSizeOfVariableLengthTypes(oi, colType); + } else { + result += numRows * StatsUtils.getSizeOfFixedLengthPrimitivesFromType(colType); + } + } + + return result; + } + + /** + * Check if there are any constant, UDF or virtual column projections + * + * @param colList + * @return + */ + public static boolean checkForConstOrUDFProjection(List colList) { + for (ExprNodeDesc end : colList) { + if (end instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc encd = (ExprNodeColumnDesc) end; + // newly added virtual column + if (encd.getIsPartitionColOrVirtualCol()) { + return true; + } + } + if (end instanceof ExprNodeConstantDesc || end instanceof ExprNodeGenericFuncDesc) { + return true; + } + } + return false; + } + + /** + * Given a list of ExprNodeDesc (projections), this function will return the + * column names as list of strings + * + * @param colList + * @return list of column names + */ + public static List getProjectedColumns(List colList) { + List result = Lists.newArrayList(); + for (ExprNodeDesc col : colList) { + if (col instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc encd = (ExprNodeColumnDesc) col; + result.add(encd.getColumn()); + } + } + return result; + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java index 9c3c4c0..3352a08 100644 --- ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java +++ ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java @@ -267,6 +267,15 @@ public int lengthForIntArrayOfSize(int length) { public int lengthForBooleanArrayOfSize(int length) { return lengthForPrimitiveArrayOfSize(PRIMITIVE_BYTE, length); } + public int lengthForTimestampArrayOfSize(int length) { + return lengthForPrimitiveArrayOfSize(lengthOfTimestamp(), length); + } + public int lengthForDateArrayOfSize(int length) { + return lengthForPrimitiveArrayOfSize(lengthOfDate(), length); + } + public int lengthForDecimalArrayOfSize(int length) { + return lengthForPrimitiveArrayOfSize(lengthOfDecimal(), length); + } public int lengthOfDecimal() { // object overhead + 8 bytes for intCompact + 4 bytes for precision diff --git ql/src/test/queries/clientpositive/annotate_stats_filter.q ql/src/test/queries/clientpositive/annotate_stats_filter.q new file mode 100644 index 0000000..2f39025 --- /dev/null +++ ql/src/test/queries/clientpositive/annotate_stats_filter.q @@ -0,0 +1,75 @@ +create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile; + +create table loc_orc like loc_staging; +alter table loc_orc set fileformat orc; + +load data local inpath '../data/files/loc.txt' overwrite into table loc_staging; + +insert overwrite table loc_orc select * from loc_staging; + +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc; + +-- column stats are not COMPLETE, so stats are not updated +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc where state='OH'; + +analyze table loc_orc compute statistics for columns state,locid,zip,year; + +-- state column has 5 distincts. numRows/countDistincts +-- numRows: 1 rawDataSize: 99 +explain extended select * from loc_orc where state='OH'; + +-- not equals comparison shouldn't affect number of rows. rawDataSize is 792 and not 796 because of rounding off issue with avgColLen. 
avgColLen uses integers and not double.
+-- numRows: 8 rawDataSize: 792
+explain extended select * from loc_orc where state!='OH';
+explain extended select * from loc_orc where state<>'OH';
+
+-- nulls are treated as constant equality comparison
+-- numRows: 1 rawDataSize: 99
+explain extended select * from loc_orc where zip is null;
+-- numRows: 1 rawDataSize: 99
+explain extended select * from loc_orc where !(zip is not null);
+
+-- not nulls are treated as inverse of nulls
+-- numRows: 7 rawDataSize: 693
+explain extended select * from loc_orc where zip is not null;
+-- numRows: 7 rawDataSize: 693
+explain extended select * from loc_orc where !(zip is null);
+
+-- NOT evaluation. true will pass all rows, false will not pass any rows
+-- numRows: 8 rawDataSize: 792
+explain extended select * from loc_orc where !false;
+-- numRows: 0 rawDataSize: 0
+explain extended select * from loc_orc where !true;
+
+-- OR evaluation. 1 row for OH and 1 row for CA
+-- numRows: 2 rawDataSize: 198
+explain extended select * from loc_orc where state='OH' or state='CA';
+
+-- AND evaluation. rules are applied in cascade: 8/2 = 4, then 4/2 = 2
+-- numRows: 2 rawDataSize: 198
+explain extended select * from loc_orc where year=2001 and year is null;
+-- numRows: 0 rawDataSize: 0
+explain extended select * from loc_orc where year=2001 and state='OH' and state='FL';
+
+-- AND and OR together. left expr will yield 2 rows and right will yield 1 row
+-- numRows: 3 rawDataSize: 297
+explain extended select * from loc_orc where (year=2001 and year is null) or (state='CA');
+
+-- AND and OR together. left expr will yield 8 rows and right will yield 1 row
+-- numRows: 1 rawDataSize: 99
+explain extended select * from loc_orc where (year=2001 or year is null) and (state='CA');
+
+-- all inequality conditions use the rows/3 rule
+-- numRows: 2 rawDataSize: 198
+explain extended select * from loc_orc where locid < 30;
+explain extended select * from loc_orc where locid > 30;
+explain extended select * from loc_orc where locid <= 30;
+explain extended select * from loc_orc where locid >= 30;
+
 diff --git ql/src/test/queries/clientpositive/annotate_stats_groupby.q ql/src/test/queries/clientpositive/annotate_stats_groupby.q
new file mode 100644
index 0000000..2229e2e
--- /dev/null
+++ ql/src/test/queries/clientpositive/annotate_stats_groupby.q
@@ -0,0 +1,44 @@
+create table if not exists loc_staging (
+ state string,
+ locid int,
+ zip bigint,
+ year int
+) row format delimited fields terminated by '|' stored as textfile;
+
+create table loc_orc like loc_staging;
+alter table loc_orc set fileformat orc;
+
+load data local inpath '../data/files/loc.txt' overwrite into table loc_staging;
+
+insert overwrite table loc_orc select * from loc_staging;
+
+-- numRows: 8 rawDataSize: 796
+explain extended select * from loc_orc;
+
+analyze table loc_orc compute statistics for columns state,locid,zip,year;
+
+-- only one distinct value in year column + 1 NULL value
+-- map-side and reduce-side GBY : numRows: 2
+explain extended select year from loc_orc group by year;
+
+-- map-side and reduce-side GBY : numRows: 4
+explain extended select state,locid from loc_orc group by state,locid;
+
+-- map-side GBY numRows: 16 reduce-side GBY numRows: 8
+explain extended select state,locid from loc_orc group by state,locid with cube;
+
+-- map-side GBY numRows: 12 reduce-side GBY numRows: 6
+explain extended select state,locid from loc_orc group by state,locid with rollup;
+
+-- map-side GBY numRows: 4 reduce-side GBY numRows: 2
+explain extended select
state,locid from loc_orc group by state,locid grouping sets((state)); + +-- map-side GBY numRows: 8 reduce-side GBY numRows: 4 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid)); + +-- map-side GBY numRows: 12 reduce-side GBY numRows: 6 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid),()); + +-- map-side GBY numRows: 16 reduce-side GBY numRows: 8 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),()); + diff --git ql/src/test/queries/clientpositive/annotate_stats_join.q ql/src/test/queries/clientpositive/annotate_stats_join.q new file mode 100644 index 0000000..0f7b130 --- /dev/null +++ ql/src/test/queries/clientpositive/annotate_stats_join.q @@ -0,0 +1,36 @@ +create table if not exists emp_staging ( + lastname string, + deptid int +) row format delimited fields terminated by '|' stored as textfile; + +create table if not exists dept_staging ( + deptname string, + deptid int +) row format delimited fields terminated by '|' stored as textfile; + +create table if not exists emp_orc like emp_staging; +alter table emp_orc set fileformat orc; + +create table if not exists dept_orc like dept_staging; +alter table dept_orc set fileformat orc; + +LOAD DATA LOCAL INPATH '../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging; +LOAD DATA LOCAL INPATH '../data/files/dept.txt' OVERWRITE INTO TABLE dept_staging; + +insert overwrite table emp_orc select * from emp_staging; +insert overwrite table dept_orc select * from dept_staging; + +analyze table emp_orc compute statistics for columns lastname,deptid; + +-- no statistics will be displayed for this case as column statistics for table dept_orc is not available yet +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid); + +analyze table dept_orc compute statistics for columns deptname,deptid; + +-- emp_orc numRows: 6 DV: 3, dept_orc numRows: 4 DV: 4. Output of join will yield 6 rows (6*4)/max(3,4) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid); + +set hive.auto.convert.join=false; + +-- emp_orc numRows: 6 DV: 3, dept_orc numRows: 4 DV: 4. 
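DV is the distinct-value count of the join key deptid.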
Output of join will yield 6 rows (6*4)/max(3,4) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid); diff --git ql/src/test/queries/clientpositive/annotate_stats_limit.q ql/src/test/queries/clientpositive/annotate_stats_limit.q new file mode 100644 index 0000000..2694ec9 --- /dev/null +++ ql/src/test/queries/clientpositive/annotate_stats_limit.q @@ -0,0 +1,27 @@ +create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile; + +create table loc_orc like loc_staging; +alter table loc_orc set fileformat orc; + +load data local inpath '../data/files/loc.txt' overwrite into table loc_staging; + +insert overwrite table loc_orc select * from loc_staging; + +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc; + +-- numRows: 4 rawDataSize: 396 +explain extended select * from loc_orc limit 4; + +-- greater than the available number of rows +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc limit 16; + +-- numRows: 0 rawDataSize: 0 +explain extended select * from loc_orc limit 0; + diff --git ql/src/test/queries/clientpositive/annotate_stats_part.q ql/src/test/queries/clientpositive/annotate_stats_part.q new file mode 100644 index 0000000..cf0e61a --- /dev/null +++ ql/src/test/queries/clientpositive/annotate_stats_part.q @@ -0,0 +1,66 @@ +create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile; + +LOAD DATA LOCAL INPATH '../data/files/loc.txt' OVERWRITE INTO TABLE loc_staging; + +create table if not exists loc_orc ( + state string, + locid int, + zip bigint +) partitioned by(year int) stored as orc; + +-- basicStatState: NONE level: PARTITION colStatState: NONE +explain extended select * from loc_orc; + +set hive.stats.autogather=false; +set hive.exec.dynamic.partition=true; +set hive.exec.dynamic.partition.mode=nonstrict; + +insert overwrite table loc_orc partition(year) select * from loc_staging; + +-- stats are disabled. basic stats will report the file size but not raw data size. 
so initial statistics will be PARTIAL
+
+-- basicStatState: PARTIAL level: PARTITION colStatState: NONE
+explain extended select * from loc_orc;
+
+-- partition level analyze statistics for a specific partition
+analyze table loc_orc partition(year=2001) compute statistics;
+-- basicStatState: PARTIAL level: PARTITION colStatState: NONE
+explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__';
+-- basicStatState: PARTIAL level: PARTITION colStatState: NONE
+explain extended select * from loc_orc;
+-- basicStatState: COMPLETE level: PARTITION colStatState: NONE
+explain extended select * from loc_orc where year=2001;
+
+-- partition level analyze statistics for all partitions
+analyze table loc_orc partition(year) compute statistics;
+-- basicStatState: COMPLETE level: PARTITION colStatState: NONE
+explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__';
+-- basicStatState: COMPLETE level: PARTITION colStatState: NONE
+explain extended select * from loc_orc;
+-- basicStatState: COMPLETE level: PARTITION colStatState: NONE
+explain extended select * from loc_orc where year=2001 or year='__HIVE_DEFAULT_PARTITION__';
+-- both partitions will be pruned
+-- basicStatState: NONE level: PARTITION colStatState: NONE
+explain extended select * from loc_orc where year=2001 and year='__HIVE_DEFAULT_PARTITION__';
+
+-- partition level partial column statistics
+analyze table loc_orc partition(year=2001) compute statistics for columns state,locid;
+-- basicStatState: COMPLETE level: PARTITION colStatState: NONE
+explain extended select zip from loc_orc;
+-- basicStatState: COMPLETE level: PARTITION colStatState: PARTIAL
+explain extended select state from loc_orc;
+
+-- column statistics for __HIVE_DEFAULT_PARTITION__ is not supported yet.
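(the loc.txt row with an empty year lands in that default partition.)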
Hence colStatState reports PARTIAL
+-- basicStatState: COMPLETE level: PARTITION colStatState: PARTIAL
+explain extended select state,locid from loc_orc;
+-- basicStatState: COMPLETE level: PARTITION colStatState: COMPLETE
+explain extended select state,locid from loc_orc where year=2001;
+-- basicStatState: COMPLETE level: PARTITION colStatState: NONE
+explain extended select state,locid from loc_orc where year!=2001;
+-- basicStatState: COMPLETE level: PARTITION colStatState: PARTIAL
+explain extended select * from loc_orc;
diff --git ql/src/test/queries/clientpositive/annotate_stats_ptf.q ql/src/test/queries/clientpositive/annotate_stats_ptf.q
new file mode 100644
index 0000000..69b3bbd
--- /dev/null
+++ ql/src/test/queries/clientpositive/annotate_stats_ptf.q
@@ -0,0 +1,30 @@
+create table if not exists loc_staging (
+ state string,
+ locid int,
+ zip bigint,
+ year int
+) row format delimited fields terminated by '|' stored as textfile;
+
+create table loc_orc like loc_staging;
+alter table loc_orc set fileformat orc;
+
+load data local inpath '../data/files/loc.txt' overwrite into table loc_staging;
+
+insert overwrite table loc_orc select * from loc_staging;
+
+analyze table loc_orc compute statistics for columns state,locid,zip,year;
+
+-- numRows: 8 rawDataSize: 120
+explain extended select zip, count(state) over (partition by locid) from loc_orc;
+
+-- numRows: 8 rawDataSize: 120
+explain extended select zip, count(state) over (partition by locid,zip) from loc_orc;
+
+-- numRows: 8 rawDataSize: 120
+explain extended select zip, count(state) over (partition by zip order by locid) from loc_orc;
+
+-- numRows: 8 rawDataSize: 120
+explain extended select zip, count(state) over (partition by zip order by locid rows between unbounded preceding and current row) from loc_orc;
+
+-- numRows: 8 rawDataSize: 120
+explain extended select zip, count(state) over (partition by zip order by locid rows between 3 preceding and current row) from loc_orc;
diff --git ql/src/test/queries/clientpositive/annotate_stats_select.q ql/src/test/queries/clientpositive/annotate_stats_select.q
new file mode 100644
index 0000000..85cd236
--- /dev/null
+++ ql/src/test/queries/clientpositive/annotate_stats_select.q
@@ -0,0 +1,101 @@
+create table if not exists alltypes (
+ bo1 boolean,
+ ti1 tinyint,
+ si1 smallint,
+ i1 int,
+ bi1 bigint,
+ f1 float,
+ d1 double,
+ de1 decimal,
+ ts1 timestamp,
+ da1 timestamp,
+ s1 string,
+ m1 map,
+ l1 array,
+ st1 struct
+) row format delimited fields terminated by '|'
+collection items terminated by ','
+map keys terminated by ':' stored as textfile;
+
+create table alltypes_orc like alltypes;
+alter table alltypes_orc set fileformat orc;
+
+load data local inpath '../data/files/alltypes.txt' overwrite into table alltypes;
+
+insert overwrite table alltypes_orc select * from alltypes;
+
+-- basicStatState: PARTIAL level: TABLE colStatState: NONE numRows: 2 rawDataSize: 1514
+explain extended select * from alltypes_orc;
+
+-- statistics for complex types are not supported yet
+analyze table alltypes_orc compute statistics for columns bo1, ti1, si1, i1, bi1, f1, d1,s1;
+
+-- numRows: 2 rawDataSize: 1514
+explain extended select * from alltypes_orc;
+
+-- numRows: 2 rawDataSize: 8
+explain extended select bo1 from alltypes_orc;
+
+-- col alias renaming
+-- numRows: 2 rawDataSize: 8
+explain extended select i1 as int1 from alltypes_orc;
+
+-- numRows: 2 rawDataSize: 172
+explain extended select s1 from alltypes_orc;
+
+-- column statistics for complex types
unsupported and so statistics will not be updated +-- numRows: 2 rawDataSize: 1514 +explain extended select m1 from alltypes_orc; + +-- numRows: 2 rawDataSize: 244 +explain extended select bo1, ti1, si1, i1, bi1, f1, d1,s1 from alltypes_orc; + +-- numRows: 2 rawDataSize: 0 +explain extended select null from alltypes_orc; + +-- numRows: 2 rawDataSize: 8 +explain extended select 11 from alltypes_orc; + +-- numRows: 2 rawDataSize: 16 +explain extended select 11L from alltypes_orc; + +-- numRows: 2 rawDataSize: 16 +explain extended select 11.0 from alltypes_orc; + +-- numRows: 2 rawDataSize: 178 +explain extended select "hello" from alltypes_orc; + +-- numRows: 2 rawDataSize: 96 +explain extended select unbase64("0xe23") from alltypes_orc; + +-- numRows: 2 rawDataSize: 16 +explain extended select cast("1" as TINYINT), cast("20" as SMALLINT) from alltypes_orc; + +-- numRows: 2 rawDataSize: 80 +explain extended select cast("1970-12-31 15:59:58.174" as TIMESTAMP) from alltypes_orc; + +-- numRows: 2 rawDataSize: 112 +explain extended select cast("1970-12-31 15:59:58.174" as DATE) from alltypes_orc; + +-- numRows: 2 rawDataSize: 224 +explain extended select cast("58.174" as DECIMAL) from alltypes_orc; + +-- numRows: 2 rawDataSize: 112 +explain extended select array(1,2,3) from alltypes_orc; + +-- numRows: 2 rawDataSize: 1508 +explain extended select str_to_map("a=1 b=2 c=3", " ", "=") from alltypes_orc; + +-- numRows: 2 rawDataSize: 112 +explain extended select NAMED_STRUCT("a", 11, "b", 11) from alltypes_orc; + +-- numRows: 2 rawDataSize: 250 +explain extended select CREATE_UNION(0, "hello") from alltypes_orc; + +-- COUNT(*) is projected as new column. It is not projected as GenericUDF and so datasize estimate will be based on number of rows +-- numRows: 1 rawDataSize: 757 +explain extended select count(*) from alltypes_orc; + +-- COUNT(1) is projected as new column. It is not projected as GenericUDF and so datasize estimate will be based on number of rows +-- numRows: 1 rawDataSize: 757 +explain extended select count(1) from alltypes_orc; diff --git ql/src/test/queries/clientpositive/annotate_stats_table.q ql/src/test/queries/clientpositive/annotate_stats_table.q new file mode 100644 index 0000000..3815189 --- /dev/null +++ ql/src/test/queries/clientpositive/annotate_stats_table.q @@ -0,0 +1,42 @@ +create table if not exists emp_staging ( + lastname string, + deptid int +) row format delimited fields terminated by '|' stored as textfile; + +create table if not exists emp_orc like emp_staging; +alter table emp_orc set fileformat orc; + +-- basicStatState: NONE level: TABLE colStatState: NONE +explain extended select * from emp_orc; + +LOAD DATA LOCAL INPATH '../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging; + +set hive.stats.autogather=false; +set hive.exec.dynamic.partition=true; +set hive.exec.dynamic.partition.mode=nonstrict; + +insert overwrite table emp_orc select * from emp_staging; + +-- stats are disabled. basic stats will report the file size but not raw data size. 
so initial statistics will be PARTIAL + +-- basicStatState: PARTIAL level: TABLE colStatState: NONE +explain extended select * from emp_orc; + +-- table level analyze statistics +analyze table emp_orc compute statistics; + +-- basicStatState: COMPLETE level: TABLE colStatState: NONE +explain extended select * from emp_orc; + +-- column level partial statistics +analyze table emp_orc compute statistics for columns deptid; +-- basicStatState: COMPLETE level: TABLE colStatState: PARTIAL +explain extended select * from emp_orc; +-- all selected columns have statistics +-- basicStatState: COMPLETE level: TABLE colStatState: COMPLETE +explain extended select deptid from emp_orc; + +-- column level complete statistics +analyze table emp_orc compute statistics for columns lastname,deptid; +-- basicStatState: COMPLETE level: TABLE colStatState: COMPLETE +explain extended select * from emp_orc; diff --git ql/src/test/queries/clientpositive/annotate_stats_union.q ql/src/test/queries/clientpositive/annotate_stats_union.q new file mode 100644 index 0000000..5598a39 --- /dev/null +++ ql/src/test/queries/clientpositive/annotate_stats_union.q @@ -0,0 +1,52 @@ +create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile; + +create table loc_orc like loc_staging; +alter table loc_orc set fileformat orc; + +load data local inpath '../data/files/loc.txt' overwrite into table loc_staging; + +insert overwrite table loc_orc select * from loc_staging; + +analyze table loc_orc compute statistics for columns state,locid,zip,year; + +-- numRows: 8 rawDataSize: 680 +explain extended select state from loc_orc; + +-- numRows: 16 rawDataSize: 1360 +explain extended select * from (select state from loc_orc union all select state from loc_orc) tmp; + +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc; + +-- numRows: 16 rawDataSize: 1592 +explain extended select * from (select * from loc_orc union all select * from loc_orc) tmp; + +create database test; +use test; +create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile; + +create table loc_orc like loc_staging; +alter table loc_orc set fileformat orc; + +load data local inpath '../data/files/loc.txt' overwrite into table loc_staging; + +insert overwrite table loc_orc select * from loc_staging; + +analyze table loc_staging compute statistics for columns state,locid,zip,year; +analyze table loc_orc compute statistics for columns state,locid,zip,year; + +-- there should be 2 entries for DB statistics. Since there are 2 different DBs statistics can't be merged +explain extended select * from (select state from default.loc_orc union all select state from test.loc_orc) temp; + +-- there should be 2 entries for Table statistics. 
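(one entry for test.loc_staging and one for test.loc_orc.)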
Since there are 2 different tables statistics can't be merged +explain extended select * from (select state from test.loc_staging union all select state from test.loc_orc) temp; diff --git ql/src/test/results/clientpositive/annotate_stats_filter.q.out ql/src/test/results/clientpositive/annotate_stats_filter.q.out new file mode 100644 index 0000000..ff7c222 --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_filter.q.out @@ -0,0 +1,2470 @@ +PREHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_staging +PREHOOK: query: create table loc_orc like loc_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table loc_orc like loc_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_orc +PREHOOK: query: alter table loc_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc +POSTHOOK: query: alter table loc_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc +PREHOOK: query: load data local inpath '../data/files/loc.txt' overwrite into table loc_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@loc_staging +POSTHOOK: query: load data local inpath '../data/files/loc.txt' overwrite into table loc_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@loc_staging +PREHOOK: query: insert overwrite table loc_orc select * from loc_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_staging +PREHOOK: Output: default@loc_orc +POSTHOOK: query: insert overwrite table loc_orc select * from loc_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_staging +POSTHOOK: Output: default@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + 
Processor Tree: + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + ListSink + + +PREHOOK: query: -- column stats are not COMPLETE, so stats are not updated +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc where state='OH' +PREHOOK: type: QUERY +POSTHOOK: query: -- column stats are not COMPLETE, so stats are not updated +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc where state='OH' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL state) 'OH')))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (state = 'OH') + type: boolean + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: NONE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked 
pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- state column has 5 distincts. numRows/countDistincts +-- numRows: 1 rawDataSize: 99 +explain extended select * from loc_orc where state='OH' +PREHOOK: type: QUERY +POSTHOOK: query: -- state column has 5 distincts. 
numRows/countDistincts +-- numRows: 1 rawDataSize: 99 +explain extended select * from loc_orc where state='OH' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL state) 'OH')))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (state = 'OH') + type: boolean + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 1 rawDataSize: 99 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 1 rawDataSize: 99 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 1 rawDataSize: 99 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- not equals comparison shouldn't affect number of rows. rawDataSize is 792 and not 796 because of rounding off issue with avgColLen. avgColLen uses integers and not double. +-- numRows: 8 rawDataSize: 792 +explain extended select * from loc_orc where state!='OH' +PREHOOK: type: QUERY +POSTHOOK: query: -- not equals comparison shouldn't affect number of rows. rawDataSize is 792 and not 796 because of rounding off issue with avgColLen. avgColLen uses integers and not double. +-- numRows: 8 rawDataSize: 792 +explain extended select * from loc_orc where state!='OH' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (!= (TOK_TABLE_OR_COL state) 'OH')))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (state <> 'OH') + type: boolean + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 792 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 792 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 792 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + 
escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: explain extended select * from loc_orc where state<>'OH' +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select * from loc_orc where state<>'OH' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (<> (TOK_TABLE_OR_COL state) 'OH')))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (state <> 'OH') + type: boolean + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 792 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, 
_col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 792 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 792 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- nulls are treated as constant equality comparison +-- numRows: 1 rawDataSize: 99 +explain extended select * from loc_orc where zip is null +PREHOOK: type: QUERY +POSTHOOK: query: -- nulls are treated as constant equality comparison +-- numRows: 1 rawDataSize: 99 +explain extended select * from loc_orc where zip is null +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR 
TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_FUNCTION TOK_ISNULL (TOK_TABLE_OR_COL zip))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: zip is null + type: boolean + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 1 rawDataSize: 99 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 1 rawDataSize: 99 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 1 rawDataSize: 99 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 1 rawDataSize: 
99 +explain extended select * from loc_orc where !(zip is not null) +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 1 rawDataSize: 99 +explain extended select * from loc_orc where !(zip is not null) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (! (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL zip)))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (not zip is not null) + type: boolean + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 1 rawDataSize: 99 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 1 rawDataSize: 99 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 1 rawDataSize: 99 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde 
+ + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- not nulls are treated as inverse of nulls +-- numRows: 7 rawDataSize: 693 +explain extended select * from loc_orc where zip is not null +PREHOOK: type: QUERY +POSTHOOK: query: -- not nulls are treated as inverse of nulls +-- numRows: 7 rawDataSize: 693 +explain extended select * from loc_orc where zip is not null +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL zip))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: zip is not null + type: boolean + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 7 rawDataSize: 693 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 7 rawDataSize: 693 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 7 rawDataSize: 693 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 7 rawDataSize: 693 +explain extended select * from loc_orc where !(zip is null) +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 7 rawDataSize: 693 +explain extended select * from loc_orc where !(zip is null) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (! 
(TOK_FUNCTION TOK_ISNULL (TOK_TABLE_OR_COL zip)))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (not zip is null) + type: boolean + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 7 rawDataSize: 693 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 7 rawDataSize: 693 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 7 rawDataSize: 693 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- NOT evaluation. 
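The four plans above exercise the null-predicate rules: with numRows: 8 and rawDataSize: 796 at the scan, and one null zip value recorded in the column statistics, "zip is null" is annotated with numNulls = 1 row, while "zip is not null" and "!(zip is null)" both get numRows - numNulls = 7 rows; rawDataSize is rescaled by the average row size, 796 / 8 = 99 bytes. A minimal sketch of that arithmetic, assuming the stats values shown in the plans; the class and method names are illustrative, not the actual Hive code:

    // Sketch only: reproduces the IS NULL / IS NOT NULL annotations above.
    // numRows and rawDataSize are the table-level stats; numNulls is assumed
    // to be the zip column's null count from column statistics.
    public class NullPredicateEstimate {
        // average row size times surviving rows, using integer math as the plans do
        static long scaleDataSize(long rawDataSize, long numRows, long newRows) {
            return numRows == 0 ? 0 : (rawDataSize / numRows) * newRows;
        }
        public static void main(String[] args) {
            long numRows = 8, rawDataSize = 796, numNulls = 1;
            long isNull = numNulls;                // zip is null     -> numRows: 1
            long isNotNull = numRows - numNulls;   // zip is not null -> numRows: 7
            System.out.println(isNull + " rows, " + scaleDataSize(rawDataSize, numRows, isNull) + " bytes");       // 1 rows, 99 bytes
            System.out.println(isNotNull + " rows, " + scaleDataSize(rawDataSize, numRows, isNotNull) + " bytes"); // 7 rows, 693 bytes
        }
    }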
true will pass all rows, false will not pass any rows +-- numRows: 8 rawDataSize: 792 +explain extended select * from loc_orc where !false +PREHOOK: type: QUERY +POSTHOOK: query: -- NOT evaluation. true will pass all rows, false will not pass any rows +-- numRows: 8 rawDataSize: 792 +explain extended select * from loc_orc where !false +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (! false)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (not false) + type: boolean + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 792 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 792 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 792 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A 
masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 0 rawDataSize: 0 +explain extended select * from loc_orc where !true +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 0 rawDataSize: 0 +explain extended select * from loc_orc where !true +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (! true)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (not true) + type: boolean + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 0 rawDataSize: 0 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 0 rawDataSize: 0 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 0 rawDataSize: 0 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + 
GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- OR evaluation. 1 row for OH and 1 row for CA +-- numRows: 2 rawDataSize: 198 +explain extended select * from loc_orc where state='OH' or state='CA' +PREHOOK: type: QUERY +POSTHOOK: query: -- OR evaluation. 
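The !false and !true plans above show the two constant cases: NOT flips the constant, a predicate that is always true passes every row (numRows stays 8, with rawDataSize re-derived as 8 * 99 = 792 from the average row size), and a predicate that is always false zeroes both counters. A minimal sketch, under the same illustrative assumptions as the previous example:

    // Sketch only: the constant-predicate rule behind the !false / !true plans.
    public class ConstantPredicateEstimate {
        public static void main(String[] args) {
            long numRows = 8, rawDataSize = 796;
            long avgRowSize = rawDataSize / numRows;   // 796 / 8 = 99
            for (boolean constant : new boolean[] {false, true}) {
                boolean predicate = !constant;         // where !false, where !true
                long rows = predicate ? numRows : 0;
                System.out.println("!" + constant + " -> numRows: " + rows
                        + " rawDataSize: " + rows * avgRowSize); // 8/792, then 0/0
            }
        }
    }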
1 row for OH and 1 row for CA +-- numRows: 2 rawDataSize: 198 +explain extended select * from loc_orc where state='OH' or state='CA' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (or (= (TOK_TABLE_OR_COL state) 'OH') (= (TOK_TABLE_OR_COL state) 'CA'))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: ((state = 'OH') or (state = 'CA')) + type: boolean + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 2 rawDataSize: 198 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 2 rawDataSize: 198 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 2 rawDataSize: 198 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- AND evaluation. rules are applied in cascade: 8/2 = 4, then 4/2 = 2 +-- numRows: 2 rawDataSize: 198 +explain extended select * from loc_orc where year=2001 and year is null +PREHOOK: type: QUERY +POSTHOOK: query: -- AND evaluation. rules are applied in cascade: 8/2 = 4, then 4/2 = 2 +-- numRows: 2 rawDataSize: 198 +explain extended select * from loc_orc where year=2001 and year is null +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (= (TOK_TABLE_OR_COL year) 2001) (TOK_FUNCTION TOK_ISNULL (TOK_TABLE_OR_COL year)))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: ((year = 2001) and year is null) + type: boolean + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 2 rawDataSize: 198 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 2 rawDataSize: 198 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 2 rawDataSize: 198 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + 
serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 0 rawDataSize: 0 +explain extended select * from loc_orc where year=2001 and state='OH' and state='FL' +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 0 rawDataSize: 0 +explain extended select * from loc_orc where year=2001 and state='OH' and state='FL' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (and (= (TOK_TABLE_OR_COL year) 2001) (= (TOK_TABLE_OR_COL state) 'OH')) (= (TOK_TABLE_OR_COL state) 'FL'))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (((year = 2001) and (state = 'OH')) and (state = 'FL')) + type: boolean + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 0 rawDataSize: 0 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + 
Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 0 rawDataSize: 0 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 0 rawDataSize: 0 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- AND and OR together. left expr will yield 2 rows and right will yield 1 row +-- numRows: 3 rawDataSize: 297 +explain extended select * from loc_orc where (year=2001 and year is null) or (state='CA') +PREHOOK: type: QUERY +POSTHOOK: query: -- AND and OR together. 
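The last three plans compose the two connectives: OR adds its children's estimates (1 row for OH plus 1 row for CA gives 2), while AND applies each conjunct's rule in cascade (8/2 = 4, then 4/2 = 2; the contradictory OH-and-FL conjunction bottoms out at 0). For the query below, the left disjunct therefore contributes 2 rows and state='CA' contributes 1, which is how the plan arrives at numRows: 3. A minimal sketch of that composition, with per-predicate divisors assumed so as to reproduce the plans' numbers:

    // Sketch only: composing the filter estimates for
    //   (year=2001 and year is null) or (state='CA')
    // The divisors are assumptions chosen to match the plans: year=2001 and
    // year is null each halve the input (8/2 = 4, then 4/2 = 2), and
    // state='CA' divides by an assumed 5 distinct states (8/5 = 1).
    public class AndOrEstimate {
        static long apply(long inRows, long divisor) { return inRows / divisor; }
        public static void main(String[] args) {
            long numRows = 8;
            long left = apply(apply(numRows, 2), 2); // AND cascades: 8 -> 4 -> 2
            long right = apply(numRows, 5);          // state = 'CA' -> 1
            System.out.println(left + right);        // OR adds: 2 + 1 = 3
        }
    }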
left expr will yield 2 rows and right will yield 1 row +-- numRows: 3 rawDataSize: 297 +explain extended select * from loc_orc where (year=2001 and year is null) or (state='CA') +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (or (and (= (TOK_TABLE_OR_COL year) 2001) (TOK_FUNCTION TOK_ISNULL (TOK_TABLE_OR_COL year))) (= (TOK_TABLE_OR_COL state) 'CA'))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (((year = 2001) and year is null) or (state = 'CA')) + type: boolean + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 3 rawDataSize: 297 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 3 rawDataSize: 297 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 3 rawDataSize: 297 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A 
masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- AND and OR together. left expr will yield 8 rows and right will yield 1 row +-- numRows: 1 rawDataSize: 99 +explain extended select * from loc_orc where (year=2001 or year is null) and (state='CA') +PREHOOK: type: QUERY +POSTHOOK: query: -- AND and OR together. left expr will yield 8 rows and right will yield 1 row +-- numRows: 1 rawDataSize: 99 +explain extended select * from loc_orc where (year=2001 or year is null) and (state='CA') +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (or (= (TOK_TABLE_OR_COL year) 2001) (TOK_FUNCTION TOK_ISNULL (TOK_TABLE_OR_COL year))) (= (TOK_TABLE_OR_COL state) 'CA'))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (((year = 2001) or year is null) and (state = 'CA')) + type: boolean + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 1 rawDataSize: 99 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 1 rawDataSize: 99 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 1 rawDataSize: 99 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- all inequality conditions use the rows/3 rule +-- numRows: 2 rawDataSize: 198 +explain extended select * from loc_orc where locid < 30 +PREHOOK: type: QUERY +POSTHOOK: query: -- all inequality conditions use the rows/3 rule +-- numRows: 2 rawDataSize: 198 +explain extended select * from loc_orc where locid < 30 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL locid) 30)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (locid < 30) + type: boolean + Statistics: + 
dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 2 rawDataSize: 198 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 2 rawDataSize: 198 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 2 rawDataSize: 198 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: explain extended select * from loc_orc where locid > 30 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select * from loc_orc where locid > 30 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, 
type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (> (TOK_TABLE_OR_COL locid) 30)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (locid > 30) + type: boolean + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 2 rawDataSize: 198 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 2 rawDataSize: 198 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 2 rawDataSize: 198 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + 
Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: explain extended select * from loc_orc where locid <= 30 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select * from loc_orc where locid <= 30 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (<= (TOK_TABLE_OR_COL locid) 30)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (locid <= 30) + type: boolean + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 2 rawDataSize: 198 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 2 rawDataSize: 198 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 2 rawDataSize: 198 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: explain extended select * from loc_orc where locid >= 30 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select * from loc_orc where locid >= 30 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (>= (TOK_TABLE_OR_COL locid) 30)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (locid >= 30) + type: boolean + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 2 rawDataSize: 198 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 2 rawDataSize: 198 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 2 rawDataSize: 198 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: 
+#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + diff --git ql/src/test/results/clientpositive/annotate_stats_groupby.q.out ql/src/test/results/clientpositive/annotate_stats_groupby.q.out new file mode 100644 index 0000000..64cf02a --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_groupby.q.out @@ -0,0 +1,1426 @@ +PREHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_staging +PREHOOK: query: create table loc_orc like loc_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table loc_orc like loc_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_orc +PREHOOK: query: alter table loc_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc +POSTHOOK: query: alter table loc_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc +PREHOOK: query: load data local inpath '../data/files/loc.txt' overwrite into table loc_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@loc_staging +POSTHOOK: query: load data local inpath '../data/files/loc.txt' overwrite into table loc_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@loc_staging +PREHOOK: query: insert overwrite table loc_orc select * from loc_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_staging +PREHOOK: Output: default@loc_orc +POSTHOOK: query: insert overwrite table loc_orc select * from loc_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_staging +POSTHOOK: Output: default@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE 
[(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + ListSink + + +PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- only one distinct value in year column + 1 NULL value +-- map-side and reduce-side GBY : numRows: 2 +explain extended select year from loc_orc group by year +PREHOOK: type: QUERY +POSTHOOK: query: -- only one distinct value in year column + 1 NULL value +-- map-side and reduce-side GBY : numRows: 2 +explain extended select year from loc_orc group by year +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE 
[(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL year))) (TOK_GROUPBY (TOK_TABLE_OR_COL year)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Select Operator + expressions: + expr: year + type: int + outputColumnNames: year + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Group By Operator + bucketGroup: false + keys: + expr: year + type: int + mode: hash + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 2 rawDataSize: 198 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + Map-reduce partition columns: + expr: _col0 + type: int + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 2 rawDataSize: 198 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: int + mode: mergepartial + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 2 rawDataSize: 198 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: _col0 + type: 
int + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 2 rawDataSize: 4 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 2 rawDataSize: 4 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- map-side and reduce-side GBY : numRows: 4 +explain extended select state,locid from loc_orc group by state,locid +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side and reduce-side GBY : numRows: 4 +explain extended select state,locid from loc_orc group by state,locid +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_GROUPBY (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: state, locid + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Group By Operator + bucketGroup: false + keys: + expr: state + type: string + expr: locid + type: int + mode: hash + outputColumnNames: _col0, _col1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 4 rawDataSize: 396 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 4 rawDataSize: 396 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked 
pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 4 rawDataSize: 396 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 4 rawDataSize: 356 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 4 rawDataSize: 356 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- map-side GBY numRows: 16 reduce-side GBY numRows: 8 +explain extended select state,locid from loc_orc group by state,locid with cube +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 16 reduce-side GBY numRows: 8 +explain extended select state,locid from loc_orc group by state,locid with cube +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] 
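Before the grouping-sets plans, it is worth noting what the two plain GROUP BY plans above imply. With colStatState: COMPLETE, the group-by output estimate tracks the distinct-value counts of the keys: group by year is annotated with numRows: 2 (one distinct value plus a NULL, as the test comment says), while group by state,locid is annotated with numRows: 4 even though the combined distinct count of the key pair is plausibly higher. One reading consistent with both plans is an NDV product capped at half the input rows; the cap and the exact NDV figures below are assumptions, not values from the patch.

    // Illustrative group-by cardinality estimate consistent with the two
    // plans above. The ndv-product rule and the inputRows / 2 cap are
    // assumptions about the heuristic, not code from the patch.
    public final class GroupByEstimate {
      static long estimate(long inputRows, long[] keyNdvs) {
        long product = 1;
        for (long ndv : keyNdvs) {
          product *= ndv;  // combined distinct count of the grouping keys
        }
        return Math.min(product, inputRows / 2);  // cap when NDVs overshoot
      }

      public static void main(String[] args) {
        // group by year: one distinct value plus a NULL, treated as NDV 2
        System.out.println(estimate(8, new long[] {2}));     // prints 2
        // group by state, locid: hypothetical NDVs 6 and 5, product 30
        System.out.println(estimate(8, new long[] {6, 5}));  // prints 4
      }
    }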
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: state, locid + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Group By Operator + bucketGroup: false + keys: + expr: state + type: string + expr: locid + type: int + expr: '0' + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 16 rawDataSize: 1584 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 16 rawDataSize: 1584 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + 
Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + expr: KEY._col2 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 792 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 712 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 712 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- map-side GBY numRows: 12 reduce-side GBY numRows: 6 +explain extended select state,locid from loc_orc group by state,locid with rollup +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 12 reduce-side GBY numRows: 6 +explain extended select state,locid from loc_orc group by state,locid with rollup +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_ROLLUP_GROUPBY (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: state, locid + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Group By Operator + bucketGroup: false + keys: + expr: state + type: string + expr: locid + type: int + expr: '0' + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: 
loc_orc numRows: 12 rawDataSize: 1188 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 12 rawDataSize: 1188 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + expr: KEY._col2 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 6 rawDataSize: 594 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 6 rawDataSize: 534 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 6 rawDataSize: 534 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false 
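The expected numbers in this file follow a clear pattern across the grouping-sets variants: on the 8-row input, the map-side group by is annotated with 4, 8, 12, and 16 rows for one, two, three (rollup), and four (cube) grouping sets respectively, and the reduce-side merge is always half the map-side figure. The sketch below reproduces that pattern; the inputRows / 2 base estimate per grouping set is an assumption read off these plans, not a rule quoted from the patch.

    // Sketch of the grouping-sets scaling visible in this file's expected
    // output. The base estimate of inputRows / 2 per grouping set is an
    // assumption inferred from the annotated plans.
    public final class GroupingSetsEstimate {
      static long mapSideRows(long inputRows, int numGroupingSets) {
        return (inputRows / 2) * numGroupingSets;
      }

      static long reduceSideRows(long mapSideRows) {
        return mapSideRows / 2;  // merge-partial halves the estimate
      }

      public static void main(String[] args) {
        int keys = 2;
        int cubeSets = 1 << keys;   // cube over n keys -> 2^n grouping sets
        int rollupSets = keys + 1;  // rollup over n keys -> n+1 grouping sets

        System.out.println(mapSideRows(8, cubeSets));    // 16 (cube)
        System.out.println(reduceSideRows(16));          //  8
        System.out.println(mapSideRows(8, rollupSets));  // 12 (rollup)
        System.out.println(reduceSideRows(12));          //  6
      }
    }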
+ MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- map-side GBY numRows: 4 reduce-side GBY numRows: 2 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state)) +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 4 reduce-side GBY numRows: 2 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state)) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_GROUPING_SETS (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL state))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: state, locid + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Group By Operator + bucketGroup: false + keys: + expr: state + type: string + expr: locid + type: int + expr: '0' + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 4 rawDataSize: 396 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 4 rawDataSize: 396 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + expr: KEY._col2 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 2 rawDataSize: 198 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 2 rawDataSize: 178 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 2 rawDataSize: 178 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- map-side GBY numRows: 8 reduce-side GBY numRows: 4 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid)) +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 8 reduce-side GBY numRows: 4 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid)) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_GROUPING_SETS (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL state)) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL 
locid))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: state, locid + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Group By Operator + bucketGroup: false + keys: + expr: state + type: string + expr: locid + type: int + expr: '0' + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 792 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 792 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + expr: KEY._col2 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 4 rawDataSize: 396 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Statistics: + dbStats: [ dbName: 
default tabStats: [ tabName: loc_orc numRows: 4 rawDataSize: 356 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 4 rawDataSize: 356 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- map-side GBY numRows: 12 reduce-side GBY numRows: 6 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid),()) +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 12 reduce-side GBY numRows: 6 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid),()) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_GROUPING_SETS (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL state)) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL locid)) TOK_GROUPING_SETS_EXPRESSION))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: state, locid + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Group By Operator + bucketGroup: false + keys: + expr: state + type: string + expr: locid + type: int + expr: '0' + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 12 rawDataSize: 1188 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + expr: 
_col2 + type: string + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 12 rawDataSize: 1188 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + expr: KEY._col2 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 6 rawDataSize: 594 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 6 rawDataSize: 534 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 6 rawDataSize: 534 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- map-side GBY numRows: 16 reduce-side GBY numRows: 8 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),()) +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 16 
reduce-side GBY numRows: 8 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),()) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_GROUPING_SETS (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid)) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL state)) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL locid)) TOK_GROUPING_SETS_EXPRESSION))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: state, locid + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Group By Operator + bucketGroup: false + keys: + expr: state + type: string + expr: locid + type: int + expr: '0' + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 16 rawDataSize: 1584 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 16 rawDataSize: 1584 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + 
bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + expr: KEY._col2 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 792 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 712 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 712 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + diff --git ql/src/test/results/clientpositive/annotate_stats_join.q.out ql/src/test/results/clientpositive/annotate_stats_join.q.out new file mode 100644 index 0000000..4b3d247 --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_join.q.out @@ -0,0 +1,739 @@ +PREHOOK: query: create table if not exists emp_staging ( + lastname string, + deptid int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists emp_staging ( + lastname string, + deptid int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@emp_staging +PREHOOK: query: create table if not exists dept_staging ( + deptname string, + deptid int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists dept_staging ( + deptname string, + deptid int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dept_staging +PREHOOK: query: create table if not exists emp_orc like emp_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists emp_orc like emp_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@emp_orc +PREHOOK: query: alter table emp_orc set fileformat orc +PREHOOK: 
type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@emp_orc +PREHOOK: Output: default@emp_orc +POSTHOOK: query: alter table emp_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@emp_orc +POSTHOOK: Output: default@emp_orc +PREHOOK: query: create table if not exists dept_orc like dept_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists dept_orc like dept_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dept_orc +PREHOOK: query: alter table dept_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@dept_orc +PREHOOK: Output: default@dept_orc +POSTHOOK: query: alter table dept_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@dept_orc +POSTHOOK: Output: default@dept_orc +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@emp_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@emp_staging +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dept.txt' OVERWRITE INTO TABLE dept_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@dept_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dept.txt' OVERWRITE INTO TABLE dept_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@dept_staging +PREHOOK: query: insert overwrite table emp_orc select * from emp_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@emp_staging +PREHOOK: Output: default@emp_orc +POSTHOOK: query: insert overwrite table emp_orc select * from emp_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emp_staging +POSTHOOK: Output: default@emp_orc +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +PREHOOK: query: insert overwrite table dept_orc select * from dept_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@dept_staging +PREHOOK: Output: default@dept_orc +POSTHOOK: query: insert overwrite table dept_orc select * from dept_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dept_staging +POSTHOOK: Output: default@dept_orc +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +PREHOOK: query: analyze table emp_orc compute statistics for columns lastname,deptid +PREHOOK: type: QUERY +PREHOOK: Input: default@emp_orc +#### A masked pattern was here #### +POSTHOOK: query: analyze table emp_orc compute statistics for columns lastname,deptid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emp_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE 
[(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +PREHOOK: query: -- no statistics will be displayed for this case as column statistics for table dept_orc is not available yet +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) +PREHOOK: type: QUERY +POSTHOOK: query: -- no statistics will be displayed for this case as column statistics for table dept_orc is not available yet +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME emp_orc) e) (TOK_TABREF (TOK_TABNAME dept_orc) d) (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL d) deptid)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + d + TableScan + alias: d + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: dept_orc numRows: 4 rawDataSize: 344 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + sort order: + + Map-reduce partition columns: + expr: deptid + type: int + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: dept_orc numRows: 4 rawDataSize: 344 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + tag: 1 + value expressions: + expr: deptname + type: string + expr: deptid + type: int + e + TableScan + alias: e + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: emp_orc numRows: 6 rawDataSize: 560 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + sort order: + + Map-reduce partition columns: + expr: deptid + type: int + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: emp_orc numRows: 6 rawDataSize: 560 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + tag: 0 + value expressions: + expr: lastname + type: string + expr: deptid + type: int + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: dept_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns deptname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numPartitions 0 + numRows 4 + rawDataSize 344 + serialization.ddl struct dept_orc { string deptname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 229 +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns deptname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numPartitions 0 + numRows 4 + rawDataSize 344 + serialization.ddl struct dept_orc { string deptname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 229 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.dept_orc + name: default.dept_orc +#### A masked pattern was here #### + Partition + base file name: emp_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numPartitions 0 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 300 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numPartitions 0 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 300 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.emp_orc + name: default.emp_orc + Truncated Path -> Alias: + /dept_orc [d] + /emp_orc [e] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col4 + type: string + expr: _col5 + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: analyze table dept_orc compute statistics for columns deptname,deptid +PREHOOK: type: QUERY +PREHOOK: Input: default@dept_orc +#### A masked pattern was here #### +POSTHOOK: query: analyze table dept_orc compute 
statistics for columns deptname,deptid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dept_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +PREHOOK: query: -- emp_orc numRows: 6 DV: 3, dept_orc numRows: 4 DV: 4. Output of join will yield 6 rows (6*4)/max(3,4) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) +PREHOOK: type: QUERY +POSTHOOK: query: -- emp_orc numRows: 6 DV: 3, dept_orc numRows: 4 DV: 4. Output of join will yield 6 rows (6*4)/max(3,4) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME emp_orc) e) (TOK_TABREF (TOK_TABNAME dept_orc) d) (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL d) deptid)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + d + TableScan + alias: d + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: dept_orc numRows: 4 rawDataSize: 344 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + sort order: + + Map-reduce partition columns: + expr: deptid + type: int + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: dept_orc numRows: 6 rawDataSize: 516 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + tag: 1 + value expressions: + expr: deptname + type: string + expr: deptid + type: int + e + TableScan + alias: e + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: emp_orc numRows: 6 rawDataSize: 560 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + sort order: + + Map-reduce partition columns: + expr: deptid + type: int + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: emp_orc numRows: 6 rawDataSize: 560 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + tag: 0 + value expressions: + expr: lastname + type: string + expr: deptid + type: int + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: dept_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + 
bucket_count -1 + columns deptname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numPartitions 0 + numRows 4 + rawDataSize 344 + serialization.ddl struct dept_orc { string deptname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 229 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns deptname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numPartitions 0 + numRows 4 + rawDataSize 344 + serialization.ddl struct dept_orc { string deptname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 229 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.dept_orc + name: default.dept_orc +#### A masked pattern was here #### + Partition + base file name: emp_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numPartitions 0 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 300 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numPartitions 0 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 300 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.emp_orc + name: default.emp_orc + Truncated Path -> Alias: + /dept_orc [d] + /emp_orc [e] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: emp_orc numRows: 6 rawDataSize: 558 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE, tabName: dept_orc numRows: 6 rawDataSize: 516 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col4 + type: string + expr: _col5 + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: emp_orc numRows: 6 rawDataSize: 558 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE, tabName: dept_orc numRows: 6 rawDataSize: 516 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + 
compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: emp_orc numRows: 6 rawDataSize: 558 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE, tabName: dept_orc numRows: 6 rawDataSize: 516 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- emp_orc numRows: 6 DV: 3, dept_orc numRows: 4 DV: 4. Output of join will yield 6 rows (6*4)/max(3,4) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) +PREHOOK: type: QUERY +POSTHOOK: query: -- emp_orc numRows: 6 DV: 3, dept_orc numRows: 4 DV: 4. Output of join will yield 6 rows (6*4)/max(3,4) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME emp_orc) e) (TOK_TABREF (TOK_TABNAME dept_orc) d) (= (. (TOK_TABLE_OR_COL e) deptid) (. 
(TOK_TABLE_OR_COL d) deptid)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + d + TableScan + alias: d + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: dept_orc numRows: 4 rawDataSize: 344 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + sort order: + + Map-reduce partition columns: + expr: deptid + type: int + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: dept_orc numRows: 6 rawDataSize: 516 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + tag: 1 + value expressions: + expr: deptname + type: string + expr: deptid + type: int + e + TableScan + alias: e + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: emp_orc numRows: 6 rawDataSize: 560 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + sort order: + + Map-reduce partition columns: + expr: deptid + type: int + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: emp_orc numRows: 6 rawDataSize: 560 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + tag: 0 + value expressions: + expr: lastname + type: string + expr: deptid + type: int + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: dept_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns deptname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numPartitions 0 + numRows 4 + rawDataSize 344 + serialization.ddl struct dept_orc { string deptname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 229 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns deptname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numPartitions 0 + numRows 4 + rawDataSize 344 + serialization.ddl struct dept_orc { string deptname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 229 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.dept_orc + name: default.dept_orc +#### A masked pattern was here #### + Partition + base file name: emp_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numPartitions 0 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 300 +#### A masked pattern was here 
#### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numPartitions 0 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 300 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.emp_orc + name: default.emp_orc + Truncated Path -> Alias: + /dept_orc [d] + /emp_orc [e] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: emp_orc numRows: 6 rawDataSize: 558 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE, tabName: dept_orc numRows: 6 rawDataSize: 516 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col4 + type: string + expr: _col5 + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: emp_orc numRows: 6 rawDataSize: 558 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE, tabName: dept_orc numRows: 6 rawDataSize: 516 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: emp_orc numRows: 6 rawDataSize: 558 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE, tabName: dept_orc numRows: 6 rawDataSize: 516 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + diff --git ql/src/test/results/clientpositive/annotate_stats_limit.q.out ql/src/test/results/clientpositive/annotate_stats_limit.q.out new file mode 100644 index 0000000..8f50528 --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_limit.q.out @@ -0,0 +1,225 @@ +PREHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_staging +PREHOOK: query: create table loc_orc like loc_staging 
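(Illustrative aside, not part of the patch: the expected plans above assert the join row estimate "(6*4)/max(3,4) = 6" for emp_orc joined with dept_orc. The sketch below shows that arithmetic only; the class and method names, `JoinCardinalitySketch` and `estimateJoinRows`, are hypothetical and appear nowhere in this diff.)

```java
import java.util.Arrays;

// Illustrative only -- not part of this patch. Computes the textbook
// equi-join row estimate that the test comments above rely on:
//   rows = (product of input row counts)
//          / (product over join keys of max(distinct counts across inputs))
public class JoinCardinalitySketch {

  // rowCounts[i]  = row count of relation i
  // dvsPerKey[k][i] = distinct values of join key k in relation i
  static long estimateJoinRows(long[] rowCounts, long[][] dvsPerKey) {
    long numerator = 1;
    for (long rows : rowCounts) {
      numerator *= rows;
    }
    long denominator = 1;
    for (long[] dvs : dvsPerKey) {
      denominator *= Arrays.stream(dvs).max().orElse(1L);
    }
    return denominator == 0 ? 0 : numerator / denominator;
  }

  public static void main(String[] args) {
    // emp_orc: 6 rows, 3 distinct deptids; dept_orc: 4 rows, 4 distinct deptids
    // => (6 * 4) / max(3, 4) = 6, matching numRows in the expected plans above.
    System.out.println(estimateJoinRows(new long[] {6, 4}, new long[][] {{3, 4}}));
  }
}
```

With more than one equi-join key the denominator picks up one max per key, so each additional join predicate can only lower the estimate.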
+PREHOOK: type: CREATETABLE +POSTHOOK: query: create table loc_orc like loc_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_orc +PREHOOK: query: alter table loc_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc +POSTHOOK: query: alter table loc_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc +PREHOOK: query: load data local inpath '../data/files/loc.txt' overwrite into table loc_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@loc_staging +POSTHOOK: query: load data local inpath '../data/files/loc.txt' overwrite into table loc_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@loc_staging +PREHOOK: query: insert overwrite table loc_orc select * from loc_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_staging +PREHOOK: Output: default@loc_orc +POSTHOOK: query: insert overwrite table loc_orc select * from loc_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_staging +POSTHOOK: Output: default@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + ListSink + + +PREHOOK: query: -- numRows: 4 rawDataSize: 396 +explain extended select * from loc_orc limit 4 +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 4 rawDataSize: 396 +explain extended select * from loc_orc limit 4 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: 
loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 4))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 4 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + Limit + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 4 rawDataSize: 396 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + ListSink + + +PREHOOK: query: -- greater than the available number of rows +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc limit 16 +PREHOOK: type: QUERY +POSTHOOK: query: -- greater than the available number of rows +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc limit 16 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 16))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 16 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + Limit + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + ListSink + + +PREHOOK: query: -- numRows: 0 rawDataSize: 0 +explain extended select * from loc_orc limit 0 +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 0 rawDataSize: 0 +explain extended select * from loc_orc limit 0 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE 
[(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 0))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 0 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + Limit + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 0 rawDataSize: 0 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + ListSink + + diff --git ql/src/test/results/clientpositive/annotate_stats_part.q.out ql/src/test/results/clientpositive/annotate_stats_part.q.out new file mode 100644 index 0000000..39a12ea --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_part.q.out @@ -0,0 +1,1822 @@ +PREHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_staging +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/loc.txt' OVERWRITE INTO TABLE loc_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@loc_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/loc.txt' OVERWRITE INTO TABLE loc_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@loc_staging +PREHOOK: query: create table if not exists loc_orc ( + state string, + locid int, + zip bigint +) partitioned by(year int) stored as orc +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists loc_orc ( + state string, + locid int, + zip bigint +) partitioned by(year int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_orc +PREHOOK: query: -- basicStatState: NONE level: PARTITION colStatState: NONE +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: NONE level: PARTITION colStatState: NONE +explain extended select * from loc_orc +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: 
default tabStats: [ tabName: loc_orc numRows: 0 rawDataSize: 0 basicStatState: NONE level: PARTITION colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 0 rawDataSize: 0 basicStatState: NONE level: PARTITION colStatState: NONE]] + ListSink + + +PREHOOK: query: insert overwrite table loc_orc partition(year) select * from loc_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_staging +PREHOOK: Output: default@loc_orc +POSTHOOK: query: insert overwrite table loc_orc partition(year) select * from loc_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_staging +POSTHOOK: Output: default@loc_orc@year=2001 +POSTHOOK: Output: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__ +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- stats are disabled. basic stats will report the file size but not raw data size. so initial statistics will be PARTIAL + +-- basicStatState: PARTIAL level: PARTITION colStatState: NONE +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- stats are disabled. basic stats will report the file size but not raw data size. 
so initial statistics will be PARTIAL + +-- basicStatState: PARTIAL level: PARTITION colStatState: NONE +explain extended select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { 
string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Processor Tree: + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 0 rawDataSize: 621 basicStatState: PARTIAL level: PARTITION colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 0 rawDataSize: 621 basicStatState: PARTIAL level: PARTITION colStatState: NONE]] + ListSink + + +PREHOOK: query: -- partition level analyze statistics for specific partition +analyze table loc_orc partition(year=2001) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +PREHOOK: Input: default@loc_orc@year=2001 +PREHOOK: Output: default@loc_orc +PREHOOK: Output: default@loc_orc@year=2001 +POSTHOOK: query: -- partition level analyze statistics for specific partition +analyze table loc_orc partition(year=2001) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +POSTHOOK: Input: default@loc_orc@year=2001 +POSTHOOK: Output: default@loc_orc +POSTHOOK: Output: default@loc_orc@year=2001 +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- basicStatState: PARTIAL level: PARTITION colStatState: NONE +explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__' +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: PARTIAL level: PARTITION colStatState: NONE +explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc 
PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL year) '__HIVE_DEFAULT_PARTITION__')))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 344 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Processor Tree: + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 0 rawDataSize: 277 basicStatState: PARTIAL level: PARTITION colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 0 rawDataSize: 277 basicStatState: PARTIAL level: PARTITION colStatState: NONE]] + ListSink + + +PREHOOK: query: -- basicStatState: PARTIAL level: PARTITION colStatState: NONE +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: PARTIAL level: PARTITION colStatState: NONE +explain extended select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE 
[(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 344 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 344 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 344 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Processor Tree: + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 7 rawDataSize: 621 basicStatState: PARTIAL level: PARTITION colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 7 rawDataSize: 621 
basicStatState: PARTIAL level: PARTITION colStatState: NONE]] + ListSink + + +PREHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: NONE +explain extended select * from loc_orc where year=2001 +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: NONE +explain extended select * from loc_orc where year=2001 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL year) 2001)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 344 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 344 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Processor Tree: + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 7 rawDataSize: 344 basicStatState: COMPLETE level: PARTITION colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 7 rawDataSize: 344 basicStatState: COMPLETE level: 
PARTITION colStatState: NONE]] + ListSink + + +PREHOOK: query: -- partition level analyze statistics for all partitions +analyze table loc_orc partition(year) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +PREHOOK: Input: default@loc_orc@year=2001 +PREHOOK: Input: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__ +PREHOOK: Output: default@loc_orc +PREHOOK: Output: default@loc_orc@year=2001 +PREHOOK: Output: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__ +POSTHOOK: query: -- partition level analyze statistics for all partitions +analyze table loc_orc partition(year) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +POSTHOOK: Input: default@loc_orc@year=2001 +POSTHOOK: Input: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__ +POSTHOOK: Output: default@loc_orc +POSTHOOK: Output: default@loc_orc@year=2001 +POSTHOOK: Output: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__ +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: NONE +explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__' +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: NONE +explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL year) '__HIVE_DEFAULT_PARTITION__')))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 1 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 277 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 2 + numPartitions 2 + numRows 8 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 621 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Processor Tree: + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 1 rawDataSize: 277 basicStatState: COMPLETE level: PARTITION colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 1 rawDataSize: 277 basicStatState: COMPLETE level: PARTITION colStatState: NONE]] + ListSink + + +PREHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: NONE +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: NONE +explain extended select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 344 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 2 + numPartitions 2 + numRows 8 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 621 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 1 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 277 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 2 + numPartitions 2 + numRows 8 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 621 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Processor Tree: + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 621 basicStatState: COMPLETE level: PARTITION colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 621 basicStatState: COMPLETE level: PARTITION colStatState: NONE]] + ListSink + + +PREHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: NONE +explain extended select * from loc_orc where year=2001 or year='__HIVE_DEFAULT_PARTITION__' +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: NONE +explain extended select * from loc_orc where year=2001 or 
year='__HIVE_DEFAULT_PARTITION__' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (or (= (TOK_TABLE_OR_COL year) 2001) (= (TOK_TABLE_OR_COL year) '__HIVE_DEFAULT_PARTITION__'))))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 344 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 2 + numPartitions 2 + numRows 8 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 621 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 1 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 277 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 
+ columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 2 + numPartitions 2 + numRows 8 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 621 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Processor Tree: + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 621 basicStatState: COMPLETE level: PARTITION colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 621 basicStatState: COMPLETE level: PARTITION colStatState: NONE]] + ListSink + + +PREHOOK: query: -- both partitions will be pruned +-- basicStatState: NONE level: PARTITION colStatState: NONE +explain extended select * from loc_orc where year=2001 and year='__HIVE_DEFAULT_PARTITION__' +PREHOOK: type: QUERY +POSTHOOK: query: -- both partitions will be pruned +-- basicStatState: NONE level: PARTITION colStatState: NONE +explain extended select * from loc_orc where year=2001 and year='__HIVE_DEFAULT_PARTITION__' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (= (TOK_TABLE_OR_COL year) 2001) (= (TOK_TABLE_OR_COL year) '__HIVE_DEFAULT_PARTITION__'))))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 0 rawDataSize: 0 basicStatState: NONE level: PARTITION colStatState: NONE]] + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: ((year = 2001) and (year = '__HIVE_DEFAULT_PARTITION__')) + type: boolean + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 0 rawDataSize: 0 basicStatState: NONE level: PARTITION colStatState: NONE]] + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + 
outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 0 rawDataSize: 0 basicStatState: NONE level: PARTITION colStatState: NONE]] + ListSink + + +PREHOOK: query: -- partition level partial column statistics +analyze table loc_orc partition(year=2001) compute statistics for columns state,locid +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +PREHOOK: Input: default@loc_orc@year=2001 +#### A masked pattern was here #### +POSTHOOK: query: -- partition level partial column statistics +analyze table loc_orc partition(year=2001) compute statistics for columns state,locid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +POSTHOOK: Input: default@loc_orc@year=2001 +#### A masked pattern was here #### +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: NONE +explain extended select zip from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: NONE +explain extended select zip from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL zip))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 621 basicStatState: COMPLETE level: PARTITION colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: zip + type: bigint + outputColumnNames: _col0 + Statistics: + 
dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 621 basicStatState: COMPLETE level: PARTITION colStatState: NONE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 621 basicStatState: COMPLETE level: PARTITION colStatState: NONE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: year=2001 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 344 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 2 + numPartitions 2 + numRows 8 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 621 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc +#### A masked pattern was here #### + Partition + base file name: year=__HIVE_DEFAULT_PARTITION__ + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 1 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 277 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 2 + numPartitions 2 + numRows 8 + partition_columns year + rawDataSize 0 + 
serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 621 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc/year=2001 [loc_orc] + /loc_orc/year=__HIVE_DEFAULT_PARTITION__ [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: PARTIAL +explain extended select state from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: PARTIAL +explain extended select state from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 621 basicStatState: COMPLETE level: PARTITION colStatState: PARTIAL]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 621 basicStatState: COMPLETE level: PARTITION colStatState: PARTIAL]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 621 basicStatState: COMPLETE level: PARTITION colStatState: PARTIAL]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: year=2001 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 344 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 2 + numPartitions 2 + numRows 8 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 621 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc +#### A masked pattern was here #### + Partition + base file name: year=__HIVE_DEFAULT_PARTITION__ + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 1 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 277 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 2 + numPartitions 2 + numRows 8 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 621 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc/year=2001 [loc_orc] + /loc_orc/year=__HIVE_DEFAULT_PARTITION__ [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- column statistics for __HIVE_DEFAULT_PARTITION__ are not supported yet. Hence colStatState reports PARTIAL +-- basicStatState: COMPLETE level: PARTITION colStatState: PARTIAL +explain extended select state,locid from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- column statistics for __HIVE_DEFAULT_PARTITION__ are not supported yet.
Hence colStatState reports PARTIAL +-- basicStatState: COMPLETE level: PARTITION colStatState: PARTIAL +explain extended select state,locid from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 621 basicStatState: COMPLETE level: PARTITION colStatState: PARTIAL]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: _col0, _col1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 621 basicStatState: COMPLETE level: PARTITION colStatState: PARTIAL]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 621 basicStatState: COMPLETE level: PARTITION colStatState: PARTIAL]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: year=2001 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 344 +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 2 + numPartitions 2 + numRows 8 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 621 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc +#### A masked pattern was here #### + Partition + base file name: year=__HIVE_DEFAULT_PARTITION__ + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 1 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 277 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 2 + numPartitions 2 + numRows 8 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 621 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc/year=2001 [loc_orc] + /loc_orc/year=__HIVE_DEFAULT_PARTITION__ [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: COMPLETE +explain extended select state,locid from loc_orc where year=2001 +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: COMPLETE +explain extended select state,locid from loc_orc where year=2001 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, 
type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_WHERE (= (TOK_TABLE_OR_COL year) 2001)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 7 rawDataSize: 344 basicStatState: COMPLETE level: PARTITION colStatState: COMPLETE]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: _col0, _col1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 7 rawDataSize: 344 basicStatState: COMPLETE level: PARTITION colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 7 rawDataSize: 344 basicStatState: COMPLETE level: PARTITION colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: year=2001 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 344 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 2 + numPartitions 2 + numRows 8 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 621 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc/year=2001 [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: NONE +explain extended select state,locid from loc_orc where year!=2001 +PREHOOK: type: QUERY +POSTHOOK: query: 
-- basicStatState: COMPLETE level: PARTITION colStatState: NONE +explain extended select state,locid from loc_orc where year!=2001 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_WHERE (!= (TOK_TABLE_OR_COL year) 2001)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 1 rawDataSize: 277 basicStatState: COMPLETE level: PARTITION colStatState: NONE]] + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (year <> 2001) + type: boolean + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 1 rawDataSize: 277 basicStatState: COMPLETE level: PARTITION colStatState: NONE]] + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: _col0, _col1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 1 rawDataSize: 277 basicStatState: COMPLETE level: PARTITION colStatState: NONE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 1 rawDataSize: 277 basicStatState: COMPLETE level: PARTITION colStatState: NONE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: year=__HIVE_DEFAULT_PARTITION__ + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name 
default.loc_orc + numFiles 1 + numRows 1 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 277 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 2 + numPartitions 2 + numRows 8 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 621 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc/year=__HIVE_DEFAULT_PARTITION__ [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: PARTIAL +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: PARTIAL +explain extended select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 344 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A 
masked pattern was here #### + name default.loc_orc + numFiles 2 + numPartitions 2 + numRows 8 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 621 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 1 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 277 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 2 + numPartitions 2 + numRows 8 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 621 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Processor Tree: + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 621 basicStatState: COMPLETE level: PARTITION colStatState: PARTIAL]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 621 basicStatState: COMPLETE level: PARTITION colStatState: PARTIAL]] + ListSink + + diff --git ql/src/test/results/clientpositive/annotate_stats_ptf.q.out ql/src/test/results/clientpositive/annotate_stats_ptf.q.out new file mode 100644 index 0000000..c5f669d --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_ptf.q.out @@ -0,0 +1,769 @@ +PREHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_staging +PREHOOK: query: create table loc_orc like loc_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table loc_orc like loc_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_orc +PREHOOK: query: alter table loc_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc +POSTHOOK: query: alter table loc_orc set fileformat orc 
+POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc +PREHOOK: query: load data local inpath '../data/files/loc.txt' overwrite into table loc_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@loc_staging +POSTHOOK: query: load data local inpath '../data/files/loc.txt' overwrite into table loc_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@loc_staging +PREHOOK: query: insert overwrite table loc_orc select * from loc_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_staging +PREHOOK: Output: default@loc_orc +POSTHOOK: query: insert overwrite table loc_orc select * from loc_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_staging +POSTHOOK: Output: default@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- numRows: 8 rawDataSize: 120 +explain extended select zip, count(state) over (partition by locid) from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 8 rawDataSize: 120 +explain extended select zip, count(state) over (partition by locid) from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL zip)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL state) (TOK_WINDOWSPEC (TOK_PARTITIONINGSPEC (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL locid))))))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: 
false + Reduce Output Operator + key expressions: + expr: locid + type: int + expr: locid + type: int + sort order: ++ + Map-reduce partition columns: + expr: locid + type: int + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + tag: -1 + value expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + PTF Operator + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 120 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: _col2 + type: bigint + expr: _wcol0 + type: bigint + outputColumnNames: _col0, _col1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 120 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 120 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types bigint:bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- 
numRows: 8 rawDataSize: 120 +explain extended select zip, count(state) over (partition by locid,zip) from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 8 rawDataSize: 120 +explain extended select zip, count(state) over (partition by locid,zip) from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL zip)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL state) (TOK_WINDOWSPEC (TOK_PARTITIONINGSPEC (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL locid) (TOK_TABLE_OR_COL zip))))))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Reduce Output Operator + key expressions: + expr: locid + type: int + expr: zip + type: bigint + expr: locid + type: int + expr: zip + type: bigint + sort order: ++++ + Map-reduce partition columns: + expr: locid + type: int + expr: zip + type: bigint + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + tag: -1 + value expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + 
Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + PTF Operator + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 120 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: _col2 + type: bigint + expr: _wcol0 + type: bigint + outputColumnNames: _col0, _col1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 120 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 120 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types bigint:bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 8 rawDataSize: 120 +explain extended select zip, count(state) over (partition by zip order by locid) from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 8 rawDataSize: 120 +explain extended select zip, count(state) over (partition by zip order by locid) from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL zip)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL state) (TOK_WINDOWSPEC (TOK_PARTITIONINGSPEC (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL zip)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL locid)))))))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Reduce Output Operator + key expressions: + expr: zip + type: bigint + expr: locid + type: int + sort order: ++ + Map-reduce partition columns: + expr: zip + type: bigint + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + tag: -1 + value expressions: + expr: state + type: string + expr: locid + type: int + expr: zip 
+ type: bigint + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + PTF Operator + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 120 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: _col2 + type: bigint + expr: _wcol0 + type: bigint + outputColumnNames: _col0, _col1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 120 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 120 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types bigint:bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 8 rawDataSize: 120 +explain extended select zip, count(state) over (partition by zip order by locid rows between unbounded preceding and current row) from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 8 rawDataSize: 120 +explain extended select zip, count(state) over (partition by zip order by locid rows between unbounded preceding and current row) from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: 
loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL zip)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL state) (TOK_WINDOWSPEC (TOK_PARTITIONINGSPEC (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL zip)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL locid)))) (TOK_WINDOWRANGE (preceding unbounded) current))))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Reduce Output Operator + key expressions: + expr: zip + type: bigint + expr: locid + type: int + sort order: ++ + Map-reduce partition columns: + expr: zip + type: bigint + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + tag: -1 + value expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + PTF Operator + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc 
numRows: 8 rawDataSize: 120 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: _col2 + type: bigint + expr: _wcol0 + type: bigint + outputColumnNames: _col0, _col1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 120 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 120 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types bigint:bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 8 rawDataSize: 120 +explain extended select zip, count(state) over (partition by zip order by locid rows between 3 preceding and current row) from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 8 rawDataSize: 120 +explain extended select zip, count(state) over (partition by zip order by locid rows between 3 preceding and current row) from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL zip)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL state) (TOK_WINDOWSPEC (TOK_PARTITIONINGSPEC (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL zip)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL locid)))) (TOK_WINDOWRANGE (preceding 3) current))))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Reduce Output Operator + key expressions: + expr: zip + type: bigint + expr: locid + type: int + sort order: ++ + Map-reduce partition columns: + expr: zip + type: bigint + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + tag: -1 + value expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + PTF Operator + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 120 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: _col2 + type: bigint + expr: _wcol0 + type: bigint + outputColumnNames: _col0, _col1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 120 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 120 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types bigint:bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + +
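All four windowing plans above show the same two-step pattern: TableScan, Reduce Output, and Extract carry the table-level annotation (numRows: 8, rawDataSize: 796), and once the PTF result is projected down to the two output columns the annotation shrinks to numRows: 8, rawDataSize: 120. The frame variants (unbounded preceding, 3 preceding) change neither the row count nor the projected schema, so they cannot change the annotation. Below is a minimal sketch of that projection rule: numRows passes through, rawDataSize is re-derived from the surviving columns. The class, constants, and helper names are illustrative stand-ins, not the patch's actual StatsUtils code; the true per-column figures come from the stored column statistics (the fixed sizes assumed here reproduce the simple fixed-width cases in annotate_stats_select.q.out that follows, e.g. 2 boolean rows -> 8).

import java.util.List;

// Sketch only: re-derive rawDataSize after a projection, assuming
// fixed average sizes per primitive type (assumed constants).
public class RawDataSizeSketch {

  // Assumed per-type average sizes in bytes (illustrative).
  static long avgSize(String type) {
    switch (type) {
      case "boolean":
      case "tinyint":
      case "smallint":
      case "int":
      case "float":
        return 4L;
      case "bigint":
      case "double":
        return 8L;
      default:
        throw new IllegalArgumentException("no fixed-size estimate for " + type);
    }
  }

  // A projection keeps numRows unchanged and recomputes rawDataSize
  // from the columns that survive it.
  static long projectedRawDataSize(long numRows, List<String> projectedTypes) {
    long rowWidth = 0L;
    for (String t : projectedTypes) {
      rowWidth += avgSize(t);
    }
    return numRows * rowWidth;
  }

  public static void main(String[] args) {
    // e.g. "select bo1 from alltypes_orc" over 2 rows: 2 * 4 = 8,
    // matching the "numRows: 2 rawDataSize: 8" expectation below.
    System.out.println(projectedRawDataSize(2, List.of("boolean")));
  }
}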
diff --git ql/src/test/results/clientpositive/annotate_stats_select.q.out ql/src/test/results/clientpositive/annotate_stats_select.q.out new file mode 100644 index 0000000..8485597 --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_select.q.out @@ -0,0 +1,2955 @@ +PREHOOK: query: create table if not exists alltypes ( + bo1 boolean, + ti1 tinyint, + si1 smallint, + i1 int, + bi1 bigint, + f1 float, + d1 double, + de1 decimal, + ts1 timestamp, + da1 timestamp, + s1 string, + m1 map<string,string>, + l1 array<int>, + st1 struct<c1:int,c2:string> +) row format delimited fields terminated by '|' +collection items terminated by ',' +map keys terminated by ':' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists alltypes ( + bo1 boolean, + ti1 tinyint, + si1 smallint, + i1 int, + bi1 bigint, + f1 float, + d1 double, + de1 decimal, + ts1 timestamp, + da1 timestamp, + s1 string, + m1 map<string,string>, + l1 array<int>, + st1 struct<c1:int,c2:string> +) row format delimited fields terminated by '|' +collection items terminated by ',' +map keys terminated by ':' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@alltypes +PREHOOK: query: create table alltypes_orc like alltypes +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table alltypes_orc like alltypes +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@alltypes_orc +PREHOOK: query: alter table alltypes_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@alltypes_orc +PREHOOK: Output: default@alltypes_orc +POSTHOOK: query: alter table alltypes_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@alltypes_orc +POSTHOOK: Output: default@alltypes_orc +PREHOOK: query: load data local inpath '/work/hive/trunk/hive-git/data/files/alltypes.txt' overwrite into table alltypes +PREHOOK: type: LOAD +PREHOOK: Output: default@alltypes +POSTHOOK: query: load data local inpath '/work/hive/trunk/hive-git/data/files/alltypes.txt' overwrite into table alltypes +POSTHOOK: type: LOAD +POSTHOOK: Output: default@alltypes +PREHOOK: query: insert overwrite table alltypes_orc select * from alltypes +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypes +PREHOOK: Output: default@alltypes_orc +POSTHOOK: query: insert overwrite table alltypes_orc select * from alltypes +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypes +POSTHOOK: Output: default@alltypes_orc +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array<int>, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map<string,string>, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct<c1:int,c2:string>, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +PREHOOK: query: -- basicStatState: PARTIAL level: TABLE colStatState: NONE numRows: 2 rawDataSize: 1514 +explain extended select * from alltypes_orc +PREHOOK: type: 
QUERY +POSTHOOK: query: -- basicStatState: PARTIAL level: TABLE colStatState: NONE numRows: 2 rawDataSize: 1514 +explain extended select * from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: alltypes_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 1514 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: bo1 + type: boolean + expr: ti1 + type: tinyint + expr: si1 + type: smallint + expr: i1 + type: int + expr: bi1 + type: bigint + expr: f1 + type: float + expr: d1 + type: double + expr: de1 + type: decimal + expr: ts1 + type: timestamp + expr: da1 + type: timestamp + expr: s1 + type: string + expr: m1 + type: map + expr: l1 + type: array + expr: st1 + type: struct + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 1514 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + ListSink + + +PREHOOK: query: -- statistics for complex types are not supported yet +analyze table alltypes_orc compute statistics for columns bo1, ti1, si1, i1, bi1, f1, d1,s1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypes_orc +#### A masked pattern was here #### +POSTHOOK: query: -- statistics for complex types are not supported yet +analyze table alltypes_orc compute statistics for columns bo1, ti1, si1, i1, bi1, f1, d1,s1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypes_orc +#### A masked pattern was 
here #### +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +PREHOOK: query: -- numRows: 2 rawDataSize: 1514 +explain extended select * from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 1514 +explain extended select * from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME 
alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: alltypes_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 1514 basicStatState: COMPLETE level: TABLE colStatState: PARTIAL]] + GatherStats: false + Select Operator + expressions: + expr: bo1 + type: boolean + expr: ti1 + type: tinyint + expr: si1 + type: smallint + expr: i1 + type: int + expr: bi1 + type: bigint + expr: f1 + type: float + expr: d1 + type: double + expr: de1 + type: decimal + expr: ts1 + type: timestamp + expr: da1 + type: timestamp + expr: s1 + type: string + expr: m1 + type: map + expr: l1 + type: array + expr: st1 + type: struct + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 1514 basicStatState: COMPLETE level: TABLE colStatState: PARTIAL]] + ListSink + + +PREHOOK: query: -- numRows: 2 rawDataSize: 8 +explain extended select bo1 from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 8 +explain extended select bo1 from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL bo1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 1514 basicStatState: COMPLETE level: 
TABLE colStatState: COMPLETE]] + GatherStats: false + Select Operator + expressions: + expr: bo1 + type: boolean + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 8 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 8 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types boolean + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map<string,string>:array<int>:struct<c1:int,c2:string> + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map<string,string> m1, list<i32> l1, struct<c1:i32,c2:string> st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map<string,string>:array<int>:struct<c1:int,c2:string> + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map<string,string> m1, list<i32> l1, struct<c1:i32,c2:string> st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + +
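Two annotation states are worth contrasting at this point. Before the analyze statement, select * was annotated colStatState: NONE; after column statistics were computed for eight of the fourteen columns, select * is PARTIAL, yet the select bo1 plan above is COMPLETE because it reads only analyzed columns. A sketch of a derivation consistent with that observable behavior follows; the enum and method names are hypothetical, since the patch's actual API for this flag is not visible in these golden files.

import java.util.Set;

// Illustrative derivation of colStatState: COMPLETE when every column
// a query reads has statistics, PARTIAL when only some do, NONE when
// none do. Hypothetical names, not the patch's real code.
public class ColStatStateSketch {

  enum State { NONE, PARTIAL, COMPLETE }

  static State deriveState(Set<String> referenced, Set<String> analyzed) {
    long covered = referenced.stream().filter(analyzed::contains).count();
    if (covered == 0) {
      return State.NONE;
    }
    return covered == referenced.size() ? State.COMPLETE : State.PARTIAL;
  }

  public static void main(String[] args) {
    Set<String> analyzed = Set.of("bo1", "ti1", "si1", "i1", "bi1", "f1", "d1", "s1");
    // a query reading both an analyzed (bo1) and an unanalyzed (m1) column
    System.out.println(deriveState(Set.of("bo1", "m1"), analyzed)); // PARTIAL
    // a query reading only analyzed columns
    System.out.println(deriveState(Set.of("bo1"), analyzed));       // COMPLETE
  }
}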
+PREHOOK: query: -- col alias renaming +-- numRows: 2 rawDataSize: 8 +explain extended select i1 as int1 from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- col alias renaming +-- numRows: 2 rawDataSize: 8 +explain extended select i1 as int1 from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array<int>, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map<string,string>, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct<c1:int,c2:string>, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL i1) int1)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 1514 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Select Operator + expressions: + expr: i1 + type: int + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 8 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 8 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count 
-1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 172 +explain extended select s1 from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 172 +explain extended select s1 from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] 
+POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL s1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 1514 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Select Operator + expressions: + expr: s1 + type: string + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 172 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 172 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + 
serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + +
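The next query projects the map column m1. Since compute statistics does not cover complex types (map, array, struct), no per-column estimate exists for m1, and the Select operator's annotation stays at the parent's numRows: 2, rawDataSize: 1514 with colStatState: NONE rather than being recomputed as in the fixed-width plans above. A fallback consistent with that behavior, sketched with hypothetical types and field names:

// Illustrative fallback matching the "select m1" plan below: when a
// projected column has no column statistics, keep the parent's
// numRows/rawDataSize instead of recomputing. Hypothetical stand-ins,
// not the patch's actual Statistics classes.
public class StatsFallbackSketch {

  static final class BasicStats {
    final long numRows;
    final long rawDataSize;
    BasicStats(long numRows, long rawDataSize) {
      this.numRows = numRows;
      this.rawDataSize = rawDataSize;
    }
  }

  static BasicStats annotateSelect(BasicStats parent, Long recomputedRawDataSize) {
    // recomputedRawDataSize is null when any projected column lacks stats.
    if (recomputedRawDataSize == null) {
      return parent; // pass the annotation through unchanged
    }
    return new BasicStats(parent.numRows, recomputedRawDataSize);
  }

  public static void main(String[] args) {
    BasicStats parent = new BasicStats(2, 1514);
    BasicStats m1 = annotateSelect(parent, null); // complex type: no estimate
    System.out.println(m1.numRows + " " + m1.rawDataSize); // prints: 2 1514
  }
}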
+PREHOOK: query: -- column statistics for complex types unsupported and so statistics will not be updated +-- numRows: 2 rawDataSize: 1514 +explain extended select m1 from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- column statistics for complex types unsupported and so statistics will not be updated +-- numRows: 2 rawDataSize: 1514 +explain extended select m1 from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array<int>, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map<string,string>, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct<c1:int,c2:string>, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL m1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 1514 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: m1 + type: map<string,string> + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 1514 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 1514 basicStatState: COMPLETE level: TABLE colStatState: NONE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types map<string,string> + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map<string,string>:array<int>:struct<c1:int,c2:string> + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map<string,string> m1, list<i32> l1, struct<c1:i32,c2:string> st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map<string,string>:array<int>:struct<c1:int,c2:string> + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map<string,string> m1, list<i32> l1, struct<c1:i32,c2:string> st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 244 +explain extended select bo1, ti1, si1, i1, bi1, f1, d1,s1 from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 244 +explain extended select bo1, ti1, si1, i1, bi1, f1, d1,s1 from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE 
[(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL bo1)) (TOK_SELEXPR (TOK_TABLE_OR_COL ti1)) (TOK_SELEXPR (TOK_TABLE_OR_COL si1)) (TOK_SELEXPR (TOK_TABLE_OR_COL i1)) (TOK_SELEXPR (TOK_TABLE_OR_COL bi1)) (TOK_SELEXPR (TOK_TABLE_OR_COL f1)) (TOK_SELEXPR (TOK_TABLE_OR_COL d1)) (TOK_SELEXPR (TOK_TABLE_OR_COL s1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 1514 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Select Operator + expressions: + expr: bo1 + type: boolean + expr: ti1 + type: tinyint + expr: si1 + type: smallint + expr: i1 + type: int + expr: bi1 + type: bigint + expr: f1 + type: float + expr: d1 + type: double + expr: s1 + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 244 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 244 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 + columns.types boolean:tinyint:smallint:int:bigint:float:double:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + 
columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 0 +explain extended select null from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 0 +explain extended select null from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: 
alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_NULL)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 1514 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: null + type: string + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 0 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 0 basicStatState: COMPLETE level: TABLE colStatState: NONE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib 
org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 8 +explain extended select 11 from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 8 +explain extended select 11 from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 11)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 1514 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: 11 + type: int + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 8 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 8 basicStatState: COMPLETE level: TABLE colStatState: NONE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types int + escape.delim \ + hive.serialization.extend.nesting.levels true + 
serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 16 +explain extended select 11L from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 16 +explain extended select 11L from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE 
[(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 11L)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 1514 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: 11 + type: bigint + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 16 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 16 basicStatState: COMPLETE level: TABLE colStatState: NONE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns 
bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 16 +explain extended select 11.0 from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 16 +explain extended select 11.0 from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 11.0)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 1514 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: 11.0 + type: double + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 16 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + File Output 
Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 16 basicStatState: COMPLETE level: TABLE colStatState: NONE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types double + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 178 +explain extended select "hello" from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 178 +explain extended select "hello" from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE 
[(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR "hello")))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 1514 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: 'hello' + type: string + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 178 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 178 basicStatState: COMPLETE level: TABLE colStatState: NONE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct 
alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 96 +explain extended select unbase64("0xe23") from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 96 +explain extended select unbase64("0xe23") from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION unbase64 "0xe23"))))) + +STAGE DEPENDENCIES: + Stage-1 is a root 
stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 1514 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: unbase64('0xe23') + type: binary + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 96 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 96 basicStatState: COMPLETE level: TABLE colStatState: NONE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types binary + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: 
query: -- numRows: 2 rawDataSize: 16 +explain extended select cast("1" as TINYINT), cast("20" as SMALLINT) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 16 +explain extended select cast("1" as TINYINT), cast("20" as SMALLINT) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_TINYINT "1")) (TOK_SELEXPR (TOK_FUNCTION TOK_SMALLINT "20"))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 1514 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: UDFToByte('1') + type: tinyint + expr: UDFToShort('20') + type: smallint + outputColumnNames: _col0, _col1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 16 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 16 basicStatState: COMPLETE level: TABLE colStatState: NONE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types tinyint:smallint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 80 +explain extended select cast("1970-12-31 15:59:58.174" as TIMESTAMP) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 80 +explain extended select cast("1970-12-31 15:59:58.174" as TIMESTAMP) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: 
Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_TIMESTAMP "1970-12-31 15:59:58.174"))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 1514 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: CAST( '1970-12-31 15:59:58.174' AS TIMESTAMP) + type: timestamp + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 80 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 80 basicStatState: COMPLETE level: TABLE colStatState: NONE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types timestamp + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 112 +explain extended select cast("1970-12-31 15:59:58.174" as DATE) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 112 +explain extended select cast("1970-12-31 15:59:58.174" as DATE) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_DATE "1970-12-31 15:59:58.174"))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 1514 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + 
expr: CAST( '1970-12-31 15:59:58.174' AS DATE) + type: date + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 112 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 112 basicStatState: COMPLETE level: TABLE colStatState: NONE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types date + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 224 +explain extended select cast("58.174" as DECIMAL) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 224 +explain extended select cast("58.174" as DECIMAL) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, 
type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_DECIMAL "58.174"))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 1514 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: CAST( '58.174' AS DECIMAL) + type: decimal + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 224 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 224 basicStatState: COMPLETE level: TABLE colStatState: NONE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types decimal + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns 
bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map<string,string>:array<int>:struct<c1:int,c2:string> + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map<string,string> m1, list<int> l1, struct<c1:int,c2:string> st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map<string,string>:array<int>:struct<c1:int,c2:string> + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map<string,string> m1, list<int> l1, struct<c1:int,c2:string> st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 112 +explain extended select array(1,2,3) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 112 +explain extended select array(1,2,3) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array<int>, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map<string,string>, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct<c1:int,c2:string>, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION array 1 2 3))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 1514 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: array(1,2,3) + type: array<int> + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 112 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 112 basicStatState: COMPLETE level: TABLE colStatState: NONE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types array<int> + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map<string,string>:array<int>:struct<c1:int,c2:string> + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map<string,string> m1, list<int> l1, struct<c1:int,c2:string> st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map<string,string>:array<int>:struct<c1:int,c2:string> + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map<string,string> m1, list<int> l1, struct<c1:int,c2:string> st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + +
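The annotation above works out to 56 bytes per row for the constant array<int> projection (2 rows x 56 = 112), independent of the scanned columns. A minimal sketch of that idea in Java follows; the size constants are illustrative assumptions fitted to the 112 above, not the exact values the patch derives from its JVM object model:

import java.util.HashMap;
import java.util.Map;

public class TypeSizeSketch {
  // Assumed flat widths in bytes for a few primitive Hive type names.
  private static final Map<String, Long> PRIMITIVE_SIZES = new HashMap<String, Long>();
  static {
    PRIMITIVE_SIZES.put("int", 4L);
    PRIMITIVE_SIZES.put("bigint", 8L);
    PRIMITIVE_SIZES.put("double", 8L);
    PRIMITIVE_SIZES.put("boolean", 1L);
  }

  // Assumed fixed container overhead (object headers, references) per value.
  private static final long CONTAINER_OVERHEAD = 44L;

  // Estimated size of one array<elementType> value holding n elements.
  static long sizeOfArray(String elementType, int n) {
    Long width = PRIMITIVE_SIZES.get(elementType);
    return CONTAINER_OVERHEAD + n * (width == null ? 8L : width);
  }

  public static void main(String[] args) {
    long numRows = 2;                    // numRows from the table scan stats above
    long perRow = sizeOfArray("int", 3); // the constant array(1,2,3)
    // With the assumed constants: 2 * (44 + 3*4) = 112, matching the plan,
    // but the 44-byte overhead is reverse-engineered here, not taken from Hive.
    System.out.println("rawDataSize ~= " + (numRows * perRow));
  }
}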
+PREHOOK: query: -- numRows: 2 rawDataSize: 1508 +explain extended select str_to_map("a=1 b=2 c=3", " ", "=") from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 1508 +explain extended select str_to_map("a=1 b=2 c=3", " ", "=") from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array<int>, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map<string,string>, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct<c1:int,c2:string>, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION str_to_map "a=1 b=2 c=3" " " "="))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 1514 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: str_to_map('a=1 b=2 c=3',' ','=') + type: map<string,string> + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 1508 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 1508 basicStatState: COMPLETE level: TABLE colStatState: NONE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types map<string,string> + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map<string,string>:array<int>:struct<c1:int,c2:string> + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map<string,string> m1, list<int> l1, struct<c1:int,c2:string> st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map<string,string>:array<int>:struct<c1:int,c2:string> + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map<string,string> m1, list<int> l1, struct<c1:int,c2:string> st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 112 +explain extended select NAMED_STRUCT("a", 11, "b", 11) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 112 +explain extended select NAMED_STRUCT("a", 11, "b", 11) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array<int>, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map<string,string>, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct<c1:int,c2:string>, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION NAMED_STRUCT "a" 11 "b" 11))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 1514 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: named_struct('a',11,'b',11) + type: struct<a:int,b:int> + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 112 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 112 basicStatState: COMPLETE level: TABLE colStatState: NONE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types struct<a:int,b:int> + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map<string,string>:array<int>:struct<c1:int,c2:string> + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map<string,string> m1, list<int> l1, struct<c1:int,c2:string> st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map<string,string>:array<int>:struct<c1:int,c2:string> + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map<string,string> m1, list<int> l1, struct<c1:int,c2:string> st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 250 +explain extended select CREATE_UNION(0, "hello") from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 250 +explain extended select CREATE_UNION(0, "hello") from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array<int>, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map<string,string>, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct<c1:int,c2:string>, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION CREATE_UNION 0 "hello"))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 1514 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: create_union(0,'hello') + type: uniontype<string> + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 250 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 250 basicStatState: COMPLETE level: TABLE colStatState: NONE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types uniontype<string> + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map<string,string>:array<int>:struct<c1:int,c2:string> + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map<string,string> m1, list<int> l1, struct<c1:int,c2:string> st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map<string,string>:array<int>:struct<c1:int,c2:string> + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map<string,string> m1, list<int> l1, struct<c1:int,c2:string> st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + +
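Taken together, the four projections above show how the per-row estimate tracks the projected type when only table-level stats exist: fixed-width nests like array(1,2,3) and named_struct('a',11,'b',11) both come out at 56 bytes per row (rawDataSize 112), while types carrying variable-length strings are charged a default width, which is why str_to_map lands at 754 bytes per row (1508) and the union at 125 (250). A rough recursive estimator over Hive-style type names, with placeholder constants (the patch's own constants are what produce the 56/754/125 figures, not these):

public class NestedTypeSizeSketch {
  static final long OVERHEAD = 44L;        // assumed per-container overhead
  static final long DEFAULT_STRING = 100L; // assumed width when no column stats exist

  // Tiny recursive estimator; does not handle generics nested inside generics.
  static long estimate(String type) {
    if (type.startsWith("array<")) {
      return OVERHEAD + estimate(inner(type));             // charge one element
    } else if (type.startsWith("map<")) {
      String[] kv = inner(type).split(",", 2);
      return OVERHEAD + estimate(kv[0]) + estimate(kv[1]); // one key/value pair
    } else if (type.startsWith("struct<") || type.startsWith("uniontype<")) {
      long total = OVERHEAD;
      for (String field : inner(type).split(",")) {
        String[] parts = field.split(":", 2);              // "name:type" or bare "type"
        total += estimate(parts[parts.length - 1]);
      }
      return total;
    } else if (type.equals("string")) {
      return DEFAULT_STRING;
    }
    return 8L;                                             // assumed primitive width
  }

  private static String inner(String type) {
    return type.substring(type.indexOf('<') + 1, type.length() - 1);
  }

  public static void main(String[] args) {
    System.out.println(estimate("struct<a:int,b:int>")); // fixed-width nest
    System.out.println(estimate("map<string,string>"));  // string-heavy nest
    System.out.println(estimate("uniontype<string>"));
  }
}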
+PREHOOK: query: -- COUNT(*) is projected as a new column. It is not projected as a GenericUDF, so the data size estimate will be based on the number of rows +-- numRows: 1 rawDataSize: 757 +explain extended select count(*) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- COUNT(*) is projected as a new column. It is not projected as a GenericUDF, so the data size estimate will be based on the number of rows +-- numRows: 1 rawDataSize: 757 +explain extended select count(*) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array<int>, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map<string,string>, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct<c1:int,c2:string>, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 1514 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + GatherStats: false + Select Operator + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 1514 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 1 rawDataSize: 757 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + Reduce Output Operator + sort order: + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 1 rawDataSize: 757 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + tag: -1 + value expressions: + expr: _col0 + type: bigint + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map<string,string>:array<int>:struct<c1:int,c2:string> + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map<string,string> m1, list<int> l1, struct<c1:int,c2:string> st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map<string,string>:array<int>:struct<c1:int,c2:string> + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map<string,string> m1, list<int> l1, struct<c1:int,c2:string> st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 1 rawDataSize: 757 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 1 rawDataSize: 757 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 1 rawDataSize: 757 basicStatState: COMPLETE level: TABLE colStatState: NONE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + +
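The count(*) numbers above follow from scaling the parent's data size by the change in row count: the scan reports 2 rows and 1514 bytes, the aggregate emits a single row, and 1514 * 1/2 = 757. The same figures recur in the count(1) plan below. A minimal sketch of that proportional rule (the class and method names are illustrative, not the patch's API):

public class ScaleStatsSketch {
  // Scale dataSize proportionally when an operator changes the row count.
  static long scaleDataSize(long parentRows, long parentDataSize, long newRows) {
    if (parentRows <= 0) {
      return parentDataSize;  // nothing to scale by; keep the old estimate
    }
    return (long) Math.ceil((double) parentDataSize / parentRows * newRows);
  }

  public static void main(String[] args) {
    // 2 rows / 1514 bytes at the scan; group-by collapses to 1 row -> 757.
    System.out.println(scaleDataSize(2, 1514, 1));
  }
}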
+PREHOOK: query: -- COUNT(1) is projected as a new column. It is not projected as a GenericUDF, so the data size estimate will be based on the number of rows +-- numRows: 1 rawDataSize: 757 +explain extended select count(1) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- COUNT(1) is projected as a new column. It is not projected as a GenericUDF, so the data size estimate will be based on the number of rows +-- numRows: 1 rawDataSize: 757 +explain extended select count(1) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array<int>, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map<string,string>, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct<c1:int,c2:string>, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 1514 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + GatherStats: false + Select Operator + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 2 rawDataSize: 1514 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 1 rawDataSize: 757 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + Reduce Output Operator + sort order: + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 1 rawDataSize: 757 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + tag: -1 + value expressions: + expr: _col0 + type: bigint + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map<string,string>:array<int>:struct<c1:int,c2:string> + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map<string,string> m1, list<int> l1, struct<c1:int,c2:string> st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal:timestamp:timestamp:string:map<string,string>:array<int>:struct<c1:int,c2:string> + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal de1, timestamp ts1, timestamp da1, string s1, map<string,string> m1, list<int> l1, struct<c1:int,c2:string> st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1214 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 1 rawDataSize: 757 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 1 rawDataSize: 757 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: alltypes_orc numRows: 1 rawDataSize: 757 basicStatState: COMPLETE level: TABLE colStatState: NONE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + diff --git ql/src/test/results/clientpositive/annotate_stats_table.q.out ql/src/test/results/clientpositive/annotate_stats_table.q.out new file mode 100644 index 0000000..fccc67e --- /dev/null 
+++ ql/src/test/results/clientpositive/annotate_stats_table.q.out @@ -0,0 +1,369 @@ +PREHOOK: query: create table if not exists emp_staging ( + lastname string, + deptid int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists emp_staging ( + lastname string, + deptid int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@emp_staging +PREHOOK: query: create table if not exists emp_orc like emp_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists emp_orc like emp_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@emp_orc +PREHOOK: query: alter table emp_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@emp_orc +PREHOOK: Output: default@emp_orc +POSTHOOK: query: alter table emp_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@emp_orc +POSTHOOK: Output: default@emp_orc +PREHOOK: query: -- basicStatState: NONE level: TABLE colStatState: NONE +explain extended select * from emp_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: NONE level: TABLE colStatState: NONE +explain extended select * from emp_orc +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: emp_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: emp_orc numRows: 0 rawDataSize: 0 basicStatState: NONE level: TABLE colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: lastname + type: string + expr: deptid + type: int + outputColumnNames: _col0, _col1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: emp_orc numRows: 0 rawDataSize: 0 basicStatState: NONE level: TABLE colStatState: NONE]] + ListSink + + +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@emp_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@emp_staging +PREHOOK: query: insert overwrite table emp_orc select * from emp_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@emp_staging +PREHOOK: Output: default@emp_orc +POSTHOOK: query: insert overwrite table emp_orc select * from emp_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emp_staging +POSTHOOK: Output: default@emp_orc +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +PREHOOK: query: -- stats are disabled. basic stats will report the file size but not raw data size. so initial statistics will be PARTIAL + +-- basicStatState: PARTIAL level: TABLE colStatState: NONE +explain extended select * from emp_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- stats are disabled. basic stats will report the file size but not raw data size. 
so initial statistics will be PARTIAL + +-- basicStatState: PARTIAL level: TABLE colStatState: NONE +explain extended select * from emp_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: emp_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: emp_orc numRows: 0 rawDataSize: 300 basicStatState: PARTIAL level: TABLE colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: lastname + type: string + expr: deptid + type: int + outputColumnNames: _col0, _col1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: emp_orc numRows: 0 rawDataSize: 300 basicStatState: PARTIAL level: TABLE colStatState: NONE]] + ListSink + + +PREHOOK: query: -- table level analyze statistics +analyze table emp_orc compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@emp_orc +PREHOOK: Output: default@emp_orc +POSTHOOK: query: -- table level analyze statistics +analyze table emp_orc compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emp_orc +POSTHOOK: Output: default@emp_orc +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +PREHOOK: query: -- basicStatState: COMPLETE level: TABLE colStatState: NONE +explain extended select * from emp_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE level: TABLE colStatState: NONE +explain extended select * from emp_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: emp_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: emp_orc numRows: 6 rawDataSize: 300 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + GatherStats: false + Select Operator + expressions: + expr: lastname + type: string + expr: deptid + type: int + outputColumnNames: _col0, _col1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: emp_orc numRows: 6 rawDataSize: 300 basicStatState: COMPLETE level: TABLE colStatState: NONE]] + ListSink + + +PREHOOK: query: -- column level partial statistics +analyze table emp_orc compute statistics for columns deptid +PREHOOK: type: QUERY +PREHOOK: Input: default@emp_orc +#### A masked pattern was here #### +POSTHOOK: query: -- column level partial statistics +analyze table emp_orc compute statistics for columns deptid +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@emp_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +PREHOOK: query: -- basicStatState: COMPLETE level: TABLE colStatState: PARTIAL +explain extended select * from emp_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE level: TABLE colStatState: PARTIAL +explain extended select * from emp_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: emp_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: emp_orc numRows: 6 rawDataSize: 300 basicStatState: COMPLETE level: TABLE colStatState: PARTIAL]] + GatherStats: false + Select Operator + expressions: + expr: lastname + type: string + expr: deptid + type: int + outputColumnNames: _col0, _col1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: emp_orc numRows: 6 rawDataSize: 300 basicStatState: COMPLETE level: TABLE colStatState: PARTIAL]] + ListSink + + +PREHOOK: query: -- all selected columns have statistics +-- basicStatState: COMPLETE level: TABLE colStatState: COMPLETE +explain extended select deptid from emp_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- all selected columns have statistics +-- basicStatState: COMPLETE level: TABLE colStatState: COMPLETE +explain extended select deptid from emp_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL deptid))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + emp_orc + TableScan + alias: emp_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: emp_orc numRows: 6 rawDataSize: 300 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Select Operator + expressions: + expr: deptid + type: int + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: emp_orc numRows: 6 rawDataSize: 20 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: emp_orc numRows: 6 rawDataSize: 20 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: emp_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numPartitions 0 + numRows 6 + rawDataSize 0 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 300 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numPartitions 0 + numRows 6 + rawDataSize 0 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 300 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.emp_orc + name: default.emp_orc + Truncated Path -> Alias: + /emp_orc [emp_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- column level complete statistics +analyze table emp_orc compute statistics for columns lastname,deptid +PREHOOK: type: QUERY +PREHOOK: Input: default@emp_orc +#### A masked pattern was here #### +POSTHOOK: query: -- column level complete statistics +analyze table emp_orc compute statistics for columns lastname,deptid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emp_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +PREHOOK: query: -- basicStatState: COMPLETE level: TABLE colStatState: COMPLETE +explain extended select * from emp_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE level: TABLE colStatState: COMPLETE +explain extended select * from emp_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: emp_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: emp_orc numRows: 6 rawDataSize: 
300 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Select Operator + expressions: + expr: lastname + type: string + expr: deptid + type: int + outputColumnNames: _col0, _col1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: emp_orc numRows: 6 rawDataSize: 300 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + ListSink + + diff --git ql/src/test/results/clientpositive/annotate_stats_union.q.out ql/src/test/results/clientpositive/annotate_stats_union.q.out new file mode 100644 index 0000000..c05f189 --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_union.q.out @@ -0,0 +1,1111 @@ +PREHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_staging +PREHOOK: query: create table loc_orc like loc_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table loc_orc like loc_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_orc +PREHOOK: query: alter table loc_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc +POSTHOOK: query: alter table loc_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc +PREHOOK: query: load data local inpath '../data/files/loc.txt' overwrite into table loc_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@loc_staging +POSTHOOK: query: load data local inpath '../data/files/loc.txt' overwrite into table loc_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@loc_staging +PREHOOK: query: insert overwrite table loc_orc select * from loc_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_staging +PREHOOK: Output: default@loc_orc +POSTHOOK: query: insert overwrite table loc_orc select * from loc_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_staging +POSTHOOK: Output: default@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE 
[(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- numRows: 8 rawDataSize: 680 +explain extended select state from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 8 rawDataSize: 680 +explain extended select state from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 680 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 680 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 
796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 16 rawDataSize: 1360 +explain extended select * from (select state from loc_orc union all select state from loc_orc) tmp +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 16 rawDataSize: 1360 +explain extended select * from (select state from loc_orc union all select state from loc_orc) tmp +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)))))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + null-subquery1:tmp-subquery1:loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 680 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Union + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 16 rawDataSize: 1360 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 16 rawDataSize: 1360 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 16 rawDataSize: 1360 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + null-subquery2:tmp-subquery2:loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 680 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Union + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 16 rawDataSize: 1360 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 16 rawDataSize: 1360 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 16 rawDataSize: 1360 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [null-subquery1:tmp-subquery1:loc_orc, null-subquery2:tmp-subquery2:loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 8 rawDataSize: 796 +explain 
extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + ListSink + + +PREHOOK: query: -- numRows: 16 rawDataSize: 1592 +explain extended select * from (select * from loc_orc union all select * from loc_orc) tmp +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 16 rawDataSize: 1592 +explain extended select * from (select * from loc_orc union all select * from loc_orc) tmp +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + null-subquery1:tmp-subquery1:loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: 
COMPLETE]] + Union + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 16 rawDataSize: 1592 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: bigint + expr: _col3 + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 16 rawDataSize: 1592 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 16 rawDataSize: 1592 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + null-subquery2:tmp-subquery2:loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Union + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 16 rawDataSize: 1592 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: bigint + expr: _col3 + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 16 rawDataSize: 1592 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 16 rawDataSize: 1592 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [null-subquery1:tmp-subquery1:loc_orc, null-subquery2:tmp-subquery2:loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: create database test +PREHOOK: type: CREATEDATABASE +POSTHOOK: query: create database test +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: use test +PREHOOK: type: SWITCHDATABASE +POSTHOOK: query: use test +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: test@loc_staging +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE 
[(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: create table loc_orc like loc_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table loc_orc like loc_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: test@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: alter table loc_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: test@loc_orc +PREHOOK: Output: test@loc_orc +POSTHOOK: query: alter table loc_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: test@loc_orc +POSTHOOK: Output: test@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: load data local inpath '../data/files/loc.txt' overwrite into table loc_staging +PREHOOK: type: LOAD +PREHOOK: Output: test@loc_staging +POSTHOOK: query: load data local inpath '../data/files/loc.txt' overwrite into table loc_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: test@loc_staging +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: insert overwrite table loc_orc select * from loc_staging +PREHOOK: type: QUERY +PREHOOK: Input: test@loc_staging +PREHOOK: Output: test@loc_orc +POSTHOOK: query: insert overwrite table loc_orc select * from loc_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: test@loc_staging +POSTHOOK: Output: test@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE 
[(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: analyze table loc_staging compute statistics for columns state,locid,zip,year +PREHOOK: type: QUERY +PREHOOK: Input: test@loc_staging +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_staging compute statistics for columns state,locid,zip,year +POSTHOOK: type: QUERY +POSTHOOK: Input: test@loc_staging +#### A masked pattern was here #### +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +PREHOOK: type: QUERY +PREHOOK: Input: test@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +POSTHOOK: type: QUERY +POSTHOOK: Input: test@loc_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- there should be 2 entries for DB statistics. Since there are 2 different DBs statistics can't be merged +explain extended select * from (select state from default.loc_orc union all select state from test.loc_orc) temp +PREHOOK: type: QUERY +POSTHOOK: query: -- there should be 2 entries for DB statistics. 
Since there are 2 different DBs statistics can't be merged +explain extended select * from (select state from default.loc_orc union all select state from test.loc_orc) temp +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME default loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)))))) temp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + null-subquery1:temp-subquery1:loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 680 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Union + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 680 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE], dbName: test tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 680 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 680 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE], dbName: test tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 680 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 680 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE], dbName: test tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 680 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + null-subquery2:temp-subquery2:loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: test tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: test tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 680 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Union + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 680 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE], dbName: test tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 680 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 680 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE], dbName: test tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 680 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: default tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 680 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE], dbName: test tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 680 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | 
+#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name test.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name test.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: test.loc_orc + name: test.loc_orc + Truncated Path -> Alias: + /loc_orc [null-subquery1:temp-subquery1:loc_orc] + /test.db/loc_orc [null-subquery2:temp-subquery2:loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- there should be 2 entries for Table statistics. Since there are 2 different tables statistics can't be merged +explain extended select * from (select state from test.loc_staging union all select state from test.loc_orc) temp +PREHOOK: type: QUERY +POSTHOOK: query: -- there should be 2 entries for Table statistics. 
Since there are 2 different tables statistics can't be merged +explain extended select * from (select state from test.loc_staging union all select state from test.loc_orc) temp +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test loc_staging))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)))))) temp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + null-subquery1:temp-subquery1:loc_staging + TableScan + alias: loc_staging + Statistics: + dbStats: [ dbName: test tabStats: [ tabName: loc_staging numRows: 0 rawDataSize: 125 basicStatState: PARTIAL level: TABLE colStatState: COMPLETE]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: test tabStats: [ tabName: loc_staging numRows: 0 rawDataSize: 125 basicStatState: PARTIAL level: TABLE colStatState: COMPLETE]] + Union + Statistics: + dbStats: [ dbName: test tabStats: [ tabName: loc_staging numRows: 0 rawDataSize: 125 basicStatState: PARTIAL level: TABLE colStatState: COMPLETE, tabName: loc_orc numRows: 8 rawDataSize: 680 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: test tabStats: [ tabName: loc_staging numRows: 0 rawDataSize: 125 basicStatState: PARTIAL level: TABLE colStatState: COMPLETE, tabName: loc_orc numRows: 8 rawDataSize: 680 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: test tabStats: [ tabName: loc_staging numRows: 0 rawDataSize: 125 basicStatState: PARTIAL level: TABLE colStatState: COMPLETE, tabName: loc_orc numRows: 8 rawDataSize: 680 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + 
columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + null-subquery2:temp-subquery2:loc_orc + TableScan + alias: loc_orc + Statistics: + dbStats: [ dbName: test tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 796 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + GatherStats: false + Select Operator + expressions: + expr: state + type: string + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: test tabStats: [ tabName: loc_orc numRows: 8 rawDataSize: 680 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Union + Statistics: + dbStats: [ dbName: test tabStats: [ tabName: loc_staging numRows: 0 rawDataSize: 125 basicStatState: PARTIAL level: TABLE colStatState: COMPLETE, tabName: loc_orc numRows: 8 rawDataSize: 680 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Statistics: + dbStats: [ dbName: test tabStats: [ tabName: loc_staging numRows: 0 rawDataSize: 125 basicStatState: PARTIAL level: TABLE colStatState: COMPLETE, tabName: loc_orc numRows: 8 rawDataSize: 680 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + dbStats: [ dbName: test tabStats: [ tabName: loc_staging numRows: 0 rawDataSize: 125 basicStatState: PARTIAL level: TABLE colStatState: COMPLETE, tabName: loc_orc numRows: 8 rawDataSize: 680 basicStatState: COMPLETE level: TABLE colStatState: COMPLETE]] +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name test.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name test.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { 
string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: test.loc_orc + name: test.loc_orc +#### A masked pattern was here #### + Partition + base file name: loc_staging + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name test.loc_staging + numFiles 1 + numPartitions 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct loc_staging { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 125 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name test.loc_staging + numFiles 1 + numPartitions 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct loc_staging { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 125 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: test.loc_staging + name: test.loc_staging + Truncated Path -> Alias: + /test.db/loc_orc [null-subquery2:temp-subquery2:loc_orc] + /test.db/loc_staging [null-subquery1:temp-subquery1:loc_staging] + + Stage: Stage-0 + Fetch Operator + limit: -1 + +
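A note on the expected statistics in the plans above: the q-out encodes a merge rule for the new operator-level statistics at UNION ALL. Branch entries whose dbName and tabName both match are summed (two 8-row scans of default.loc_orc yield numRows: 16 and rawDataSize: 680 + 680 = 1360), while branches from a different database (default vs. test) or a different table (loc_staging vs. loc_orc) are carried as separate tabStats/dbStats entries, which is why those plans show two entries instead of one. Below is a minimal, self-contained sketch of that merge behavior as the expected output suggests it; TabStat and mergeUnion are hypothetical stand-ins for illustration only, not the Statistics/DBStatistics/TabStatistics API added by this patch.

import java.util.ArrayList;
import java.util.List;

// Sketch only: models the UNION ALL statistics merge implied by the
// expected EXPLAIN output above, using simplified stand-in types.
public class UnionStatsSketch {

    // Simplified per-table statistics entry keyed by (dbName, tabName).
    static class TabStat {
        final String dbName;
        final String tabName;
        long numRows;
        long rawDataSize;

        TabStat(String dbName, String tabName, long numRows, long rawDataSize) {
            this.dbName = dbName;
            this.tabName = tabName;
            this.numRows = numRows;
            this.rawDataSize = rawDataSize;
        }

        @Override
        public String toString() {
            return dbName + "." + tabName + " numRows: " + numRows
                + " rawDataSize: " + rawDataSize;
        }
    }

    // Entries from the union branches merge only when both dbName and
    // tabName match (numRows and rawDataSize add up); an entry from a
    // different database or table survives as a separate entry.
    static List<TabStat> mergeUnion(List<TabStat> branchStats) {
        List<TabStat> merged = new ArrayList<>();
        for (TabStat s : branchStats) {
            TabStat match = null;
            for (TabStat m : merged) {
                if (m.dbName.equals(s.dbName) && m.tabName.equals(s.tabName)) {
                    match = m;
                    break;
                }
            }
            if (match == null) {
                merged.add(new TabStat(s.dbName, s.tabName, s.numRows, s.rawDataSize));
            } else {
                match.numRows += s.numRows;
                match.rawDataSize += s.rawDataSize;
            }
        }
        return merged;
    }

    public static void main(String[] args) {
        // Same table unioned with itself: one merged entry,
        // numRows 8 + 8 = 16 and rawDataSize 680 + 680 = 1360.
        List<TabStat> sameTable = new ArrayList<>();
        sameTable.add(new TabStat("default", "loc_orc", 8, 680));
        sameTable.add(new TabStat("default", "loc_orc", 8, 680));
        System.out.println(mergeUnion(sameTable));

        // Different databases: two entries are kept, nothing is merged,
        // matching the two-entry dbStats lists in the plans above.
        List<TabStat> twoDbs = new ArrayList<>();
        twoDbs.add(new TabStat("default", "loc_orc", 8, 680));
        twoDbs.add(new TabStat("test", "loc_orc", 8, 680));
        System.out.println(mergeUnion(twoDbs));
    }
}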