diff --git data/files/dept.txt data/files/dept.txt index 292bee6..07e8e07 100644 --- data/files/dept.txt +++ data/files/dept.txt @@ -2,3 +2,5 @@ 33|engineering 34|clerical 35|marketing +36|transport +37|hr diff --git data/files/emp.txt data/files/emp.txt index a0e76b9..1b41f75 100644 --- data/files/emp.txt +++ data/files/emp.txt @@ -1,6 +1,48 @@ -Rafferty|31 -Jones|33 -Steinberg|33 -Robinson|34 -Smith|34 -John| +Rafferty|31|1 +Jones|33|2 +Steinberg|33|3 +Robinson|34|4 +Smith|34|5 +John|31|6 +Rafferty|31|1 +Jones|33|2 +Steinberg|33|3 +Robinson|34|4 +Smith|34|5 +John|31|6 +Rafferty|31|1 +Jones|33|2 +Steinberg|33|3 +Robinson|34|4 +Smith|34|5 +John|31|6 +Rafferty|31|1 +Jones|33|2 +Steinberg|33|3 +Robinson|34|4 +Smith|34|5 +John|31|6 +Rafferty|31|1 +Jones|33|2 +Steinberg|33|3 +Robinson|34|4 +Smith|34|5 +John|31|6 +Rafferty|31|1 +Jones|33|2 +Steinberg|33|3 +Robinson|34|4 +Smith|34|5 +John|31|6 +Rafferty|31|1 +Jones|33|2 +Steinberg|33|3 +Robinson|34|4 +Smith|34|5 +John|31|6 +Rafferty|31|1 +Jones|33|2 +Steinberg|33|3 +Robinson|34|4 +Smith|34|5 +John|31|6 diff --git data/files/loc.txt data/files/loc.txt index 69910b7..f5f41d7 100644 --- data/files/loc.txt +++ data/files/loc.txt @@ -1,8 +1,8 @@ -OH|31|43201|2001 -IO|32|43202|2001 -CA|35|43809|2001 -FL|33|54342|2001 -UT|35||2001 -CA|35|43809|2001 -|34|40000| -FL|33|54342|2001 +OH|1|43201|2001 +IO|2|43202|2001 +CA|5|43809|2001 +FL|3|54342|2001 +UT|5||2001 +CA|5|43809|2001 +|4|40000| +FL|6|54342|2001 diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java index ccd102a..4aeeff2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.GroupByOperator; import 
org.apache.hadoop.hive.ql.exec.LimitOperator; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.SelectOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; @@ -62,6 +63,8 @@ public ParseContext transform(ParseContext pctx) throws SemanticException { + MapJoinOperator.getOperatorName() + "%"), StatsRulesProcFactory.getJoinRule()); opRules.put(new RuleRegExp("LIM", LimitOperator.getOperatorName() + "%"), StatsRulesProcFactory.getLimitRule()); + opRules.put(new RuleRegExp("RS", ReduceSinkOperator.getOperatorName() + "%"), + StatsRulesProcFactory.getReduceSinkRule()); // The dispatcher fires the processor corresponding to the closest matching // rule and passes the context along diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index 9620e62..3b99921 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -598,12 +598,18 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } dvProd *= dv; } else { - - // partial column statistics on grouping attributes case. - // if column statistics on grouping attribute is missing, then - // assume worst case. - // GBY rule will emit half the number of rows if dvProd is 0 - dvProd = 0; + if (parentStats.getColumnStatsState().equals(Statistics.State.COMPLETE)) { + // the column must be an aggregate column inserted by GBY. We + // don't have to account for this column when computing product + // of NDVs + continue; + } else { + // partial column statistics on grouping attributes case. 
+ // if column statistics on grouping attribute is missing, then + // assume worst case. + // GBY rule will emit half the number of rows if dvProd is 0 + dvProd = 0; + } break; } } @@ -684,7 +690,12 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, aggColStats.add(cs); } } - stats.addToColumnStats(aggColStats); + + // add the new aggregate column and recompute data size + if (aggColStats.size() > 0) { + stats.addToColumnStats(aggColStats); + updateStats(stats, stats.getNumRows(), true); + } // if UDAF present and if column expression map is empty then it must // be full aggregation query like count(*) in which case number of @@ -731,15 +742,24 @@ private long applyGBYRule(long numRows, long dvProd) { *

* In the absence of histograms, we can use the following general case *

- * Single attribute + * 2 Relations, 1 attribute *

* T(RXS) = (T(R)*T(S))/max(V(R,Y), V(S,Y)) where Y is the join attribute *

- * Multiple attributes + * 2 Relations, 2 attributes *

* T(RXS) = T(R)*T(S)/max(V(R,y1), V(S,y1)) * max(V(R,y2), V(S,y2)), where y1 and y2 are the join * attributes *

+ * 3 Relations, 1 attribute + *

+ * T(RXSXQ) = T(R)*T(S)*T(Q)/top2largest(V(R,y), V(S,y), V(Q,y)), where y is the join attribute + *

+ * 3 Relations, 2 attributes + *

+ * T(RXSXQ) = T(R)*T(S)*T(Q)/top2largest(V(R,y1), V(S,y1), V(Q,y1)) * top2largest(V(R,y2), V(S,y2), V(Q,y2)), + * where y1 and y2 are the join attributes + *

* Worst case: If no column statistics are available, then T(RXS) = joinFactor * max(T(R), * T(S)) * (numParents - 1) will be used as heuristics. joinFactor is from hive.stats.join.factor * hive config. In the worst case, since we do not know any information about join keys (and hence @@ -780,9 +800,12 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // statistics object that is combination of statistics from all // relations involved in JOIN Statistics stats = new Statistics(); - long prodRows = 1; + List rowCountParents = Lists.newArrayList(); List distinctVals = Lists.newArrayList(); + + // 2 relations, multiple attributes boolean multiAttr = false; + int numAttr = 1; Map joinedColStats = Maps.newHashMap(); Map> joinKeys = Maps.newHashMap(); @@ -792,12 +815,13 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, ReduceSinkOperator parent = (ReduceSinkOperator) jop.getParentOperators().get(pos); Statistics parentStats = parent.getStatistics(); - prodRows *= parentStats.getNumRows(); + rowCountParents.add(parentStats.getNumRows()); List keyExprs = parent.getConf().getKeyCols(); // multi-attribute join key if (keyExprs.size() > 1) { multiAttr = true; + numAttr = keyExprs.size(); } // compute fully qualified join key column names. 
this name will be @@ -808,16 +832,9 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, StatsUtils.getFullQualifedColNameFromExprs(keyExprs, parent.getColumnExprMap()); joinKeys.put(pos, fqCols); - Map colExprMap = parent.getColumnExprMap(); - RowSchema rs = parent.getSchema(); - // get column statistics for all output columns - List cs = - StatsUtils.getColStatisticsFromExprMap(conf, parentStats, colExprMap, rs); - for (ColStatistics c : cs) { - if (c != null) { - joinedColStats.put(c.getFullyQualifiedColName(), c); - } + for (ColStatistics cs : parentStats.getColumnStats()) { + joinedColStats.put(cs.getFullyQualifiedColName(), cs); } // since new statistics is derived from all relations involved in @@ -831,10 +848,10 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, long denom = 1; if (multiAttr) { List perAttrDVs = Lists.newArrayList(); - int numAttr = joinKeys.get(0).size(); for (int idx = 0; idx < numAttr; idx++) { for (Integer i : joinKeys.keySet()) { String col = joinKeys.get(i).get(idx); + col = StatsUtils.stripPrefixFromColumnName(col); ColStatistics cs = joinedColStats.get(col); if (cs != null) { perAttrDVs.add(cs.getCountDistint()); @@ -850,6 +867,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } else { for (List jkeys : joinKeys.values()) { for (String jk : jkeys) { + jk = StatsUtils.stripPrefixFromColumnName(jk); ColStatistics cs = joinedColStats.get(jk); if (cs != null) { distinctVals.add(cs.getCountDistint()); @@ -859,6 +877,11 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, denom = getDenominator(distinctVals); } + // Update NDV of joined columns to be min(V(R,y), V(S,y)) + if (multiAttr) { + updateJoinColumnsNDV(joinKeys, joinedColStats, numAttr); + } + // column statistics from different sources are put together and rename // fully qualified column names based on output schema of join operator Map colExprMap = jop.getColumnExprMap(); @@ -875,7 +898,6 @@ 
public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, ColStatistics cs = joinedColStats.get(fqColName); String outColName = key; String outTabAlias = ci.getTabAlias(); - outColName = StatsUtils.stripPrefixFromColumnName(outColName); if (cs != null) { cs.setColumnName(outColName); cs.setTableAlias(outTabAlias); @@ -886,13 +908,21 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // update join statistics stats.setColumnStats(outColStats); - long newRowCount = prodRows / denom; + long newRowCount = computeNewRowCount(rowCountParents, denom); + + if (newRowCount <= 0 && LOG.isDebugEnabled()) { + newRowCount = 0; + LOG.debug("[0] STATS-" + jop.toString() + ": Product of #rows might be greater than" + + " denominator or overflow might have occurred. Resetting row count to 0." + + " #Rows of parents: " + rowCountParents.toString() + ". Denominator: " + denom); + } + stats.setNumRows(newRowCount); stats.setDataSize(StatsUtils.getDataSizeFromColumnStats(newRowCount, outColStats)); jop.setStatistics(stats); if (LOG.isDebugEnabled()) { - LOG.debug("[0] STATS-" + jop.toString() + ": " + stats.extendedToString()); + LOG.debug("[1] STATS-" + jop.toString() + ": " + stats.extendedToString()); } } else { @@ -927,13 +957,72 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, jop.setStatistics(wcStats); if (LOG.isDebugEnabled()) { - LOG.debug("[1] STATS-" + jop.toString() + ": " + wcStats.extendedToString()); + LOG.debug("[2] STATS-" + jop.toString() + ": " + wcStats.extendedToString()); } } } return null; } + private long computeNewRowCount(List rowCountParents, long denom) { + double factor = 0.0d; + long result = 1; + long max = rowCountParents.get(0); + long maxIdx = 0; + + // To avoid long overflow, we will divide the max row count by denominator + // and use that factor to multiply with other row counts + for (int i = 1; i < rowCountParents.size(); i++) { + if (rowCountParents.get(i) > max) { + max = 
rowCountParents.get(i); + maxIdx = i; + } + } + + factor = (double) max / (double) denom; + + for (int i = 0; i < rowCountParents.size(); i++) { + if (i != maxIdx) { + result *= rowCountParents.get(i); + } + } + + result = (long) (result * factor); + + return result; + } + + private void updateJoinColumnsNDV(Map> joinKeys, + Map joinedColStats, int numAttr) { + int joinColIdx = 0; + while (numAttr > 0) { + long minNDV = Long.MAX_VALUE; + + // find min NDV for joining columns + for (Map.Entry> entry : joinKeys.entrySet()) { + String key = entry.getValue().get(joinColIdx); + key = StatsUtils.stripPrefixFromColumnName(key); + ColStatistics cs = joinedColStats.get(key); + if (cs != null && cs.getCountDistint() < minNDV) { + minNDV = cs.getCountDistint(); + } + } + + // set min NDV value to both columns involved in join + if (minNDV != Long.MAX_VALUE) { + for (Map.Entry> entry : joinKeys.entrySet()) { + String key = entry.getValue().get(joinColIdx); + key = StatsUtils.stripPrefixFromColumnName(key); + ColStatistics cs = joinedColStats.get(key); + cs.setCountDistint(minNDV); + } + } + + joinColIdx++; + numAttr--; + } + } + private long getDenominator(List distinctVals) { if (distinctVals.isEmpty()) { @@ -951,16 +1040,23 @@ private long getDenominator(List distinctVals) { return Collections.max(distinctVals); } else { + // remember min value and ignore it from the denominator + long minNDV = distinctVals.get(0); + int minIdx = 0; + + for (int i = 1; i < distinctVals.size(); i++) { + if (distinctVals.get(i) < minNDV) { + minNDV = distinctVals.get(i); + minIdx = i; + } + } + // join from multiple relations: - // denom = max(v1, v2) * max(v2, v3) * max(v3, v4) + // denom = Product of all NDVs except the least of all long denom = 1; - for (int i = 0; i < distinctVals.size() - 1; i++) { - long v1 = distinctVals.get(i); - long v2 = distinctVals.get(i + 1); - if (v1 >= v2) { - denom *= v1; - } else { - denom *= v2; + for (int i = 0; i < distinctVals.size(); i++) { + if (i != 
minIdx) { + denom *= distinctVals.get(i); } } return denom; @@ -1029,6 +1125,65 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } /** + * ReduceSink operator does not change any of the statistics. But it renames + * the column statistics from its parent based on the output key and value + * column names to make it easy for the downstream operators. This is different + * from the default stats which just aggregates and passes along the statistics + * without actually renaming based on output schema of the operator. + */ + public static class ReduceSinkStatsRule extends DefaultStatsRule implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + ReduceSinkOperator rop = (ReduceSinkOperator) nd; + Operator parent = rop.getParentOperators().get(0); + Statistics parentStats = parent.getStatistics(); + if (parentStats != null) { + AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx; + HiveConf conf = aspCtx.getConf(); + + List outKeyColNames = rop.getConf().getOutputKeyColumnNames(); + List outValueColNames = rop.getConf().getOutputValueColumnNames(); + Map colExprMap = rop.getColumnExprMap(); + try { + Statistics outStats = parentStats.clone(); + List colStats = Lists.newArrayList(); + for (String key : outKeyColNames) { + String prefixedKey = "KEY." + key; + ExprNodeDesc end = colExprMap.get(prefixedKey); + ColStatistics cs = StatsUtils.getColStatisticsFromExpression(conf, parentStats, end); + if (cs != null) { + cs.setColumnName(key); + colStats.add(cs); + } + } + + for (String val : outValueColNames) { + String prefixedVal = "VALUE." 
+ val; + ExprNodeDesc end = colExprMap.get(prefixedVal); + ColStatistics cs = StatsUtils.getColStatisticsFromExpression(conf, parentStats, end); + if (cs != null) { + cs.setColumnName(val); + colStats.add(cs); + } + } + + outStats.setColumnStats(colStats); + rop.setStatistics(outStats); + if (LOG.isDebugEnabled()) { + LOG.debug("[0] STATS-" + rop.toString() + ": " + outStats.extendedToString()); + } + } catch (CloneNotSupportedException e) { + throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); + } + } + return null; + } + + } + + /** * Default rule is to aggregate the statistics from all its parent operators. */ public static class DefaultStatsRule implements NodeProcessor { @@ -1105,6 +1260,10 @@ public static NodeProcessor getLimitRule() { return new LimitStatsRule(); } + public static NodeProcessor getReduceSinkRule() { + return new ReduceSinkStatsRule(); + } + public static NodeProcessor getDefaultRule() { return new DefaultStatsRule(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index 818590a..31e921c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -884,12 +884,9 @@ public static long getWritableSize(ObjectInspector oi, Object value) { if (colExprMap != null) { for (ColumnInfo ci : rowSchema.getSignature()) { String outColName = ci.getInternalName(); + outColName = StatsUtils.stripPrefixFromColumnName(outColName); String outTabAlias = ci.getTabAlias(); ExprNodeDesc end = colExprMap.get(outColName); - if (end == null) { - outColName = StatsUtils.stripPrefixFromColumnName(outColName); - end = colExprMap.get(outColName); - } ColStatistics colStat = getColStatisticsFromExpression(conf, parentStats, end); if (colStat != null) { outColName = StatsUtils.stripPrefixFromColumnName(outColName); @@ -1150,7 +1147,7 @@ public static long getDataSizeFromColumnStats(long 
numRows, List */ public static String stripPrefixFromColumnName(String colName) { String stripedName = colName; - if (colName.startsWith("KEY._") || colName.startsWith("VALUE._")) { + if (colName.startsWith("KEY") || colName.startsWith("VALUE")) { // strip off KEY./VALUE. from column name stripedName = colName.split("\\.")[1]; } @@ -1218,15 +1215,16 @@ private static String getFullyQualifiedName(String... names) { for (Map.Entry entry : map.entrySet()) { if (entry.getValue().isSame(end)) { outColName = entry.getKey(); + outColName = stripPrefixFromColumnName(outColName); } } if (end instanceof ExprNodeColumnDesc) { ExprNodeColumnDesc encd = (ExprNodeColumnDesc) end; if (outColName == null) { outColName = encd.getColumn(); + outColName = stripPrefixFromColumnName(outColName); } String tabAlias = encd.getTabAlias(); - outColName = stripPrefixFromColumnName(outColName); result.add(getFullyQualifiedColumnName(tabAlias, outColName)); } else if (end instanceof ExprNodeGenericFuncDesc) { ExprNodeGenericFuncDesc enf = (ExprNodeGenericFuncDesc) end; diff --git ql/src/test/queries/clientpositive/annotate_stats_filter.q ql/src/test/queries/clientpositive/annotate_stats_filter.q index d74e760..204ed87 100644 --- ql/src/test/queries/clientpositive/annotate_stats_filter.q +++ ql/src/test/queries/clientpositive/annotate_stats_filter.q @@ -15,76 +15,76 @@ load data local inpath '../../data/files/loc.txt' overwrite into table loc_stagi insert overwrite table loc_orc select * from loc_staging; -- numRows: 8 rawDataSize: 796 -explain extended select * from loc_orc; +explain select * from loc_orc; -- column stats are not COMPLETE, so stats are not updated -- numRows: 8 rawDataSize: 796 -explain extended select * from loc_orc where state='OH'; +explain select * from loc_orc where state='OH'; analyze table loc_orc compute statistics for columns state,locid,zip,year; -- state column has 5 distincts. 
numRows/countDistincts -- numRows: 1 rawDataSize: 102 -explain extended select * from loc_orc where state='OH'; +explain select * from loc_orc where state='OH'; -- not equals comparison shouldn't affect number of rows -- numRows: 8 rawDataSize: 804 -explain extended select * from loc_orc where state!='OH'; -explain extended select * from loc_orc where state<>'OH'; +explain select * from loc_orc where state!='OH'; +explain select * from loc_orc where state<>'OH'; -- nulls are treated as constant equality comparison -- numRows: 1 rawDataSize: 102 -explain extended select * from loc_orc where zip is null; +explain select * from loc_orc where zip is null; -- numRows: 1 rawDataSize: 102 -explain extended select * from loc_orc where !(zip is not null); +explain select * from loc_orc where !(zip is not null); -- not nulls are treated as inverse of nulls -- numRows: 7 rawDataSize: 702 -explain extended select * from loc_orc where zip is not null; +explain select * from loc_orc where zip is not null; -- numRows: 7 rawDataSize: 702 -explain extended select * from loc_orc where !(zip is null); +explain select * from loc_orc where !(zip is null); -- NOT evaluation. true will pass all rows, false will not pass any rows -- numRows: 8 rawDataSize: 804 -explain extended select * from loc_orc where !false; +explain select * from loc_orc where !false; -- numRows: 0 rawDataSize: 0 -explain extended select * from loc_orc where !true; +explain select * from loc_orc where !true; -- Constant evaluation. 
true will pass all rows, false will not pass any rows -- numRows: 8 rawDataSize: 804 -explain extended select * from loc_orc where true; +explain select * from loc_orc where true; -- numRows: 8 rawDataSize: 804 -explain extended select * from loc_orc where 'foo'; +explain select * from loc_orc where 'foo'; -- numRows: 8 rawDataSize: 804 -explain extended select * from loc_orc where true = true; +explain select * from loc_orc where true = true; -- numRows: 0 rawDataSize: 0 -explain extended select * from loc_orc where false = true; +explain select * from loc_orc where false = true; -- numRows: 0 rawDataSize: 0 -explain extended select * from loc_orc where 'foo' = 'bar'; +explain select * from loc_orc where 'foo' = 'bar'; -- numRows: 0 rawDataSize: 0 -explain extended select * from loc_orc where false; +explain select * from loc_orc where false; -- OR evaluation. 1 row for OH and 1 row for CA -- numRows: 2 rawDataSize: 204 -explain extended select * from loc_orc where state='OH' or state='CA'; +explain select * from loc_orc where state='OH' or state='CA'; -- AND evaluation. cascadingly apply rules. 8/2 = 4/2 = 2 -- numRows: 2 rawDataSize: 204 -explain extended select * from loc_orc where year=2001 and year is null; +explain select * from loc_orc where year=2001 and year is null; -- numRows: 1 rawDataSize: 102 -explain extended select * from loc_orc where year=2001 and state='OH' and state='FL'; +explain select * from loc_orc where year=2001 and state='OH' and state='FL'; -- AND and OR together. left expr will yield 1 row and right will yield 1 row -- numRows: 3 rawDataSize: 306 -explain extended select * from loc_orc where (year=2001 and year is null) or (state='CA'); +explain select * from loc_orc where (year=2001 and year is null) or (state='CA'); -- AND and OR together. 
left expr will yield 8 rows and right will yield 1 row -- numRows: 1 rawDataSize: 102 -explain extended select * from loc_orc where (year=2001 or year is null) and (state='CA'); +explain select * from loc_orc where (year=2001 or year is null) and (state='CA'); -- all inequality conditions rows/3 is the rules -- numRows: 2 rawDataSize: 204 -explain extended select * from loc_orc where locid < 30; -explain extended select * from loc_orc where locid > 30; -explain extended select * from loc_orc where locid <= 30; -explain extended select * from loc_orc where locid >= 30; +explain select * from loc_orc where locid < 30; +explain select * from loc_orc where locid > 30; +explain select * from loc_orc where locid <= 30; +explain select * from loc_orc where locid >= 30; diff --git ql/src/test/queries/clientpositive/annotate_stats_groupby.q ql/src/test/queries/clientpositive/annotate_stats_groupby.q index 05cb036..e8e84c6 100644 --- ql/src/test/queries/clientpositive/annotate_stats_groupby.q +++ ql/src/test/queries/clientpositive/annotate_stats_groupby.q @@ -15,14 +15,14 @@ load data local inpath '../../data/files/loc.txt' overwrite into table loc_stagi insert overwrite table loc_orc select * from loc_staging; -- numRows: 8 rawDataSize: 796 -explain extended select * from loc_orc; +explain select * from loc_orc; -- partial column stats analyze table loc_orc compute statistics for columns state; -- inner group by: map - numRows: 8 reduce - numRows: 4 -- outer group by: map - numRows: 4 reduce numRows: 2 -explain extended select a, c, min(b) +explain select a, c, min(b) from ( select state as a, locid as b, count(*) as c from loc_orc group by state,locid @@ -34,36 +34,36 @@ analyze table loc_orc compute statistics for columns state,locid,zip,year; -- only one distinct value in year column + 1 NULL value -- map-side GBY: numRows: 8 (map-side will not do any reduction) -- reduce-side GBY: numRows: 2 -explain extended select year from loc_orc group by year; +explain select year 
from loc_orc group by year; -- map-side GBY: numRows: 8 -- reduce-side GBY: numRows: 4 -explain extended select state,locid from loc_orc group by state,locid; +explain select state,locid from loc_orc group by state,locid; -- map-side GBY numRows: 32 reduce-side GBY numRows: 16 -explain extended select state,locid from loc_orc group by state,locid with cube; +explain select state,locid from loc_orc group by state,locid with cube; -- map-side GBY numRows: 24 reduce-side GBY numRows: 12 -explain extended select state,locid from loc_orc group by state,locid with rollup; +explain select state,locid from loc_orc group by state,locid with rollup; -- map-side GBY numRows: 8 reduce-side GBY numRows: 4 -explain extended select state,locid from loc_orc group by state,locid grouping sets((state)); +explain select state,locid from loc_orc group by state,locid grouping sets((state)); -- map-side GBY numRows: 16 reduce-side GBY numRows: 8 -explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid)); +explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid)); -- map-side GBY numRows: 24 reduce-side GBY numRows: 12 -explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid),()); +explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid),()); -- map-side GBY numRows: 32 reduce-side GBY numRows: 16 -explain extended select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),()); +explain select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),()); set hive.stats.map.parallelism=10; -- map-side GBY: numRows: 80 (map-side will not do any reduction) -- reduce-side GBY: numRows: 2 Reason: numDistinct of year is 2. 
numRows = min(80/2, 2) -explain extended select year from loc_orc group by year; +explain select year from loc_orc group by year; -- map-side GBY numRows: 320 reduce-side GBY numRows: 42 Reason: numDistinct of state and locid are 6,7 resp. numRows = min(320/2, 6*7) -explain extended select state,locid from loc_orc group by state,locid with cube; +explain select state,locid from loc_orc group by state,locid with cube; diff --git ql/src/test/queries/clientpositive/annotate_stats_join.q ql/src/test/queries/clientpositive/annotate_stats_join.q index 965b0b7..bd5f642 100644 --- ql/src/test/queries/clientpositive/annotate_stats_join.q +++ ql/src/test/queries/clientpositive/annotate_stats_join.q @@ -1,81 +1,70 @@ set hive.stats.fetch.column.stats=true; +set hive.stats.ndv.error=0.0; -create table if not exists emp_staging ( +create table if not exists emp ( lastname string, - deptid int + deptid int, + locid int ) row format delimited fields terminated by '|' stored as textfile; -create table if not exists dept_staging ( +create table if not exists dept ( deptid int, deptname string ) row format delimited fields terminated by '|' stored as textfile; -create table if not exists loc_staging ( +create table if not exists loc ( state string, locid int, zip bigint, year int ) row format delimited fields terminated by '|' stored as textfile; -create table if not exists emp_orc like emp_staging; -alter table emp_orc set fileformat orc; - -create table if not exists dept_orc like dept_staging; -alter table dept_orc set fileformat orc; - -create table loc_orc like loc_staging; -alter table loc_orc set fileformat orc; +LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp; +LOAD DATA LOCAL INPATH '../../data/files/dept.txt' OVERWRITE INTO TABLE dept; +LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc; -LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging; -LOAD DATA LOCAL INPATH '../../data/files/dept.txt' 
OVERWRITE INTO TABLE dept_staging; -LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc_staging; - -insert overwrite table emp_orc select * from emp_staging; -insert overwrite table dept_orc select * from dept_staging; -insert overwrite table loc_orc select * from loc_staging; - -analyze table emp_orc compute statistics for columns lastname,deptid; -analyze table dept_orc compute statistics for columns deptname,deptid; -analyze table loc_orc compute statistics for columns state,locid,zip,year; +analyze table emp compute statistics; +analyze table dept compute statistics; +analyze table loc compute statistics; +analyze table emp compute statistics for columns lastname,deptid,locid; +analyze table dept compute statistics for columns deptname,deptid; +analyze table loc compute statistics for columns state,locid,zip,year; -- number of rows --- emp_orc - 6 --- dept_orc - 4 --- loc_orc - 8 +-- emp - 48 +-- dept - 6 +-- loc - 8 -- count distincts for relevant columns (since count distinct values are approximate in some cases count distint values will be greater than number of rows) --- emp_orc.deptid - 3 --- emp_orc.lastname - 7 --- dept_orc.deptid - 6 --- dept_orc.deptname - 5 --- loc_orc.locid - 6 --- loc_orc.state - 7 - --- Expected output rows: 4 --- Reason: #rows = (6*4)/max(3,6) -explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid); - --- 3 way join --- Expected output rows: 4 --- Reason: #rows = (6*4*6)/max(3,6)*max(6,3) -explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join emp_orc e1 on (e.deptid = e1.deptid); - --- Expected output rows: 5 --- Reason: #rows = (6*4*8)/max(3,6)*max(6,6) -explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.locid); - --- join keys of different types --- Expected output rows: 4 --- Reason: #rows = (6*4*8)/max(3,6)*max(6,7) -explain extended select * from emp_orc e join dept_orc d on 
(e.deptid = d.deptid) join loc_orc l on (e.deptid = l.state); - --- multi-attribute join --- Expected output rows: 0 --- Reason: #rows = (6*4)/max(3,6)*max(7,5) -explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname); - --- 3 way and multi-attribute join --- Expected output rows: 0 --- Reason: #rows = (6*4*8)/max(3,6)*max(7,5)*max(3,6)*max(7,7) -explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc_orc l on (e.deptid = l.locid and e.lastname = l.state); +-- emp.deptid - 3 +-- emp.lastname - 6 +-- emp.locid - 7 +-- dept.deptid - 7 +-- dept.deptname - 6 +-- loc.locid - 7 +-- loc.state - 6 + +-- 2 relations, 1 attribute +-- Expected output rows: (48*6)/max(3,7) = 41 +explain select * from emp e join dept d on (e.deptid = d.deptid); + +-- 2 relations, 2 attributes +-- Expected output rows: (48*6)/(max(3,7) * max(6,6)) = 6 +explain select * from emp,dept where emp.deptid = dept.deptid and emp.lastname = dept.deptname; +explain select * from emp e join dept d on (e.deptid = d.deptid and e.lastname = d.deptname); + +-- 2 relations, 3 attributes +-- Expected output rows: (48*6)/(max(3,7) * max(6,6) * max(6,6)) = 1 +explain select * from emp,dept where emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname; + +-- 3 relations, 1 attribute +-- Expected output rows: (48*6*48)/top2largest(3,7,3) = 658 +explain select * from emp e join dept d on (e.deptid = d.deptid) join emp e1 on (e.deptid = e1.deptid); + +-- Expected output rows: (48*6*8)/top2largest(3,7,7) = 47 +explain select * from emp e join dept d on (e.deptid = d.deptid) join loc l on (e.deptid = l.locid); + +-- 3 relations and 2 attribute +-- Expected output rows: (48*6*8)/top2largest(3,7,7)*top2largest(6,6,6) = 1 +explain select * from emp e join dept d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc l on (e.deptid = l.locid and e.lastname = l.state); 
diff --git ql/src/test/queries/clientpositive/annotate_stats_limit.q ql/src/test/queries/clientpositive/annotate_stats_limit.q index 0a9f880..b82fe30 100644 --- ql/src/test/queries/clientpositive/annotate_stats_limit.q +++ ql/src/test/queries/clientpositive/annotate_stats_limit.q @@ -17,14 +17,14 @@ insert overwrite table loc_orc select * from loc_staging; analyze table loc_orc compute statistics for columns state, locid, zip, year; -- numRows: 8 rawDataSize: 796 -explain extended select * from loc_orc; +explain select * from loc_orc; -- numRows: 4 rawDataSize: 396 -explain extended select * from loc_orc limit 4; +explain select * from loc_orc limit 4; -- greater than the available number of rows -- numRows: 8 rawDataSize: 796 -explain extended select * from loc_orc limit 16; +explain select * from loc_orc limit 16; -- numRows: 0 rawDataSize: 0 -explain extended select * from loc_orc limit 0; +explain select * from loc_orc limit 0; diff --git ql/src/test/queries/clientpositive/annotate_stats_part.q ql/src/test/queries/clientpositive/annotate_stats_part.q index 839c7d8..f25776a 100644 --- ql/src/test/queries/clientpositive/annotate_stats_part.q +++ ql/src/test/queries/clientpositive/annotate_stats_part.q @@ -19,67 +19,67 @@ create table if not exists loc_orc ( ) partitioned by(year string) stored as orc; -- basicStatState: NONE colStatState: NONE -explain extended select * from loc_orc; +explain select * from loc_orc; insert overwrite table loc_orc partition(year) select * from loc_staging; -- stats are disabled. basic stats will report the file size but not raw data size. 
so initial statistics will be PARTIAL -- basicStatState: PARTIAL colStatState: NONE -explain extended select * from loc_orc; +explain select * from loc_orc; -- partition level analyze statistics for specific parition analyze table loc_orc partition(year='2001') compute statistics; -- basicStatState: PARTIAL colStatState: NONE -explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__'; +explain select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__'; -- basicStatState: PARTIAL colStatState: NONE -explain extended select * from loc_orc; +explain select * from loc_orc; -- basicStatState: COMPLETE colStatState: NONE -explain extended select * from loc_orc where year='2001'; +explain select * from loc_orc where year='2001'; -- partition level analyze statistics for all partitions analyze table loc_orc partition(year) compute statistics; -- basicStatState: COMPLETE colStatState: NONE -explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__'; +explain select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__'; -- basicStatState: COMPLETE colStatState: NONE -explain extended select * from loc_orc; +explain select * from loc_orc; -- basicStatState: COMPLETE colStatState: NONE -explain extended select * from loc_orc where year='2001' or year='__HIVE_DEFAULT_PARTITION__'; +explain select * from loc_orc where year='2001' or year='__HIVE_DEFAULT_PARTITION__'; -- both partitions will be pruned -- basicStatState: NONE colStatState: NONE -explain extended select * from loc_orc where year='2001' and year='__HIVE_DEFAULT_PARTITION__'; +explain select * from loc_orc where year='2001' and year='__HIVE_DEFAULT_PARTITION__'; -- partition level partial column statistics analyze table loc_orc partition(year='2001') compute statistics for columns state,locid; -- basicStatState: COMPLETE colStatState: NONE -explain extended select zip from loc_orc; +explain select zip from loc_orc; -- basicStatState: COMPLETE colStatState: PARTIAL -explain 
extended select state from loc_orc; +explain select state from loc_orc; -- column statistics for __HIVE_DEFAULT_PARTITION__ is not supported yet. Hence colStatState reports PARTIAL -- basicStatState: COMPLETE colStatState: PARTIAL -explain extended select state,locid from loc_orc; +explain select state,locid from loc_orc; -- basicStatState: COMPLETE colStatState: COMPLETE -explain extended select state,locid from loc_orc where year='2001'; +explain select state,locid from loc_orc where year='2001'; -- basicStatState: COMPLETE colStatState: NONE -explain extended select state,locid from loc_orc where year!='2001'; +explain select state,locid from loc_orc where year!='2001'; -- basicStatState: COMPLETE colStatState: PARTIAL -explain extended select * from loc_orc; +explain select * from loc_orc; -- This is to test filter expression evaluation on partition column -- numRows: 2 dataSize: 8 basicStatState: COMPLETE colStatState: COMPLETE -explain extended select locid from loc_orc where locid>0 and year='2001'; -explain extended select locid,year from loc_orc where locid>0 and year='2001'; -explain extended select * from (select locid,year from loc_orc) test where locid>0 and year='2001'; +explain select locid from loc_orc where locid>0 and year='2001'; +explain select locid,year from loc_orc where locid>0 and year='2001'; +explain select * from (select locid,year from loc_orc) test where locid>0 and year='2001'; diff --git ql/src/test/queries/clientpositive/annotate_stats_select.q ql/src/test/queries/clientpositive/annotate_stats_select.q index 5fc3f64..e958a00 100644 --- ql/src/test/queries/clientpositive/annotate_stats_select.q +++ ql/src/test/queries/clientpositive/annotate_stats_select.q @@ -28,116 +28,116 @@ load data local inpath '../../data/files/alltypes.txt' overwrite into table allt insert overwrite table alltypes_orc select * from alltypes; -- basicStatState: COMPLETE colStatState: NONE numRows: 2 rawDataSize: 1514 -explain extended select * from 
alltypes_orc; +explain select * from alltypes_orc; -- statistics for complex types are not supported yet analyze table alltypes_orc compute statistics for columns bo1, ti1, si1, i1, bi1, f1, d1, s1, vc1; -- numRows: 2 rawDataSize: 1514 -explain extended select * from alltypes_orc; +explain select * from alltypes_orc; -- numRows: 2 rawDataSize: 8 -explain extended select bo1 from alltypes_orc; +explain select bo1 from alltypes_orc; -- col alias renaming -- numRows: 2 rawDataSize: 8 -explain extended select i1 as int1 from alltypes_orc; +explain select i1 as int1 from alltypes_orc; -- numRows: 2 rawDataSize: 174 -explain extended select s1 from alltypes_orc; +explain select s1 from alltypes_orc; -- column statistics for complex types unsupported and so statistics will not be updated -- numRows: 2 rawDataSize: 1514 -explain extended select m1 from alltypes_orc; +explain select m1 from alltypes_orc; -- numRows: 2 rawDataSize: 246 -explain extended select bo1, ti1, si1, i1, bi1, f1, d1,s1 from alltypes_orc; +explain select bo1, ti1, si1, i1, bi1, f1, d1,s1 from alltypes_orc; -- numRows: 2 rawDataSize: 0 -explain extended select null from alltypes_orc; +explain select null from alltypes_orc; -- numRows: 2 rawDataSize: 8 -explain extended select 11 from alltypes_orc; +explain select 11 from alltypes_orc; -- numRows: 2 rawDataSize: 16 -explain extended select 11L from alltypes_orc; +explain select 11L from alltypes_orc; -- numRows: 2 rawDataSize: 16 -explain extended select 11.0 from alltypes_orc; +explain select 11.0 from alltypes_orc; -- numRows: 2 rawDataSize: 178 -explain extended select "hello" from alltypes_orc; -explain extended select cast("hello" as char(5)) from alltypes_orc; -explain extended select cast("hello" as varchar(5)) from alltypes_orc; +explain select "hello" from alltypes_orc; +explain select cast("hello" as char(5)) from alltypes_orc; +explain select cast("hello" as varchar(5)) from alltypes_orc; -- numRows: 2 rawDataSize: 96 -explain extended select 
unbase64("0xe23") from alltypes_orc; +explain select unbase64("0xe23") from alltypes_orc; -- numRows: 2 rawDataSize: 16 -explain extended select cast("1" as TINYINT), cast("20" as SMALLINT) from alltypes_orc; +explain select cast("1" as TINYINT), cast("20" as SMALLINT) from alltypes_orc; -- numRows: 2 rawDataSize: 80 -explain extended select cast("1970-12-31 15:59:58.174" as TIMESTAMP) from alltypes_orc; +explain select cast("1970-12-31 15:59:58.174" as TIMESTAMP) from alltypes_orc; -- numRows: 2 rawDataSize: 112 -explain extended select cast("1970-12-31 15:59:58.174" as DATE) from alltypes_orc; +explain select cast("1970-12-31 15:59:58.174" as DATE) from alltypes_orc; -- numRows: 2 rawDataSize: 224 -explain extended select cast("58.174" as DECIMAL) from alltypes_orc; +explain select cast("58.174" as DECIMAL) from alltypes_orc; -- numRows: 2 rawDataSize: 112 -explain extended select array(1,2,3) from alltypes_orc; +explain select array(1,2,3) from alltypes_orc; -- numRows: 2 rawDataSize: 1508 -explain extended select str_to_map("a=1 b=2 c=3", " ", "=") from alltypes_orc; +explain select str_to_map("a=1 b=2 c=3", " ", "=") from alltypes_orc; -- numRows: 2 rawDataSize: 112 -explain extended select NAMED_STRUCT("a", 11, "b", 11) from alltypes_orc; +explain select NAMED_STRUCT("a", 11, "b", 11) from alltypes_orc; -- numRows: 2 rawDataSize: 250 -explain extended select CREATE_UNION(0, "hello") from alltypes_orc; +explain select CREATE_UNION(0, "hello") from alltypes_orc; -- COUNT(*) is projected as new column. It is not projected as GenericUDF and so datasize estimate will be based on number of rows -- numRows: 1 rawDataSize: 8 -explain extended select count(*) from alltypes_orc; +explain select count(*) from alltypes_orc; -- COUNT(1) is projected as new column. 
It is not projected as GenericUDF and so datasize estimate will be based on number of rows -- numRows: 1 rawDataSize: 8 -explain extended select count(1) from alltypes_orc; +explain select count(1) from alltypes_orc; -- column statistics for complex column types will be missing. data size will be calculated from available column statistics -- numRows: 2 rawDataSize: 254 -explain extended select *,11 from alltypes_orc; +explain select *,11 from alltypes_orc; -- subquery selects -- inner select - numRows: 2 rawDataSize: 8 -- outer select - numRows: 2 rawDataSize: 8 -explain extended select i1 from (select i1 from alltypes_orc limit 10) temp; +explain select i1 from (select i1 from alltypes_orc limit 10) temp; -- inner select - numRows: 2 rawDataSize: 16 -- outer select - numRows: 2 rawDataSize: 8 -explain extended select i1 from (select i1,11 from alltypes_orc limit 10) temp; +explain select i1 from (select i1,11 from alltypes_orc limit 10) temp; -- inner select - numRows: 2 rawDataSize: 16 -- outer select - numRows: 2 rawDataSize: 186 -explain extended select i1,"hello" from (select i1,11 from alltypes_orc limit 10) temp; +explain select i1,"hello" from (select i1,11 from alltypes_orc limit 10) temp; -- inner select - numRows: 2 rawDataSize: 24 -- outer select - numRows: 2 rawDataSize: 16 -explain extended select x from (select i1,11.0 as x from alltypes_orc limit 10) temp; +explain select x from (select i1,11.0 as x from alltypes_orc limit 10) temp; -- inner select - numRows: 2 rawDataSize: 104 -- outer select - numRows: 2 rawDataSize: 186 -explain extended select x,"hello" from (select i1 as x, unbase64("0xe23") as ub from alltypes_orc limit 10) temp; +explain select x,"hello" from (select i1 as x, unbase64("0xe23") as ub from alltypes_orc limit 10) temp; -- inner select - numRows: 2 rawDataSize: 186 -- middle select - numRows: 2 rawDataSize: 178 -- outer select - numRows: 2 rawDataSize: 194 -explain extended select h, 11.0 from (select hell as h from (select i1, 
"hello" as hell from alltypes_orc limit 10) in1 limit 10) in2; +explain select h, 11.0 from (select hell as h from (select i1, "hello" as hell from alltypes_orc limit 10) in1 limit 10) in2; -- This test is for FILTER operator where filter expression is a boolean column -- numRows: 2 rawDataSize: 8 -explain extended select bo1 from alltypes_orc where bo1; +explain select bo1 from alltypes_orc where bo1; -- numRows: 0 rawDataSize: 0 -explain extended select bo1 from alltypes_orc where !bo1; +explain select bo1 from alltypes_orc where !bo1; diff --git ql/src/test/queries/clientpositive/annotate_stats_table.q ql/src/test/queries/clientpositive/annotate_stats_table.q index 4140fe6..ea1999f 100644 --- ql/src/test/queries/clientpositive/annotate_stats_table.q +++ ql/src/test/queries/clientpositive/annotate_stats_table.q @@ -10,7 +10,7 @@ create table if not exists emp_orc like emp_staging; alter table emp_orc set fileformat orc; -- basicStatState: NONE colStatState: NONE -explain extended select * from emp_orc; +explain select * from emp_orc; LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging; @@ -19,35 +19,35 @@ insert overwrite table emp_orc select * from emp_staging; -- stats are disabled. basic stats will report the file size but not raw data size. 
so initial statistics will be PARTIAL -- basicStatState: PARTIAL colStatState: NONE -explain extended select * from emp_orc; +explain select * from emp_orc; -- table level analyze statistics analyze table emp_orc compute statistics; -- basicStatState: COMPLETE colStatState: NONE -explain extended select * from emp_orc; +explain select * from emp_orc; -- column level partial statistics analyze table emp_orc compute statistics for columns deptid; -- basicStatState: COMPLETE colStatState: PARTIAL -explain extended select * from emp_orc; +explain select * from emp_orc; -- all selected columns have statistics -- basicStatState: COMPLETE colStatState: COMPLETE -explain extended select deptid from emp_orc; +explain select deptid from emp_orc; -- column level complete statistics analyze table emp_orc compute statistics for columns lastname,deptid; -- basicStatState: COMPLETE colStatState: COMPLETE -explain extended select * from emp_orc; +explain select * from emp_orc; -- basicStatState: COMPLETE colStatState: COMPLETE -explain extended select lastname from emp_orc; +explain select lastname from emp_orc; -- basicStatState: COMPLETE colStatState: COMPLETE -explain extended select deptid from emp_orc; +explain select deptid from emp_orc; -- basicStatState: COMPLETE colStatState: COMPLETE -explain extended select lastname,deptid from emp_orc; +explain select lastname,deptid from emp_orc; diff --git ql/src/test/queries/clientpositive/annotate_stats_union.q ql/src/test/queries/clientpositive/annotate_stats_union.q index 586d9e1..b0017f5 100644 --- ql/src/test/queries/clientpositive/annotate_stats_union.q +++ ql/src/test/queries/clientpositive/annotate_stats_union.q @@ -17,16 +17,16 @@ insert overwrite table loc_orc select * from loc_staging; analyze table loc_orc compute statistics for columns state,locid,zip,year; -- numRows: 8 rawDataSize: 688 -explain extended select state from loc_orc; +explain select state from loc_orc; -- numRows: 16 rawDataSize: 1376 -explain extended 
select * from (select state from loc_orc union all select state from loc_orc) tmp; +explain select * from (select state from loc_orc union all select state from loc_orc) tmp; -- numRows: 8 rawDataSize: 796 -explain extended select * from loc_orc; +explain select * from loc_orc; -- numRows: 16 rawDataSize: 1592 -explain extended select * from (select * from loc_orc union all select * from loc_orc) tmp; +explain select * from (select * from loc_orc union all select * from loc_orc) tmp; create database test; use test; @@ -49,7 +49,7 @@ analyze table loc_staging compute statistics for columns state,locid,zip,year; analyze table loc_orc compute statistics for columns state,locid,zip,year; -- numRows: 16 rawDataSize: 1376 -explain extended select * from (select state from default.loc_orc union all select state from test.loc_orc) temp; +explain select * from (select state from default.loc_orc union all select state from test.loc_orc) temp; -- numRows: 16 rawDataSize: 1376 -explain extended select * from (select state from test.loc_staging union all select state from test.loc_orc) temp; +explain select * from (select state from test.loc_staging union all select state from test.loc_orc) temp; diff --git ql/src/test/results/clientpositive/annotate_stats_filter.q.out ql/src/test/results/clientpositive/annotate_stats_filter.q.out index 7afed30..ee93ef0 100644 --- ql/src/test/results/clientpositive/annotate_stats_filter.q.out +++ ql/src/test/results/clientpositive/annotate_stats_filter.q.out @@ -51,27 +51,11 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] PREHOOK: query: -- numRows: 8 rawDataSize: 796 -explain extended select * from loc_orc +explain select * from loc_orc PREHOOK: type: QUERY POSTHOOK: query: -- 
numRows: 8 rawDataSize: 796 -explain extended select * from loc_orc +explain select * from loc_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -83,7 +67,6 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - GatherStats: false Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 @@ -92,33 +75,12 @@ STAGE PLANS: PREHOOK: query: -- column stats are not COMPLETE, so stats are not updated -- numRows: 8 rawDataSize: 796 -explain extended select * from loc_orc where state='OH' +explain select * from loc_orc where state='OH' PREHOOK: type: QUERY POSTHOOK: query: -- column stats are not COMPLETE, so stats are not updated -- numRows: 8 rawDataSize: 796 -explain extended select * from loc_orc where state='OH' +explain select * from loc_orc where state='OH' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - = - TOK_TABLE_OR_COL - state - 'OH' - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -130,9 +92,7 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - GatherStats: false Filter Operator - isSamplingPred: false predicate: (state = 'OH') (type: boolean) Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -141,76 +101,11 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A 
masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:int:bigint:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | 
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc [loc_orc] Stage: Stage-0 Fetch Operator @@ -228,33 +123,12 @@ POSTHOOK: Input: default@loc_orc #### A masked pattern was here #### PREHOOK: query: -- state column has 5 distincts. numRows/countDistincts -- numRows: 1 rawDataSize: 102 -explain extended select * from loc_orc where state='OH' +explain select * from loc_orc where state='OH' PREHOOK: type: QUERY POSTHOOK: query: -- state column has 5 distincts. numRows/countDistincts -- numRows: 1 rawDataSize: 102 -explain extended select * from loc_orc where state='OH' +explain select * from loc_orc where state='OH' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - = - TOK_TABLE_OR_COL - state - 'OH' - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -266,9 +140,7 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Filter Operator - isSamplingPred: false predicate: (state = 'OH') (type: boolean) Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -277,76 +149,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat 
- properties: - columns _col0,_col1,_col2,_col3 - columns.types string:int:bigint:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc [loc_orc] Stage: Stage-0 Fetch Operator @@ -356,33 
+163,12 @@ STAGE PLANS: PREHOOK: query: -- not equals comparison shouldn't affect number of rows -- numRows: 8 rawDataSize: 804 -explain extended select * from loc_orc where state!='OH' +explain select * from loc_orc where state!='OH' PREHOOK: type: QUERY POSTHOOK: query: -- not equals comparison shouldn't affect number of rows -- numRows: 8 rawDataSize: 804 -explain extended select * from loc_orc where state!='OH' +explain select * from loc_orc where state!='OH' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - != - TOK_TABLE_OR_COL - state - 'OH' - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -394,9 +180,7 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Filter Operator - isSamplingPred: false predicate: (state <> 'OH') (type: boolean) Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -405,76 +189,11 @@ STAGE PLANS: Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:int:bigint:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - 
MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc [loc_orc] Stage: Stage-0 Fetch Operator @@ -482,31 +201,10 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain extended select * from loc_orc where state<>'OH' +PREHOOK: query: explain select * from loc_orc where state<>'OH' PREHOOK: type: QUERY -POSTHOOK: query: explain extended select * from loc_orc where state<>'OH' +POSTHOOK: query: explain select * from loc_orc where 
state<>'OH' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - <> - TOK_TABLE_OR_COL - state - 'OH' - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -518,9 +216,7 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Filter Operator - isSamplingPred: false predicate: (state <> 'OH') (type: boolean) Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -529,76 +225,11 @@ STAGE PLANS: Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:int:bigint:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - 
field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc [loc_orc] Stage: Stage-0 Fetch Operator @@ -608,33 +239,12 @@ STAGE PLANS: PREHOOK: query: -- nulls are treated as constant equality comparison -- numRows: 1 rawDataSize: 102 -explain extended select * from loc_orc where zip is null +explain select * from loc_orc where zip is null PREHOOK: type: QUERY POSTHOOK: query: -- nulls are treated as constant equality comparison -- numRows: 1 rawDataSize: 102 -explain extended select * from loc_orc where zip is null +explain select * from loc_orc where zip is null POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - TOK_FUNCTION - TOK_ISNULL - TOK_TABLE_OR_COL - zip - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on 
stages: Stage-1 @@ -646,9 +256,7 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Filter Operator - isSamplingPred: false predicate: zip is null (type: boolean) Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -657,76 +265,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:int:bigint:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: 
org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc [loc_orc] Stage: Stage-0 Fetch Operator @@ -735,33 +278,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 1 rawDataSize: 102 -explain extended select * from loc_orc where !(zip is not null) +explain select * from loc_orc where !(zip is not null) PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 1 rawDataSize: 102 -explain extended select * from loc_orc where !(zip is not null) +explain select * from loc_orc where !(zip is not null) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - ! 
- TOK_FUNCTION - TOK_ISNOTNULL - TOK_TABLE_OR_COL - zip - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -773,9 +294,7 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Filter Operator - isSamplingPred: false predicate: (not zip is not null) (type: boolean) Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -784,76 +303,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:int:bigint:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - 
serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc [loc_orc] Stage: Stage-0 Fetch Operator @@ -863,33 +317,12 @@ STAGE PLANS: PREHOOK: query: -- not nulls are treated as inverse of nulls -- numRows: 7 rawDataSize: 702 -explain extended select * from loc_orc where zip is not null +explain select * from loc_orc where zip is not null PREHOOK: type: QUERY POSTHOOK: query: -- not nulls are treated as inverse of nulls -- numRows: 7 rawDataSize: 702 -explain extended select * from loc_orc where zip is not null +explain select * from loc_orc where zip is not null POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - TOK_FUNCTION - TOK_ISNOTNULL - TOK_TABLE_OR_COL - zip - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -901,9 +334,7 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Filter Operator - 
isSamplingPred: false predicate: zip is not null (type: boolean) Statistics: Num rows: 7 Data size: 702 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -912,76 +343,11 @@ STAGE PLANS: Statistics: Num rows: 7 Data size: 702 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 7 Data size: 702 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:int:bigint:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - 
COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc [loc_orc] Stage: Stage-0 Fetch Operator @@ -990,33 +356,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 7 rawDataSize: 702 -explain extended select * from loc_orc where !(zip is null) +explain select * from loc_orc where !(zip is null) PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 7 rawDataSize: 702 -explain extended select * from loc_orc where !(zip is null) +explain select * from loc_orc where !(zip is null) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - ! 
- TOK_FUNCTION - TOK_ISNULL - TOK_TABLE_OR_COL - zip - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1028,9 +372,7 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Filter Operator - isSamplingPred: false predicate: (not zip is null) (type: boolean) Statistics: Num rows: 7 Data size: 702 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -1039,76 +381,11 @@ STAGE PLANS: Statistics: Num rows: 7 Data size: 702 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 7 Data size: 702 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:int:bigint:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format 
| - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc [loc_orc] Stage: Stage-0 Fetch Operator @@ -1118,31 +395,12 @@ STAGE PLANS: PREHOOK: query: -- NOT evaluation. true will pass all rows, false will not pass any rows -- numRows: 8 rawDataSize: 804 -explain extended select * from loc_orc where !false +explain select * from loc_orc where !false PREHOOK: type: QUERY POSTHOOK: query: -- NOT evaluation. true will pass all rows, false will not pass any rows -- numRows: 8 rawDataSize: 804 -explain extended select * from loc_orc where !false +explain select * from loc_orc where !false POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - ! 
- false - - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -1154,7 +412,6 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 @@ -1162,30 +419,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 0 rawDataSize: 0 -explain extended select * from loc_orc where !true +explain select * from loc_orc where !true PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 0 rawDataSize: 0 -explain extended select * from loc_orc where !true +explain select * from loc_orc where !true POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - ! - true - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1197,9 +435,7 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Filter Operator - isSamplingPred: false predicate: false (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Select Operator @@ -1208,76 +444,11 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:int:bigint:int - escape.delim \ - hive.serialization.extend.nesting.levels true - 
serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: - -mr-10002default.loc_orc{} [loc_orc] - Path -> Partition: - -mr-10002default.loc_orc{} - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.NullStructSerDe - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - -mr-10002default.loc_orc{} [loc_orc] Stage: Stage-0 Fetch Operator @@ -1287,30 +458,12 @@ STAGE PLANS: PREHOOK: query: -- Constant evaluation. 
true will pass all rows, false will not pass any rows -- numRows: 8 rawDataSize: 804 -explain extended select * from loc_orc where true +explain select * from loc_orc where true PREHOOK: type: QUERY POSTHOOK: query: -- Constant evaluation. true will pass all rows, false will not pass any rows -- numRows: 8 rawDataSize: 804 -explain extended select * from loc_orc where true +explain select * from loc_orc where true POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - true - - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -1322,7 +475,6 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 @@ -1330,29 +482,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 8 rawDataSize: 804 -explain extended select * from loc_orc where 'foo' +explain select * from loc_orc where 'foo' PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 8 rawDataSize: 804 -explain extended select * from loc_orc where 'foo' +explain select * from loc_orc where 'foo' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - 'foo' - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1364,9 +498,7 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Filter Operator - isSamplingPred: false predicate: 'foo' (type: string) Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE Select 
Operator @@ -1375,76 +507,11 @@ STAGE PLANS: Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:int:bigint:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A 
masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc [loc_orc] Stage: Stage-0 Fetch Operator @@ -1453,31 +520,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 8 rawDataSize: 804 -explain extended select * from loc_orc where true = true +explain select * from loc_orc where true = true PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 8 rawDataSize: 804 -explain extended select * from loc_orc where true = true +explain select * from loc_orc where true = true POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - = - true - true - - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -1489,7 +536,6 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 @@ -1497,31 +543,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 0 rawDataSize: 0 -explain extended select * from loc_orc where false = true +explain select * from loc_orc where false = true PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 0 rawDataSize: 0 -explain extended select * from loc_orc where false = true +explain select * from loc_orc where false = true POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - 
TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - = - false - true - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1533,9 +559,7 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Filter Operator - isSamplingPred: false predicate: false (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Select Operator @@ -1544,76 +568,11 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:int:bigint:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: - -mr-10002default.loc_orc{} [loc_orc] - Path -> Partition: - -mr-10002default.loc_orc{} - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - 
serialization.format | - serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.NullStructSerDe - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - -mr-10002default.loc_orc{} [loc_orc] Stage: Stage-0 Fetch Operator @@ -1622,31 +581,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 0 rawDataSize: 0 -explain extended select * from loc_orc where 'foo' = 'bar' +explain select * from loc_orc where 'foo' = 'bar' PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 0 rawDataSize: 0 -explain extended select * from loc_orc where 'foo' = 'bar' +explain select * from loc_orc where 'foo' = 'bar' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - = - 'foo' - 'bar' - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1658,9 +597,7 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Filter Operator - isSamplingPred: false predicate: false (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: 
NONE Column stats: COMPLETE Select Operator @@ -1669,76 +606,11 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:int:bigint:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: - -mr-10002default.loc_orc{} [loc_orc] - Path -> Partition: - -mr-10002default.loc_orc{} - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.NullStructSerDe - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types 
string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - -mr-10002default.loc_orc{} [loc_orc] Stage: Stage-0 Fetch Operator @@ -1747,29 +619,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 0 rawDataSize: 0 -explain extended select * from loc_orc where false +explain select * from loc_orc where false PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 0 rawDataSize: 0 -explain extended select * from loc_orc where false +explain select * from loc_orc where false POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - false - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1781,9 +635,7 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Filter Operator - isSamplingPred: false predicate: false (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Select Operator @@ -1792,76 +644,11 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:int:bigint:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: - -mr-10002default.loc_orc{} [loc_orc] - Path -> Partition: - -mr-10002default.loc_orc{} - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.NullStructSerDe - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> 
Alias: - -mr-10002default.loc_orc{} [loc_orc] Stage: Stage-0 Fetch Operator @@ -1871,38 +658,12 @@ STAGE PLANS: PREHOOK: query: -- OR evaluation. 1 row for OH and 1 row for CA -- numRows: 2 rawDataSize: 204 -explain extended select * from loc_orc where state='OH' or state='CA' +explain select * from loc_orc where state='OH' or state='CA' PREHOOK: type: QUERY POSTHOOK: query: -- OR evaluation. 1 row for OH and 1 row for CA -- numRows: 2 rawDataSize: 204 -explain extended select * from loc_orc where state='OH' or state='CA' +explain select * from loc_orc where state='OH' or state='CA' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - or - = - TOK_TABLE_OR_COL - state - 'OH' - = - TOK_TABLE_OR_COL - state - 'CA' - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1914,9 +675,7 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Filter Operator - isSamplingPred: false predicate: ((state = 'OH') or (state = 'CA')) (type: boolean) Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -1925,76 +684,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:int:bigint:int - escape.delim \ - hive.serialization.extend.nesting.levels true - 
serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc [loc_orc] Stage: Stage-0 Fetch Operator @@ -2004,38 +698,12 @@ STAGE PLANS: PREHOOK: query: -- AND evaluation. cascadingly apply rules. 
8/2 = 4/2 = 2 -- numRows: 2 rawDataSize: 204 -explain extended select * from loc_orc where year=2001 and year is null +explain select * from loc_orc where year=2001 and year is null PREHOOK: type: QUERY POSTHOOK: query: -- AND evaluation. cascadingly apply rules. 8/2 = 4/2 = 2 -- numRows: 2 rawDataSize: 204 -explain extended select * from loc_orc where year=2001 and year is null +explain select * from loc_orc where year=2001 and year is null POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - and - = - TOK_TABLE_OR_COL - year - 2001 - TOK_FUNCTION - TOK_ISNULL - TOK_TABLE_OR_COL - year - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2047,9 +715,7 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Filter Operator - isSamplingPred: false predicate: ((year = 2001) and year is null) (type: boolean) Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -2058,76 +724,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:int:bigint:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc [loc_orc] Stage: Stage-0 Fetch Operator @@ -2136,42 +737,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 1 rawDataSize: 102 -explain extended select * from loc_orc where year=2001 and state='OH' and state='FL' +explain select * from loc_orc where year=2001 and state='OH' and state='FL' PREHOOK: type: 
QUERY POSTHOOK: query: -- numRows: 1 rawDataSize: 102 -explain extended select * from loc_orc where year=2001 and state='OH' and state='FL' +explain select * from loc_orc where year=2001 and state='OH' and state='FL' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - and - and - = - TOK_TABLE_OR_COL - year - 2001 - = - TOK_TABLE_OR_COL - state - 'OH' - = - TOK_TABLE_OR_COL - state - 'FL' - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2183,9 +753,7 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Filter Operator - isSamplingPred: false predicate: (((year = 2001) and (state = 'OH')) and (state = 'FL')) (type: boolean) Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -2194,76 +762,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:int:bigint:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here 
#### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc [loc_orc] Stage: Stage-0 Fetch Operator @@ -2273,43 +776,12 @@ STAGE PLANS: PREHOOK: query: -- AND and OR together. left expr will yield 1 row and right will yield 1 row -- numRows: 3 rawDataSize: 306 -explain extended select * from loc_orc where (year=2001 and year is null) or (state='CA') +explain select * from loc_orc where (year=2001 and year is null) or (state='CA') PREHOOK: type: QUERY POSTHOOK: query: -- AND and OR together. 
left expr will yield 1 row and right will yield 1 row -- numRows: 3 rawDataSize: 306 -explain extended select * from loc_orc where (year=2001 and year is null) or (state='CA') +explain select * from loc_orc where (year=2001 and year is null) or (state='CA') POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - or - and - = - TOK_TABLE_OR_COL - year - 2001 - TOK_FUNCTION - TOK_ISNULL - TOK_TABLE_OR_COL - year - = - TOK_TABLE_OR_COL - state - 'CA' - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2321,9 +793,7 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Filter Operator - isSamplingPred: false predicate: (((year = 2001) and year is null) or (state = 'CA')) (type: boolean) Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -2332,76 +802,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:int:bigint:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### 
- Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc [loc_orc] Stage: Stage-0 Fetch Operator @@ -2411,43 +816,12 @@ STAGE PLANS: PREHOOK: query: -- AND and OR together. left expr will yield 8 rows and right will yield 1 row -- numRows: 1 rawDataSize: 102 -explain extended select * from loc_orc where (year=2001 or year is null) and (state='CA') +explain select * from loc_orc where (year=2001 or year is null) and (state='CA') PREHOOK: type: QUERY POSTHOOK: query: -- AND and OR together. 
left expr will yield 8 rows and right will yield 1 row -- numRows: 1 rawDataSize: 102 -explain extended select * from loc_orc where (year=2001 or year is null) and (state='CA') +explain select * from loc_orc where (year=2001 or year is null) and (state='CA') POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - and - or - = - TOK_TABLE_OR_COL - year - 2001 - TOK_FUNCTION - TOK_ISNULL - TOK_TABLE_OR_COL - year - = - TOK_TABLE_OR_COL - state - 'CA' - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2459,9 +833,7 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Filter Operator - isSamplingPred: false predicate: (((year = 2001) or year is null) and (state = 'CA')) (type: boolean) Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -2470,76 +842,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:int:bigint:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### 
- Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc [loc_orc] Stage: Stage-0 Fetch Operator @@ -2549,33 +856,12 @@ STAGE PLANS: PREHOOK: query: -- all inequality conditions rows/3 is the rules -- numRows: 2 rawDataSize: 204 -explain extended select * from loc_orc where locid < 30 +explain select * from loc_orc where locid < 30 PREHOOK: type: QUERY POSTHOOK: query: -- all inequality conditions rows/3 is the rules -- numRows: 2 rawDataSize: 204 -explain extended select * from loc_orc where locid < 30 +explain select * from 
loc_orc where locid < 30 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - < - TOK_TABLE_OR_COL - locid - 30 - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2587,9 +873,7 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Filter Operator - isSamplingPred: false predicate: (locid < 30) (type: boolean) Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -2598,76 +882,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:int:bigint:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - 
field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc [loc_orc] Stage: Stage-0 Fetch Operator @@ -2675,31 +894,10 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain extended select * from loc_orc where locid > 30 +PREHOOK: query: explain select * from loc_orc where locid > 30 PREHOOK: type: QUERY -POSTHOOK: query: explain extended select * from loc_orc where locid > 30 +POSTHOOK: query: explain select * from loc_orc where locid > 30 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - > - TOK_TABLE_OR_COL - locid - 30 - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2711,9 +909,7 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: 
COMPLETE Column stats: COMPLETE - GatherStats: false Filter Operator - isSamplingPred: false predicate: (locid > 30) (type: boolean) Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -2722,76 +918,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:int:bigint:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc [loc_orc] Stage: Stage-0 Fetch Operator @@ -2799,31 +930,10 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain extended select * from loc_orc where locid <= 30 +PREHOOK: query: explain select * from loc_orc where locid <= 30 PREHOOK: type: QUERY -POSTHOOK: query: explain extended select * from loc_orc where locid <= 30 +POSTHOOK: query: explain select * from loc_orc where locid <= 30 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - <= - TOK_TABLE_OR_COL - locid - 30 - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2835,9 +945,7 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Filter Operator - isSamplingPred: false predicate: (locid <= 30) (type: boolean) Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -2846,76 +954,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 
Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:int:bigint:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib 
org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc [loc_orc] Stage: Stage-0 Fetch Operator @@ -2923,31 +966,10 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain extended select * from loc_orc where locid >= 30 +PREHOOK: query: explain select * from loc_orc where locid >= 30 PREHOOK: type: QUERY -POSTHOOK: query: explain extended select * from loc_orc where locid >= 30 +POSTHOOK: query: explain select * from loc_orc where locid >= 30 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - >= - TOK_TABLE_OR_COL - locid - 30 - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2959,9 +981,7 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Filter Operator - isSamplingPred: false predicate: (locid >= 30) (type: boolean) Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -2970,76 +990,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:int:bigint:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - 
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc [loc_orc] Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/annotate_stats_groupby.q.out ql/src/test/results/clientpositive/annotate_stats_groupby.q.out index ad1d2f8..224db28 100644 --- 
ql/src/test/results/clientpositive/annotate_stats_groupby.q.out +++ ql/src/test/results/clientpositive/annotate_stats_groupby.q.out @@ -51,27 +51,11 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] PREHOOK: query: -- numRows: 8 rawDataSize: 796 -explain extended select * from loc_orc +explain select * from loc_orc PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 8 rawDataSize: 796 -explain extended select * from loc_orc +explain select * from loc_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -83,7 +67,6 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE - GatherStats: false Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 @@ -102,7 +85,7 @@ POSTHOOK: Input: default@loc_orc #### A masked pattern was here #### PREHOOK: query: -- inner group by: map - numRows: 8 reduce - numRows: 4 -- outer group by: map - numRows: 4 reduce numRows: 2 -explain extended select a, c, min(b) +explain select a, c, min(b) from ( select state as a, locid as b, count(*) as c from loc_orc group by state,locid @@ -111,69 +94,13 @@ group by a,c PREHOOK: type: QUERY POSTHOOK: query: -- inner group by: map - numRows: 8 reduce - numRows: 4 -- outer group by: map - numRows: 4 reduce numRows: 2 -explain extended select a, c, min(b) +explain select a, c, min(b) from ( select state as a, locid as b, count(*) as c from loc_orc group by state,locid ) sq1 group 
by a,c POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_SUBQUERY - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - state - a - TOK_SELEXPR - TOK_TABLE_OR_COL - locid - b - TOK_SELEXPR - TOK_FUNCTIONSTAR - count - c - TOK_GROUPBY - TOK_TABLE_OR_COL - state - TOK_TABLE_OR_COL - locid - sq1 - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - a - TOK_SELEXPR - TOK_TABLE_OR_COL - c - TOK_SELEXPR - TOK_FUNCTION - min - TOK_TABLE_OR_COL - b - TOK_GROUPBY - TOK_TABLE_OR_COL - a - TOK_TABLE_OR_COL - c - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -186,7 +113,6 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: PARTIAL - GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: state, locid @@ -196,173 +122,65 @@ STAGE PLANS: keys: state (type: string), locid (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL - tag: -1 + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col2 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc [sq1:loc_orc] - Needs Tagging: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 400 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 400 
Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: min(_col1) keys: _col0 (type: string), _col2 (type: bigint) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 416 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string,bigint,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - GatherStats: false Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL - tag: -1 + Statistics: Num rows: 4 Data size: 416 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col2 (type: int) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string,bigint,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat 
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string,bigint,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Truncated Path -> Alias: -#### A masked pattern was here #### - Needs Tagging: false Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 196 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 196 Basic stats: COMPLETE Column stats: PARTIAL -#### A masked pattern was here #### + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string:bigint:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -381,33 +199,13 @@ POSTHOOK: Input: default@loc_orc PREHOOK: query: -- only one distinct 
value in year column + 1 NULL value -- map-side GBY: numRows: 8 (map-side will not do any reduction) -- reduce-side GBY: numRows: 2 -explain extended select year from loc_orc group by year +explain select year from loc_orc group by year PREHOOK: type: QUERY POSTHOOK: query: -- only one distinct value in year column + 1 NULL value -- map-side GBY: numRows: 8 (map-side will not do any reduction) -- reduce-side GBY: numRows: 2 -explain extended select year from loc_orc group by year +explain select year from loc_orc group by year POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - year - TOK_GROUPBY - TOK_TABLE_OR_COL - year - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -419,7 +217,6 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: year (type: int) outputColumnNames: year @@ -434,60 +231,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib 
org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc [loc_orc] - Needs Tagging: false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -500,25 +243,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -528,37 +257,12 @@ STAGE PLANS: PREHOOK: query: -- map-side GBY: numRows: 8 -- reduce-side GBY: numRows: 4 -explain extended select 
state,locid from loc_orc group by state,locid +explain select state,locid from loc_orc group by state,locid PREHOOK: type: QUERY POSTHOOK: query: -- map-side GBY: numRows: 8 -- reduce-side GBY: numRows: 4 -explain extended select state,locid from loc_orc group by state,locid +explain select state,locid from loc_orc group by state,locid POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - state - TOK_SELEXPR - TOK_TABLE_OR_COL - locid - TOK_GROUPBY - TOK_TABLE_OR_COL - state - TOK_TABLE_OR_COL - locid - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -570,7 +274,6 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: state, locid @@ -585,60 +288,6 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked 
pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc [loc_orc] - Needs Tagging: false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int) @@ -651,25 +300,11 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -678,36 +313,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16 -explain extended select state,locid from loc_orc group 
by state,locid with cube +explain select state,locid from loc_orc group by state,locid with cube PREHOOK: type: QUERY POSTHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16 -explain extended select state,locid from loc_orc group by state,locid with cube +explain select state,locid from loc_orc group by state,locid with cube POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - state - TOK_SELEXPR - TOK_TABLE_OR_COL - locid - TOK_CUBE_GROUPBY - TOK_TABLE_OR_COL - state - TOK_TABLE_OR_COL - locid - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -719,7 +329,6 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: state, locid @@ -734,60 +343,6 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib 
org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc [loc_orc] - Needs Tagging: false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) @@ -800,25 +355,11 @@ STAGE PLANS: Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -827,36 +368,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- 
map-side GBY numRows: 24 reduce-side GBY numRows: 12 -explain extended select state,locid from loc_orc group by state,locid with rollup +explain select state,locid from loc_orc group by state,locid with rollup PREHOOK: type: QUERY POSTHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12 -explain extended select state,locid from loc_orc group by state,locid with rollup +explain select state,locid from loc_orc group by state,locid with rollup POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - state - TOK_SELEXPR - TOK_TABLE_OR_COL - locid - TOK_ROLLUP_GROUPBY - TOK_TABLE_OR_COL - state - TOK_TABLE_OR_COL - locid - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -868,7 +384,6 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: state, locid @@ -883,60 +398,6 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string 
state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc [loc_orc] - Needs Tagging: false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) @@ -949,25 +410,11 @@ STAGE PLANS: Statistics: Num rows: 12 Data size: 1080 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 12 Data size: 1080 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 
Fetch Operator @@ -976,39 +423,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- map-side GBY numRows: 8 reduce-side GBY numRows: 4 -explain extended select state,locid from loc_orc group by state,locid grouping sets((state)) +explain select state,locid from loc_orc group by state,locid grouping sets((state)) PREHOOK: type: QUERY POSTHOOK: query: -- map-side GBY numRows: 8 reduce-side GBY numRows: 4 -explain extended select state,locid from loc_orc group by state,locid grouping sets((state)) +explain select state,locid from loc_orc group by state,locid grouping sets((state)) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - state - TOK_SELEXPR - TOK_TABLE_OR_COL - locid - TOK_GROUPING_SETS - TOK_TABLE_OR_COL - state - TOK_TABLE_OR_COL - locid - TOK_GROUPING_SETS_EXPRESSION - TOK_TABLE_OR_COL - state - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1020,7 +439,6 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: state, locid @@ -1035,60 +453,6 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types 
string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc [loc_orc] - Needs Tagging: false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) @@ -1101,25 +465,11 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1128,42 +478,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- map-side GBY numRows: 16 reduce-side GBY numRows: 8 -explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid)) +explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid)) PREHOOK: type: QUERY POSTHOOK: query: -- map-side GBY numRows: 16 reduce-side GBY numRows: 8 -explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid)) +explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid)) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - state - TOK_SELEXPR - TOK_TABLE_OR_COL - locid - TOK_GROUPING_SETS - TOK_TABLE_OR_COL - state - TOK_TABLE_OR_COL - locid - TOK_GROUPING_SETS_EXPRESSION - TOK_TABLE_OR_COL - state - TOK_GROUPING_SETS_EXPRESSION - TOK_TABLE_OR_COL - locid - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1175,7 +494,6 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: state, locid @@ -1190,60 +508,6 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file 
name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc [loc_orc] - Needs Tagging: false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) @@ -1256,25 +520,11 @@ STAGE PLANS: Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1283,43 +533,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12 -explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid),()) +explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid),()) PREHOOK: type: QUERY POSTHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12 -explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid),()) +explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid),()) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - state - TOK_SELEXPR - TOK_TABLE_OR_COL - locid - TOK_GROUPING_SETS - TOK_TABLE_OR_COL - state - TOK_TABLE_OR_COL - locid - TOK_GROUPING_SETS_EXPRESSION - TOK_TABLE_OR_COL - state - TOK_GROUPING_SETS_EXPRESSION - TOK_TABLE_OR_COL - locid - TOK_GROUPING_SETS_EXPRESSION - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1331,7 +549,6 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: state, locid @@ -1346,60 +563,6 @@ STAGE PLANS: sort order: +++ Map-reduce partition 
columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc [loc_orc] - Needs Tagging: false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) @@ -1412,25 +575,11 @@ STAGE PLANS: Statistics: Num rows: 12 Data size: 
1080 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 12 Data size: 1080 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1439,48 +588,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16 -explain extended select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),()) +explain select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),()) PREHOOK: type: QUERY POSTHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16 -explain extended select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),()) +explain select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),()) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - state - TOK_SELEXPR - TOK_TABLE_OR_COL - locid - TOK_GROUPING_SETS - TOK_TABLE_OR_COL - state - TOK_TABLE_OR_COL - locid - TOK_GROUPING_SETS_EXPRESSION - TOK_TABLE_OR_COL - state - TOK_TABLE_OR_COL - locid - TOK_GROUPING_SETS_EXPRESSION - TOK_TABLE_OR_COL - state - TOK_GROUPING_SETS_EXPRESSION - TOK_TABLE_OR_COL - 
locid - TOK_GROUPING_SETS_EXPRESSION - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1492,7 +604,6 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: state, locid @@ -1507,60 +618,6 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib 
org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc [loc_orc] - Needs Tagging: false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) @@ -1573,25 +630,11 @@ STAGE PLANS: Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1601,32 +644,12 @@ STAGE PLANS: PREHOOK: query: -- map-side GBY: numRows: 80 (map-side will not do any reduction) -- reduce-side GBY: numRows: 2 Reason: numDistinct of year is 2. numRows = min(80/2, 2) -explain extended select year from loc_orc group by year +explain select year from loc_orc group by year PREHOOK: type: QUERY POSTHOOK: query: -- map-side GBY: numRows: 80 (map-side will not do any reduction) -- reduce-side GBY: numRows: 2 Reason: numDistinct of year is 2. 
numRows = min(80/2, 2) -explain extended select year from loc_orc group by year +explain select year from loc_orc group by year POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - year - TOK_GROUPBY - TOK_TABLE_OR_COL - year - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1638,7 +661,6 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: year (type: int) outputColumnNames: year @@ -1653,60 +675,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 80 Data size: 280 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types 
string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc [loc_orc] - Needs Tagging: false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -1719,25 +687,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1746,36 +700,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- map-side GBY numRows: 320 reduce-side GBY numRows: 42 Reason: numDistinct of state and locid are 6,7 resp. numRows = min(320/2, 6*7) -explain extended select state,locid from loc_orc group by state,locid with cube +explain select state,locid from loc_orc group by state,locid with cube PREHOOK: type: QUERY POSTHOOK: query: -- map-side GBY numRows: 320 reduce-side GBY numRows: 42 Reason: numDistinct of state and locid are 6,7 resp. 
numRows = min(320/2, 6*7) -explain extended select state,locid from loc_orc group by state,locid with cube +explain select state,locid from loc_orc group by state,locid with cube POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - state - TOK_SELEXPR - TOK_TABLE_OR_COL - locid - TOK_CUBE_GROUPBY - TOK_TABLE_OR_COL - state - TOK_TABLE_OR_COL - locid - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1787,7 +716,6 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: state, locid @@ -1802,91 +730,23 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) Statistics: Num rows: 320 Data size: 31840 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc [loc_orc] - Needs Tagging: false Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 42 Data size: 7350 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 35 Data size: 6125 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 42 Data size: 3780 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 35 Data size: 3150 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 42 Data size: 3780 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### + Statistics: Num rows: 35 Data size: 3150 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:int - escape.delim \ - hive.serialization.extend.nesting.levels 
true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/annotate_stats_join.q.out ql/src/test/results/clientpositive/annotate_stats_join.q.out index 2a6348c..fba3664 100644 --- ql/src/test/results/clientpositive/annotate_stats_join.q.out +++ ql/src/test/results/clientpositive/annotate_stats_join.q.out @@ -1,30 +1,32 @@ -PREHOOK: query: create table if not exists emp_staging ( +PREHOOK: query: create table if not exists emp ( lastname string, - deptid int + deptid int, + locid int ) row format delimited fields terminated by '|' stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default -POSTHOOK: query: create table if not exists emp_staging ( +POSTHOOK: query: create table if not exists emp ( lastname string, - deptid int + deptid int, + locid int ) row format delimited fields terminated by '|' stored as textfile POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default -POSTHOOK: Output: default@emp_staging -PREHOOK: query: create table if not exists dept_staging ( +POSTHOOK: Output: default@emp +PREHOOK: query: create table if not exists dept ( deptid int, deptname string ) row format delimited fields terminated by '|' stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default -POSTHOOK: query: create table if not exists dept_staging ( +POSTHOOK: query: create table if not exists dept ( deptid int, deptname string ) row format delimited fields terminated by '|' stored as textfile POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default -POSTHOOK: Output: default@dept_staging -PREHOOK: query: create table if not exists loc_staging ( +POSTHOOK: Output: default@dept +PREHOOK: query: create table if not exists loc ( state string, locid int, zip bigint, @@ -32,7 +34,7 @@ PREHOOK: 
query: create table if not exists loc_staging ( ) row format delimited fields terminated by '|' stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default -POSTHOOK: query: create table if not exists loc_staging ( +POSTHOOK: query: create table if not exists loc ( state string, locid int, zip bigint, @@ -40,197 +42,115 @@ POSTHOOK: query: create table if not exists loc_staging ( ) row format delimited fields terminated by '|' stored as textfile POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default -POSTHOOK: Output: default@loc_staging -PREHOOK: query: create table if not exists emp_orc like emp_staging -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: create table if not exists emp_orc like emp_staging -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@emp_orc -PREHOOK: query: alter table emp_orc set fileformat orc -PREHOOK: type: ALTERTABLE_FILEFORMAT -PREHOOK: Input: default@emp_orc -PREHOOK: Output: default@emp_orc -POSTHOOK: query: alter table emp_orc set fileformat orc -POSTHOOK: type: ALTERTABLE_FILEFORMAT -POSTHOOK: Input: default@emp_orc -POSTHOOK: Output: default@emp_orc -PREHOOK: query: create table if not exists dept_orc like dept_staging -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: create table if not exists dept_orc like dept_staging -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dept_orc -PREHOOK: query: alter table dept_orc set fileformat orc -PREHOOK: type: ALTERTABLE_FILEFORMAT -PREHOOK: Input: default@dept_orc -PREHOOK: Output: default@dept_orc -POSTHOOK: query: alter table dept_orc set fileformat orc -POSTHOOK: type: ALTERTABLE_FILEFORMAT -POSTHOOK: Input: default@dept_orc -POSTHOOK: Output: default@dept_orc -PREHOOK: query: create table loc_orc like loc_staging -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: create table loc_orc 
like loc_staging -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@loc_orc -PREHOOK: query: alter table loc_orc set fileformat orc -PREHOOK: type: ALTERTABLE_FILEFORMAT -PREHOOK: Input: default@loc_orc -PREHOOK: Output: default@loc_orc -POSTHOOK: query: alter table loc_orc set fileformat orc -POSTHOOK: type: ALTERTABLE_FILEFORMAT -POSTHOOK: Input: default@loc_orc -POSTHOOK: Output: default@loc_orc -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging +POSTHOOK: Output: default@loc +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp PREHOOK: type: LOAD #### A masked pattern was here #### -PREHOOK: Output: default@emp_staging -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging +PREHOOK: Output: default@emp +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp POSTHOOK: type: LOAD #### A masked pattern was here #### -POSTHOOK: Output: default@emp_staging -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dept.txt' OVERWRITE INTO TABLE dept_staging +POSTHOOK: Output: default@emp +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dept.txt' OVERWRITE INTO TABLE dept PREHOOK: type: LOAD #### A masked pattern was here #### -PREHOOK: Output: default@dept_staging -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dept.txt' OVERWRITE INTO TABLE dept_staging +PREHOOK: Output: default@dept +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dept.txt' OVERWRITE INTO TABLE dept POSTHOOK: type: LOAD #### A masked pattern was here #### -POSTHOOK: Output: default@dept_staging -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc_staging +POSTHOOK: Output: default@dept +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc PREHOOK: type: LOAD #### A masked pattern was 
here #### -PREHOOK: Output: default@loc_staging -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc_staging +PREHOOK: Output: default@loc +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc POSTHOOK: type: LOAD #### A masked pattern was here #### -POSTHOOK: Output: default@loc_staging -PREHOOK: query: insert overwrite table emp_orc select * from emp_staging +POSTHOOK: Output: default@loc +PREHOOK: query: analyze table emp compute statistics PREHOOK: type: QUERY -PREHOOK: Input: default@emp_staging -PREHOOK: Output: default@emp_orc -POSTHOOK: query: insert overwrite table emp_orc select * from emp_staging +PREHOOK: Input: default@emp +PREHOOK: Output: default@emp +POSTHOOK: query: analyze table emp compute statistics POSTHOOK: type: QUERY -POSTHOOK: Input: default@emp_staging -POSTHOOK: Output: default@emp_orc -POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] -POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] -PREHOOK: query: insert overwrite table dept_orc select * from dept_staging +POSTHOOK: Input: default@emp +POSTHOOK: Output: default@emp +PREHOOK: query: analyze table dept compute statistics PREHOOK: type: QUERY -PREHOOK: Input: default@dept_staging -PREHOOK: Output: default@dept_orc -POSTHOOK: query: insert overwrite table dept_orc select * from dept_staging +PREHOOK: Input: default@dept +PREHOOK: Output: default@dept +POSTHOOK: query: analyze table dept compute statistics POSTHOOK: type: QUERY -POSTHOOK: Input: default@dept_staging -POSTHOOK: Output: default@dept_orc -POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] -POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] -PREHOOK: query: 
insert overwrite table loc_orc select * from loc_staging +POSTHOOK: Input: default@dept +POSTHOOK: Output: default@dept +PREHOOK: query: analyze table loc compute statistics PREHOOK: type: QUERY -PREHOOK: Input: default@loc_staging -PREHOOK: Output: default@loc_orc -POSTHOOK: query: insert overwrite table loc_orc select * from loc_staging +PREHOOK: Input: default@loc +PREHOOK: Output: default@loc +POSTHOOK: query: analyze table loc compute statistics POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_staging -POSTHOOK: Output: default@loc_orc -POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] -POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] -POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] -POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] -PREHOOK: query: analyze table emp_orc compute statistics for columns lastname,deptid +POSTHOOK: Input: default@loc +POSTHOOK: Output: default@loc +PREHOOK: query: analyze table emp compute statistics for columns lastname,deptid,locid PREHOOK: type: QUERY -PREHOOK: Input: default@emp_orc +PREHOOK: Input: default@emp #### A masked pattern was here #### -POSTHOOK: query: analyze table emp_orc compute statistics for columns lastname,deptid +POSTHOOK: query: analyze table emp compute statistics for columns lastname,deptid,locid POSTHOOK: type: QUERY -POSTHOOK: Input: default@emp_orc +POSTHOOK: Input: default@emp #### A masked pattern was here #### -PREHOOK: query: analyze table dept_orc compute statistics for columns deptname,deptid +PREHOOK: query: analyze table dept compute statistics for columns deptname,deptid PREHOOK: type: QUERY -PREHOOK: Input: default@dept_orc +PREHOOK: Input: default@dept #### A masked pattern was here #### -POSTHOOK: query: analyze table 
dept_orc compute statistics for columns deptname,deptid +POSTHOOK: query: analyze table dept compute statistics for columns deptname,deptid POSTHOOK: type: QUERY -POSTHOOK: Input: default@dept_orc +POSTHOOK: Input: default@dept #### A masked pattern was here #### -PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +PREHOOK: query: analyze table loc compute statistics for columns state,locid,zip,year PREHOOK: type: QUERY -PREHOOK: Input: default@loc_orc +PREHOOK: Input: default@loc #### A masked pattern was here #### -POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +POSTHOOK: query: analyze table loc compute statistics for columns state,locid,zip,year POSTHOOK: type: QUERY -POSTHOOK: Input: default@loc_orc +POSTHOOK: Input: default@loc #### A masked pattern was here #### PREHOOK: query: -- number of rows --- emp_orc - 6 --- dept_orc - 4 --- loc_orc - 8 +-- emp - 48 +-- dept - 6 +-- loc - 8 -- count distincts for relevant columns (since count distinct values are approximate in some cases count distint values will be greater than number of rows) --- emp_orc.deptid - 3 --- emp_orc.lastname - 7 --- dept_orc.deptid - 6 --- dept_orc.deptname - 5 --- loc_orc.locid - 6 --- loc_orc.state - 7 - --- Expected output rows: 4 --- Reason: #rows = (6*4)/max(3,6) -explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) +-- emp.deptid - 3 +-- emp.lastname - 6 +-- emp.locid - 7 +-- dept.deptid - 7 +-- dept.deptname - 6 +-- loc.locid - 7 +-- loc.state - 6 + +-- 2 relations, 1 attribute +-- Expected output rows: (48*6)/max(3,7) = 41 +explain select * from emp e join dept d on (e.deptid = d.deptid) PREHOOK: type: QUERY POSTHOOK: query: -- number of rows --- emp_orc - 6 --- dept_orc - 4 --- loc_orc - 8 +-- emp - 48 +-- dept - 6 +-- loc - 8 -- count distincts for relevant columns (since count distinct values are approximate in some cases count distint values will be greater than 
number of rows) --- emp_orc.deptid - 3 --- emp_orc.lastname - 7 --- dept_orc.deptid - 6 --- dept_orc.deptname - 5 --- loc_orc.locid - 6 --- loc_orc.state - 7 - --- Expected output rows: 4 --- Reason: #rows = (6*4)/max(3,6) -explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) +-- emp.deptid - 3 +-- emp.lastname - 6 +-- emp.locid - 7 +-- dept.deptid - 7 +-- dept.deptname - 6 +-- loc.locid - 7 +-- loc.state - 6 + +-- 2 relations, 1 attribute +-- Expected output rows: (48*6)/max(3,7) = 41 +explain select * from emp e join dept d on (e.deptid = d.deptid) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_JOIN - TOK_TABREF - TOK_TABNAME - emp_orc - e - TOK_TABREF - TOK_TABNAME - dept_orc - d - = - . - TOK_TABLE_OR_COL - e - deptid - . - TOK_TABLE_OR_COL - d - deptid - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -241,169 +161,48 @@ STAGE PLANS: Map Operator Tree: TableScan alias: d - Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - isSamplingPred: false predicate: deptid is not null (type: boolean) - Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: deptid (type: int) sort order: + Map-reduce partition columns: deptid (type: int) - Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE - tag: 1 + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE value expressions: deptname (type: string) - auto parallelism: false TableScan alias: e - Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: 
COMPLETE - GatherStats: false + Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - isSamplingPred: false predicate: deptid is not null (type: boolean) - Statistics: Num rows: 5 Data size: 471 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: deptid (type: int) sort order: + Map-reduce partition columns: deptid (type: int) - Statistics: Num rows: 5 Data size: 471 Basic stats: COMPLETE Column stats: COMPLETE - tag: 0 - value expressions: lastname (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: dept_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns deptid,deptname - columns.comments - columns.types int:string - field.delim | -#### A masked pattern was here #### - name default.dept_orc - numFiles 1 - numRows 4 - rawDataSize 384 - serialization.ddl struct dept_orc { i32 deptid, string deptname} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 329 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns deptid,deptname - columns.comments - columns.types int:string - field.delim | -#### A masked pattern was here #### - name default.dept_orc - numFiles 1 - numRows 4 - rawDataSize 384 - serialization.ddl struct dept_orc { i32 deptid, string deptname} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - 
totalSize 329 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.dept_orc - name: default.dept_orc -#### A masked pattern was here #### - Partition - base file name: emp_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns lastname,deptid - columns.comments - columns.types string:int - field.delim | -#### A masked pattern was here #### - name default.emp_orc - numFiles 1 - numRows 6 - rawDataSize 560 - serialization.ddl struct emp_orc { string lastname, i32 deptid} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 349 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns lastname,deptid - columns.comments - columns.types string:int - field.delim | -#### A masked pattern was here #### - name default.emp_orc - numFiles 1 - numRows 6 - rawDataSize 560 - serialization.ddl struct emp_orc { string lastname, i32 deptid} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 349 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.emp_orc - name: default.emp_orc - Truncated Path -> Alias: - /dept_orc [d] - /emp_orc [e] - Needs Tagging: true + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: lastname (type: string), locid (type: int) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} 1 {KEY.reducesinkkey0} 
{VALUE._col0} - outputColumnNames: _col0, _col1, _col4, _col5 - Statistics: Num rows: 10 Data size: 1830 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col5, _col6 + Statistics: Num rows: 41 Data size: 7954 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col4 (type: int), _col5 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 10 Data size: 1830 Basic stats: COMPLETE Column stats: COMPLETE + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 41 Data size: 7954 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 10 Data size: 1830 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### + Statistics: Num rows: 41 Data size: 7954 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:int:int:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -411,61 +210,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: -- 3 way join --- Expected output rows: 4 --- Reason: #rows = (6*4*6)/max(3,6)*max(6,3) -explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join emp_orc e1 on (e.deptid = e1.deptid) +PREHOOK: query: -- 2 relations, 2 attributes 
+-- Expected output rows: (48*6)/(max(3,7) * max(6,6)) = 6 +explain select * from emp,dept where emp.deptid = dept.deptid and emp.lastname = dept.deptname PREHOOK: type: QUERY -POSTHOOK: query: -- 3 way join --- Expected output rows: 4 --- Reason: #rows = (6*4*6)/max(3,6)*max(6,3) -explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join emp_orc e1 on (e.deptid = e1.deptid) +POSTHOOK: query: -- 2 relations, 2 attributes +-- Expected output rows: (48*6)/(max(3,7) * max(6,6)) = 6 +explain select * from emp,dept where emp.deptid = dept.deptid and emp.lastname = dept.deptname POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_JOIN - TOK_JOIN - TOK_TABREF - TOK_TABNAME - emp_orc - e - TOK_TABREF - TOK_TABNAME - dept_orc - d - = - . - TOK_TABLE_OR_COL - e - deptid - . - TOK_TABLE_OR_COL - d - deptid - TOK_TABREF - TOK_TABNAME - emp_orc - e1 - = - . - TOK_TABLE_OR_COL - e - deptid - . - TOK_TABLE_OR_COL - e1 - deptid - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -475,188 +227,51 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: d - Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false + alias: dept + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - isSamplingPred: false - predicate: deptid is not null (type: boolean) - Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (deptid is not null and deptname is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: deptid (type: int) - sort order: + - Map-reduce partition columns: deptid (type: int) - Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: 
COMPLETE - tag: 1 - value expressions: deptname (type: string) - auto parallelism: false + key expressions: deptid (type: int), deptname (type: string) + sort order: ++ + Map-reduce partition columns: deptid (type: int), deptname (type: string) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE TableScan - alias: e1 - Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false + alias: emp + Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - isSamplingPred: false - predicate: deptid is not null (type: boolean) - Statistics: Num rows: 5 Data size: 471 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: deptid (type: int) - sort order: + - Map-reduce partition columns: deptid (type: int) - Statistics: Num rows: 5 Data size: 471 Basic stats: COMPLETE Column stats: COMPLETE - tag: 2 - value expressions: lastname (type: string) - auto parallelism: false - TableScan - alias: e - Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: deptid is not null (type: boolean) - Statistics: Num rows: 5 Data size: 471 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (deptid is not null and lastname is not null) (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: deptid (type: int) - sort order: + - Map-reduce partition columns: deptid (type: int) - Statistics: Num rows: 5 Data size: 471 Basic stats: COMPLETE Column stats: COMPLETE - tag: 0 - value expressions: lastname (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: dept_orc - input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns deptid,deptname - columns.comments - columns.types int:string - field.delim | -#### A masked pattern was here #### - name default.dept_orc - numFiles 1 - numRows 4 - rawDataSize 384 - serialization.ddl struct dept_orc { i32 deptid, string deptname} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 329 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns deptid,deptname - columns.comments - columns.types int:string - field.delim | -#### A masked pattern was here #### - name default.dept_orc - numFiles 1 - numRows 4 - rawDataSize 384 - serialization.ddl struct dept_orc { i32 deptid, string deptname} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 329 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.dept_orc - name: default.dept_orc -#### A masked pattern was here #### - Partition - base file name: emp_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns lastname,deptid - columns.comments - columns.types string:int - field.delim | -#### A masked pattern was here #### - name default.emp_orc - numFiles 1 - numRows 6 - rawDataSize 560 - serialization.ddl struct emp_orc { string lastname, i32 deptid} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 349 -#### A masked pattern was here #### - serde: 
org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns lastname,deptid - columns.comments - columns.types string:int - field.delim | -#### A masked pattern was here #### - name default.emp_orc - numFiles 1 - numRows 6 - rawDataSize 560 - serialization.ddl struct emp_orc { string lastname, i32 deptid} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 349 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.emp_orc - name: default.emp_orc - Truncated Path -> Alias: - /dept_orc [d] - /emp_orc [e1, e] - Needs Tagging: true + key expressions: deptid (type: int), lastname (type: string) + sort order: ++ + Map-reduce partition columns: deptid (type: int), lastname (type: string) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: locid (type: int) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {VALUE._col0} {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Statistics: Num rows: 50 Data size: 13700 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col4 (type: int), _col5 (type: string), _col8 (type: string), _col9 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 50 Data size: 13700 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 50 Data size: 13700 Basic stats: COMPLETE Column stats: COMPLETE 
-#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:int:int:string:string:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col2, _col5, _col6 + Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((_col1 = _col5) and (_col0 = _col6)) (type: boolean) + Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -664,59 +279,10 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: -- Expected output rows: 5 --- Reason: #rows = (6*4*8)/max(3,6)*max(6,6) -explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.locid) +PREHOOK: query: explain select * from emp e join dept d on (e.deptid = d.deptid and e.lastname = 
d.deptname) PREHOOK: type: QUERY -POSTHOOK: query: -- Expected output rows: 5 --- Reason: #rows = (6*4*8)/max(3,6)*max(6,6) -explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.locid) +POSTHOOK: query: explain select * from emp e join dept d on (e.deptid = d.deptid and e.lastname = d.deptname) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_JOIN - TOK_JOIN - TOK_TABREF - TOK_TABNAME - emp_orc - e - TOK_TABREF - TOK_TABNAME - dept_orc - d - = - . - TOK_TABLE_OR_COL - e - deptid - . - TOK_TABLE_OR_COL - d - deptid - TOK_TABREF - TOK_TABNAME - loc_orc - l - = - . - TOK_TABLE_OR_COL - e - deptid - . - TOK_TABLE_OR_COL - l - locid - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -727,234 +293,47 @@ STAGE PLANS: Map Operator Tree: TableScan alias: d - Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - isSamplingPred: false - predicate: deptid is not null (type: boolean) - Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (deptid is not null and deptname is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: deptid (type: int) - sort order: + - Map-reduce partition columns: deptid (type: int) - Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE - tag: 1 - value expressions: deptname (type: string) - auto parallelism: false + key expressions: deptid (type: int), deptname (type: string) + sort order: ++ + Map-reduce partition columns: deptid (type: int), deptname (type: string) + Statistics: Num rows: 6 Data size: 570 
Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: e - Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: deptid is not null (type: boolean) - Statistics: Num rows: 5 Data size: 471 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: deptid (type: int) - sort order: + - Map-reduce partition columns: deptid (type: int) - Statistics: Num rows: 5 Data size: 471 Basic stats: COMPLETE Column stats: COMPLETE - tag: 0 - value expressions: lastname (type: string) - auto parallelism: false - TableScan - alias: l - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false + Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - isSamplingPred: false - predicate: locid is not null (type: boolean) - Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (deptid is not null and lastname is not null) (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: locid (type: int) - sort order: + - Map-reduce partition columns: locid (type: int) - Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE - tag: 2 - value expressions: state (type: string), zip (type: bigint), year (type: int) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: dept_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns deptid,deptname - columns.comments - columns.types int:string - field.delim | -#### A masked pattern was here #### - name 
default.dept_orc - numFiles 1 - numRows 4 - rawDataSize 384 - serialization.ddl struct dept_orc { i32 deptid, string deptname} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 329 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns deptid,deptname - columns.comments - columns.types int:string - field.delim | -#### A masked pattern was here #### - name default.dept_orc - numFiles 1 - numRows 4 - rawDataSize 384 - serialization.ddl struct dept_orc { i32 deptid, string deptname} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 329 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.dept_orc - name: default.dept_orc -#### A masked pattern was here #### - Partition - base file name: emp_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns lastname,deptid - columns.comments - columns.types string:int - field.delim | -#### A masked pattern was here #### - name default.emp_orc - numFiles 1 - numRows 6 - rawDataSize 560 - serialization.ddl struct emp_orc { string lastname, i32 deptid} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 349 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns lastname,deptid - columns.comments - columns.types string:int - 
field.delim | -#### A masked pattern was here #### - name default.emp_orc - numFiles 1 - numRows 6 - rawDataSize 560 - serialization.ddl struct emp_orc { string lastname, i32 deptid} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 349 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.emp_orc - name: default.emp_orc -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /dept_orc [d] - /emp_orc [e] - /loc_orc [l] - Needs Tagging: true + key 
expressions: deptid (type: int), lastname (type: string) + sort order: ++ + Map-reduce partition columns: deptid (type: int), lastname (type: string) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: locid (type: int) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} - outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col10, _col11 - Statistics: Num rows: 80 Data size: 22468 Basic stats: COMPLETE Column stats: COMPLETE + 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col2, _col5, _col6 + Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col4 (type: int), _col5 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: bigint), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 80 Data size: 22468 Basic stats: COMPLETE Column stats: COMPLETE + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 80 Data size: 22468 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### + Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 - columns.types string:int:int:string:string:int:bigint:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -962,61 +341,83 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: -- join keys of different types --- Expected output rows: 4 --- Reason: #rows = (6*4*8)/max(3,6)*max(6,7) -explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.state) +PREHOOK: query: -- 2 relations, 3 attributes +-- Expected output rows: (48*6)/(max(3,7) * max(6,6) * max(6,6)) = 1 +explain select * from emp,dept where emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname PREHOOK: type: QUERY -POSTHOOK: query: -- join keys of different types --- Expected output rows: 4 --- Reason: #rows = (6*4*8)/max(3,6)*max(6,7) -explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.state) +POSTHOOK: query: -- 2 relations, 3 attributes +-- Expected output rows: (48*6)/(max(3,7) * max(6,6) * max(6,6)) = 1 +explain select * from emp,dept where emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_JOIN - TOK_JOIN - TOK_TABREF - TOK_TABNAME - emp_orc - e - TOK_TABREF - TOK_TABNAME - dept_orc - d - = - . - TOK_TABLE_OR_COL - e - deptid - . - TOK_TABLE_OR_COL - d - deptid - TOK_TABREF - TOK_TABNAME - loc_orc - l - = - . - TOK_TABLE_OR_COL - e - deptid - . 
- TOK_TABLE_OR_COL - l - state - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: dept + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and deptname is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: deptid (type: int), deptname (type: string), deptname (type: string) + sort order: +++ + Map-reduce partition columns: deptid (type: int), deptname (type: string), deptname (type: string) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: emp + Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and lastname is not null) (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: deptid (type: int), lastname (type: string), lastname (type: string) + sort order: +++ + Map-reduce partition columns: deptid (type: int), lastname (type: string), lastname (type: string) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: locid (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col2, _col5, _col6 + Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (((_col1 = _col5) and (_col0 = _col6)) and (_col6 = _col0)) (type: boolean) + 
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +PREHOOK: query: -- 3 relations, 1 attribute +-- Expected output rows: (48*6*48)/top2largest(3,7,3) = 658 +explain select * from emp e join dept d on (e.deptid = d.deptid) join emp e1 on (e.deptid = e1.deptid) +PREHOOK: type: QUERY +POSTHOOK: query: -- 3 relations, 1 attribute +-- Expected output rows: (48*6*48)/top2largest(3,7,3) = 658 +explain select * from emp e join dept d on (e.deptid = d.deptid) join emp e1 on (e.deptid = e1.deptid) +POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1027,234 +428,62 @@ STAGE PLANS: Map Operator Tree: TableScan alias: d - Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - isSamplingPred: false - predicate: UDFToDouble(deptid) is not null (type: boolean) - Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE + predicate: deptid is not null (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: UDFToDouble(deptid) (type: double) + key expressions: deptid (type: int) sort 
order: + - Map-reduce partition columns: UDFToDouble(deptid) (type: double) - Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE - tag: 1 - value expressions: deptid (type: int), deptname (type: string) - auto parallelism: false + Map-reduce partition columns: deptid (type: int) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: deptname (type: string) TableScan - alias: e - Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false + alias: e1 + Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - isSamplingPred: false - predicate: UDFToDouble(deptid) is not null (type: boolean) - Statistics: Num rows: 3 Data size: 281 Basic stats: COMPLETE Column stats: COMPLETE + predicate: deptid is not null (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: UDFToDouble(deptid) (type: double) + key expressions: deptid (type: int) sort order: + - Map-reduce partition columns: UDFToDouble(deptid) (type: double) - Statistics: Num rows: 3 Data size: 281 Basic stats: COMPLETE Column stats: COMPLETE - tag: 0 - value expressions: lastname (type: string), deptid (type: int) - auto parallelism: false + Map-reduce partition columns: deptid (type: int) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: lastname (type: string), locid (type: int) TableScan - alias: l - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false + alias: e + Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - isSamplingPred: false - predicate: UDFToDouble(state) is not null (type: boolean) - Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + 
predicate: deptid is not null (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: UDFToDouble(state) (type: double) + key expressions: deptid (type: int) sort order: + - Map-reduce partition columns: UDFToDouble(state) (type: double) - Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - tag: 2 - value expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: dept_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns deptid,deptname - columns.comments - columns.types int:string - field.delim | -#### A masked pattern was here #### - name default.dept_orc - numFiles 1 - numRows 4 - rawDataSize 384 - serialization.ddl struct dept_orc { i32 deptid, string deptname} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 329 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns deptid,deptname - columns.comments - columns.types int:string - field.delim | -#### A masked pattern was here #### - name default.dept_orc - numFiles 1 - numRows 4 - rawDataSize 384 - serialization.ddl struct dept_orc { i32 deptid, string deptname} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 329 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: 
default.dept_orc - name: default.dept_orc -#### A masked pattern was here #### - Partition - base file name: emp_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns lastname,deptid - columns.comments - columns.types string:int - field.delim | -#### A masked pattern was here #### - name default.emp_orc - numFiles 1 - numRows 6 - rawDataSize 560 - serialization.ddl struct emp_orc { string lastname, i32 deptid} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 349 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns lastname,deptid - columns.comments - columns.types string:int - field.delim | -#### A masked pattern was here #### - name default.emp_orc - numFiles 1 - numRows 6 - rawDataSize 560 - serialization.ddl struct emp_orc { string lastname, i32 deptid} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 349 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.emp_orc - name: default.emp_orc -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} 
- serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /dept_orc [d] - /emp_orc [e] - /loc_orc [l] - Needs Tagging: true + Map-reduce partition columns: deptid (type: int) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: lastname (type: string), locid (type: int) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 Inner Join 0 to 2 condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 {VALUE._col0} {VALUE._col1} - 2 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} - outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col10, _col11 - Statistics: Num rows: 2 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col9, _col10, _col11 + Statistics: Num rows: 658 Data size: 192794 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col4 
(type: int), _col5 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: bigint), _col11 (type: int) + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col5 (type: int), _col6 (type: string), _col9 (type: string), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 2 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 658 Data size: 192794 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### + Statistics: Num rows: 658 Data size: 192794 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 - columns.types string:int:int:string:string:int:bigint:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1262,57 +491,12 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: -- multi-attribute join --- Expected output rows: 0 --- Reason: #rows = (6*4)/max(3,6)*max(7,5) -explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname) +PREHOOK: query: -- Expected output rows: (48*6*8)/top2largest(3,7,7) = 47 +explain select * from emp e join dept d on (e.deptid = d.deptid) join loc l on (e.deptid = l.locid) PREHOOK: type: QUERY -POSTHOOK: query: -- multi-attribute join --- Expected 
output rows: 0 --- Reason: #rows = (6*4)/max(3,6)*max(7,5) -explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname) +POSTHOOK: query: -- Expected output rows: (48*6*8)/top2largest(3,7,7) = 47 +explain select * from emp e join dept d on (e.deptid = d.deptid) join loc l on (e.deptid = l.locid) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_JOIN - TOK_TABREF - TOK_TABNAME - emp_orc - e - TOK_TABREF - TOK_TABNAME - dept_orc - d - and - = - . - TOK_TABLE_OR_COL - e - deptid - . - TOK_TABLE_OR_COL - d - deptid - = - . - TOK_TABLE_OR_COL - e - lastname - . - TOK_TABLE_OR_COL - d - deptname - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1323,167 +507,62 @@ STAGE PLANS: Map Operator Tree: TableScan alias: d - Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - isSamplingPred: false - predicate: (deptid is not null and deptname is not null) (type: boolean) - Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE + predicate: deptid is not null (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: deptid (type: int), deptname (type: string) - sort order: ++ - Map-reduce partition columns: deptid (type: int), deptname (type: string) - Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE - tag: 1 - auto parallelism: false + key expressions: deptid (type: int) + sort order: + + Map-reduce partition columns: deptid (type: int) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: deptname (type: string) 
TableScan alias: e - Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false + Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - isSamplingPred: false - predicate: (deptid is not null and lastname is not null) (type: boolean) - Statistics: Num rows: 5 Data size: 471 Basic stats: COMPLETE Column stats: COMPLETE + predicate: deptid is not null (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: deptid (type: int), lastname (type: string) - sort order: ++ - Map-reduce partition columns: deptid (type: int), lastname (type: string) - Statistics: Num rows: 5 Data size: 471 Basic stats: COMPLETE Column stats: COMPLETE - tag: 0 - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: dept_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns deptid,deptname - columns.comments - columns.types int:string - field.delim | -#### A masked pattern was here #### - name default.dept_orc - numFiles 1 - numRows 4 - rawDataSize 384 - serialization.ddl struct dept_orc { i32 deptid, string deptname} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 329 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns deptid,deptname - columns.comments - columns.types int:string - field.delim | -#### A masked pattern was here #### - name default.dept_orc - numFiles 1 - 
numRows 4 - rawDataSize 384 - serialization.ddl struct dept_orc { i32 deptid, string deptname} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 329 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.dept_orc - name: default.dept_orc -#### A masked pattern was here #### - Partition - base file name: emp_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns lastname,deptid - columns.comments - columns.types string:int - field.delim | -#### A masked pattern was here #### - name default.emp_orc - numFiles 1 - numRows 6 - rawDataSize 560 - serialization.ddl struct emp_orc { string lastname, i32 deptid} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 349 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns lastname,deptid - columns.comments - columns.types string:int - field.delim | -#### A masked pattern was here #### - name default.emp_orc - numFiles 1 - numRows 6 - rawDataSize 560 - serialization.ddl struct emp_orc { string lastname, i32 deptid} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 349 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.emp_orc - name: default.emp_orc - Truncated Path -> Alias: - /dept_orc [d] - /emp_orc [e] - Needs Tagging: true + key expressions: deptid (type: int) + sort order: + + Map-reduce partition columns: deptid (type: int) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column 
stats: COMPLETE + value expressions: lastname (type: string), locid (type: int) + TableScan + alias: l + Statistics: Num rows: 8 Data size: 109 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: locid is not null (type: boolean) + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: locid (type: int) + sort order: + + Map-reduce partition columns: locid (type: int) + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: state (type: string), zip (type: bigint), year (type: int) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 condition expressions: - 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - outputColumnNames: _col0, _col1, _col4, _col5 - Statistics: Num rows: 5 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} + outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col9, _col10, _col11, _col12 + Statistics: Num rows: 47 Data size: 13900 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col4 (type: int), _col5 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col5 (type: int), _col6 (type: string), _col9 (type: string), _col10 (type: int), _col11 (type: bigint), _col12 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 47 Data size: 13900 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern 
was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 5 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE -#### A masked pattern was here #### + Statistics: Num rows: 47 Data size: 13900 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:int:int:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1491,81 +570,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: -- 3 way and multi-attribute join --- Expected output rows: 0 --- Reason: #rows = (6*4*8)/max(3,6)*max(7,5)*max(3,6)*max(7,7) -explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc_orc l on (e.deptid = l.locid and e.lastname = l.state) +PREHOOK: query: -- 3 relations and 2 attribute +-- Expected output rows: (48*6*8)/top2largest(3,7,7)*top2largest(6,6,6) = 1 +explain select * from emp e join dept d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc l on (e.deptid = l.locid and e.lastname = l.state) PREHOOK: type: QUERY -POSTHOOK: query: -- 3 way and multi-attribute join --- Expected output rows: 0 --- Reason: #rows = (6*4*8)/max(3,6)*max(7,5)*max(3,6)*max(7,7) -explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc_orc l on (e.deptid = l.locid and e.lastname = l.state) +POSTHOOK: query: -- 3 relations and 2 attribute +-- Expected output rows: (48*6*8)/top2largest(3,7,7)*top2largest(6,6,6) = 1 +explain select * from emp e join dept d on (e.deptid = d.deptid and e.lastname = d.deptname) 
join loc l on (e.deptid = l.locid and e.lastname = l.state) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_JOIN - TOK_JOIN - TOK_TABREF - TOK_TABNAME - emp_orc - e - TOK_TABREF - TOK_TABNAME - dept_orc - d - and - = - . - TOK_TABLE_OR_COL - e - deptid - . - TOK_TABLE_OR_COL - d - deptid - = - . - TOK_TABLE_OR_COL - e - lastname - . - TOK_TABLE_OR_COL - d - deptname - TOK_TABREF - TOK_TABNAME - loc_orc - l - and - = - . - TOK_TABLE_OR_COL - e - deptid - . - TOK_TABLE_OR_COL - l - locid - = - . - TOK_TABLE_OR_COL - e - lastname - . - TOK_TABLE_OR_COL - l - state - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1576,40 +588,31 @@ STAGE PLANS: Map Operator Tree: TableScan alias: d - Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - isSamplingPred: false predicate: (deptid is not null and deptname is not null) (type: boolean) - Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: deptid (type: int), deptname (type: string) sort order: ++ Map-reduce partition columns: deptid (type: int), deptname (type: string) - Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE - tag: 1 - auto parallelism: false + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: e - Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false + Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - isSamplingPred: false predicate: (deptid is not null and 
lastname is not null) (type: boolean) - Statistics: Num rows: 5 Data size: 471 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: deptid (type: int), lastname (type: string) sort order: ++ Map-reduce partition columns: deptid (type: int), lastname (type: string) - Statistics: Num rows: 5 Data size: 471 Basic stats: COMPLETE Column stats: COMPLETE - tag: 0 - auto parallelism: false + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: locid (type: int) TableScan alias: l - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false + Statistics: Num rows: 8 Data size: 109 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - isSamplingPred: false predicate: (locid is not null and state is not null) (type: boolean) Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator @@ -1617,191 +620,29 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: locid (type: int), state (type: string) Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE - tag: 2 value expressions: zip (type: bigint), year (type: int) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: dept_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns deptid,deptname - columns.comments - columns.types int:string - field.delim | -#### A masked pattern was here #### - name default.dept_orc - numFiles 1 - numRows 4 - rawDataSize 384 - serialization.ddl struct dept_orc { i32 deptid, string deptname} - serialization.format | - 
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 329 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns deptid,deptname - columns.comments - columns.types int:string - field.delim | -#### A masked pattern was here #### - name default.dept_orc - numFiles 1 - numRows 4 - rawDataSize 384 - serialization.ddl struct dept_orc { i32 deptid, string deptname} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 329 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.dept_orc - name: default.dept_orc -#### A masked pattern was here #### - Partition - base file name: emp_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns lastname,deptid - columns.comments - columns.types string:int - field.delim | -#### A masked pattern was here #### - name default.emp_orc - numFiles 1 - numRows 6 - rawDataSize 560 - serialization.ddl struct emp_orc { string lastname, i32 deptid} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 349 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns lastname,deptid - columns.comments - columns.types string:int - field.delim | -#### A masked pattern was here #### - name default.emp_orc - numFiles 1 - numRows 6 - rawDataSize 560 - serialization.ddl struct emp_orc { string 
lastname, i32 deptid} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 349 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.emp_orc - name: default.emp_orc -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /dept_orc [d] - /emp_orc [e] - /loc_orc [l] - Needs Tagging: true Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 Inner Join 0 to 2 condition expressions: - 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} + 0 
{KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} 2 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} - outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col10, _col11 - Statistics: Num rows: 40 Data size: 468 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col4 (type: int), _col5 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: bigint), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 40 Data size: 468 Basic stats: COMPLETE Column stats: COMPLETE + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col5 (type: int), _col6 (type: string), _col9 (type: string), _col10 (type: int), _col11 (type: bigint), _col12 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 40 Data size: 468 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### + Statistics: Num rows: 1 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 - columns.types string:int:int:string:string:int:bigint:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/annotate_stats_limit.q.out ql/src/test/results/clientpositive/annotate_stats_limit.q.out index 5c150f4..24e6d18 100644 --- ql/src/test/results/clientpositive/annotate_stats_limit.q.out +++ ql/src/test/results/clientpositive/annotate_stats_limit.q.out @@ -59,27 +59,11 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc #### A masked pattern was here #### PREHOOK: query: -- numRows: 8 rawDataSize: 796 -explain extended select * from loc_orc +explain select * from loc_orc PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 8 rawDataSize: 796 -explain extended select * from loc_orc +explain select * from loc_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -91,7 +75,6 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 @@ -99,29 +82,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 4 rawDataSize: 396 -explain extended select * from loc_orc limit 4 +explain select * from loc_orc limit 4 PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 4 rawDataSize: 396 -explain extended select * from loc_orc limit 4 +explain select * from loc_orc limit 4 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_LIMIT - 4 - - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -133,7 +98,6 
@@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 @@ -145,30 +109,12 @@ STAGE PLANS: PREHOOK: query: -- greater than the available number of rows -- numRows: 8 rawDataSize: 796 -explain extended select * from loc_orc limit 16 +explain select * from loc_orc limit 16 PREHOOK: type: QUERY POSTHOOK: query: -- greater than the available number of rows -- numRows: 8 rawDataSize: 796 -explain extended select * from loc_orc limit 16 +explain select * from loc_orc limit 16 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_LIMIT - 16 - - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -180,7 +126,6 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 @@ -191,29 +136,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 0 rawDataSize: 0 -explain extended select * from loc_orc limit 0 +explain select * from loc_orc limit 0 PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 0 rawDataSize: 0 -explain extended select * from loc_orc limit 0 +explain select * from loc_orc limit 0 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_LIMIT - 0 - - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -225,7 +152,6 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 
Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 diff --git ql/src/test/results/clientpositive/annotate_stats_part.q.out ql/src/test/results/clientpositive/annotate_stats_part.q.out index 043af14..e32afdd 100644 --- ql/src/test/results/clientpositive/annotate_stats_part.q.out +++ ql/src/test/results/clientpositive/annotate_stats_part.q.out @@ -39,27 +39,11 @@ POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@loc_orc PREHOOK: query: -- basicStatState: NONE colStatState: NONE -explain extended select * from loc_orc +explain select * from loc_orc PREHOOK: type: QUERY POSTHOOK: query: -- basicStatState: NONE colStatState: NONE -explain extended select * from loc_orc +explain select * from loc_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -71,7 +55,6 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - GatherStats: false Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 @@ -96,29 +79,13 @@ POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE PREHOOK: query: -- stats are disabled. basic stats will report the file size but not raw data size. so initial statistics will be PARTIAL -- basicStatState: PARTIAL colStatState: NONE -explain extended select * from loc_orc +explain select * from loc_orc PREHOOK: type: QUERY POSTHOOK: query: -- stats are disabled. basic stats will report the file size but not raw data size. 
so initial statistics will be PARTIAL -- basicStatState: PARTIAL colStatState: NONE -explain extended select * from loc_orc +explain select * from loc_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -126,102 +93,14 @@ STAGE PLANS: Stage: Stage-0 Fetch Operator limit: -1 - Partition Description: - Partition - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - partition values: - year 2001 - properties: - COLUMN_STATS_ACCURATE false - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows -1 - partition_columns year - partition_columns.types string - rawDataSize -1 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 405 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - partition_columns year - partition_columns.types string - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Partition - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - partition values: - year __HIVE_DEFAULT_PARTITION__ - properties: - COLUMN_STATS_ACCURATE false - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows -1 - partition_columns year - partition_columns.types string - rawDataSize -1 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 325 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - partition_columns year - partition_columns.types string - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 5 Data size: 730 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + Statistics: Num rows: 5 Data size: 724 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 730 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 724 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- partition level analyze statistics for specific parition @@ -239,32 +118,11 @@ POSTHOOK: Input: 
default@loc_orc@year=2001 POSTHOOK: Output: default@loc_orc POSTHOOK: Output: default@loc_orc@year=2001 PREHOOK: query: -- basicStatState: PARTIAL colStatState: NONE -explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__' +explain select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__' PREHOOK: type: QUERY POSTHOOK: query: -- basicStatState: PARTIAL colStatState: NONE -explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__' +explain select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - = - TOK_TABLE_OR_COL - year - '__HIVE_DEFAULT_PARTITION__' - - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -272,55 +130,10 @@ STAGE PLANS: Stage: Stage-0 Fetch Operator limit: -1 - Partition Description: - Partition - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - partition values: - year __HIVE_DEFAULT_PARTITION__ - properties: - COLUMN_STATS_ACCURATE false - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows -1 - partition_columns year - partition_columns.types string - rawDataSize -1 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 325 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked 
pattern was here #### - name default.loc_orc - partition_columns year - partition_columns.types string - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc Processor Tree: TableScan alias: loc_orc Statistics: Num rows: 2 Data size: 325 Basic stats: COMPLETE Column stats: NONE - GatherStats: false Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 @@ -328,27 +141,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- basicStatState: PARTIAL colStatState: NONE -explain extended select * from loc_orc +explain select * from loc_orc PREHOOK: type: QUERY POSTHOOK: query: -- basicStatState: PARTIAL colStatState: NONE -explain extended select * from loc_orc +explain select * from loc_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -356,131 +153,22 @@ STAGE PLANS: Stage: Stage-0 Fetch Operator limit: -1 - Partition Description: - Partition - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - partition values: - year 2001 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 7 - partition_columns year - partition_columns.types string - rawDataSize 0 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib 
org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 405 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - partition_columns year - partition_columns.types string - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Partition - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - partition values: - year __HIVE_DEFAULT_PARTITION__ - properties: - COLUMN_STATS_ACCURATE false - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows -1 - partition_columns year - partition_columns.types string - rawDataSize -1 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 325 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - partition_columns year - partition_columns.types string - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - 
serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 9 Data size: 730 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + Statistics: Num rows: 9 Data size: 724 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 9 Data size: 730 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 724 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE -explain extended select * from loc_orc where year='2001' +explain select * from loc_orc where year='2001' PREHOOK: type: QUERY POSTHOOK: query: -- basicStatState: COMPLETE colStatState: NONE -explain extended select * from loc_orc where year='2001' +explain select * from loc_orc where year='2001' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - = - TOK_TABLE_OR_COL - year - '2001' - - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -488,59 +176,14 @@ STAGE PLANS: Stage: Stage-0 Fetch Operator limit: -1 - Partition Description: - Partition - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - partition values: - year 2001 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 7 - partition_columns year - partition_columns.types string 
- rawDataSize 0 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 405 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - partition_columns year - partition_columns.types string - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 7 Data size: 405 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + Statistics: Num rows: 7 Data size: 399 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 7 Data size: 405 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 399 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- partition level analyze statistics for all partitions @@ -562,32 +205,11 @@ POSTHOOK: Output: default@loc_orc POSTHOOK: Output: default@loc_orc@year=2001 POSTHOOK: Output: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__ PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE -explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__' +explain select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__' PREHOOK: type: QUERY POSTHOOK: query: -- basicStatState: 
COMPLETE colStatState: NONE -explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__' +explain select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - = - TOK_TABLE_OR_COL - year - '__HIVE_DEFAULT_PARTITION__' - - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -595,55 +217,10 @@ STAGE PLANS: Stage: Stage-0 Fetch Operator limit: -1 - Partition Description: - Partition - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - partition values: - year __HIVE_DEFAULT_PARTITION__ - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 1 - partition_columns year - partition_columns.types string - rawDataSize 0 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 325 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - partition_columns year - partition_columns.types string - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - 
name: default.loc_orc Processor Tree: TableScan alias: loc_orc Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: NONE - GatherStats: false Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 @@ -651,27 +228,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE -explain extended select * from loc_orc +explain select * from loc_orc PREHOOK: type: QUERY POSTHOOK: query: -- basicStatState: COMPLETE colStatState: NONE -explain extended select * from loc_orc +explain select * from loc_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -679,136 +240,22 @@ STAGE PLANS: Stage: Stage-0 Fetch Operator limit: -1 - Partition Description: - Partition - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - partition values: - year 2001 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 7 - partition_columns year - partition_columns.types string - rawDataSize 0 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 405 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A 
masked pattern was here #### - name default.loc_orc - partition_columns year - partition_columns.types string - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Partition - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - partition values: - year __HIVE_DEFAULT_PARTITION__ - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 1 - partition_columns year - partition_columns.types string - rawDataSize 0 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 325 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - partition_columns year - partition_columns.types string - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 730 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + Statistics: Num rows: 8 Data size: 
724 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 730 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE -explain extended select * from loc_orc where year='2001' or year='__HIVE_DEFAULT_PARTITION__' +explain select * from loc_orc where year='2001' or year='__HIVE_DEFAULT_PARTITION__' PREHOOK: type: QUERY POSTHOOK: query: -- basicStatState: COMPLETE colStatState: NONE -explain extended select * from loc_orc where year='2001' or year='__HIVE_DEFAULT_PARTITION__' +explain select * from loc_orc where year='2001' or year='__HIVE_DEFAULT_PARTITION__' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - or - = - TOK_TABLE_OR_COL - year - '2001' - = - TOK_TABLE_OR_COL - year - '__HIVE_DEFAULT_PARTITION__' - - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -816,138 +263,24 @@ STAGE PLANS: Stage: Stage-0 Fetch Operator limit: -1 - Partition Description: - Partition - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - partition values: - year 2001 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 7 - partition_columns year - partition_columns.types string - rawDataSize 0 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 
405 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - partition_columns year - partition_columns.types string - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Partition - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - partition values: - year __HIVE_DEFAULT_PARTITION__ - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 1 - partition_columns year - partition_columns.types string - rawDataSize 0 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 325 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - partition_columns year - partition_columns.types string - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib 
org.apache.hadoop.hive.ql.io.orc.OrcSerde -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 730 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 730 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- both partitions will be pruned -- basicStatState: NONE colStatState: NONE -explain extended select * from loc_orc where year='2001' and year='__HIVE_DEFAULT_PARTITION__' +explain select * from loc_orc where year='2001' and year='__HIVE_DEFAULT_PARTITION__' PREHOOK: type: QUERY POSTHOOK: query: -- both partitions will be pruned -- basicStatState: NONE colStatState: NONE -explain extended select * from loc_orc where year='2001' and year='__HIVE_DEFAULT_PARTITION__' +explain select * from loc_orc where year='2001' and year='__HIVE_DEFAULT_PARTITION__' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - and - = - TOK_TABLE_OR_COL - year - '2001' - = - TOK_TABLE_OR_COL - year - '__HIVE_DEFAULT_PARTITION__' - - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -959,9 +292,7 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - GatherStats: false Filter Operator - isSamplingPred: false predicate: ((year = '2001') and (year = '__HIVE_DEFAULT_PARTITION__')) (type: boolean) Statistics: Num rows: 0 Data size: 0 
Basic stats: NONE Column stats: NONE Select Operator @@ -983,28 +314,11 @@ POSTHOOK: Input: default@loc_orc POSTHOOK: Input: default@loc_orc@year=2001 #### A masked pattern was here #### PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE -explain extended select zip from loc_orc +explain select zip from loc_orc PREHOOK: type: QUERY POSTHOOK: query: -- basicStatState: COMPLETE colStatState: NONE -explain extended select zip from loc_orc +explain select zip from loc_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - zip - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1015,129 +329,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 730 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: zip (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 730 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 8 Data size: 730 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### + Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: year=2001 - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - partition values: - year 2001 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 7 - partition_columns year - partition_columns.types string - rawDataSize 0 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 405 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - partition_columns year - partition_columns.types string - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc -#### A masked pattern was here #### - Partition - base file name: year=__HIVE_DEFAULT_PARTITION__ - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - partition values: - year __HIVE_DEFAULT_PARTITION__ - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns 
state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 1 - partition_columns year - partition_columns.types string - rawDataSize 0 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 325 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - partition_columns year - partition_columns.types string - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc/year=2001 [loc_orc] - /loc_orc/year=__HIVE_DEFAULT_PARTITION__ [loc_orc] Stage: Stage-0 Fetch Operator @@ -1146,28 +349,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL -explain extended select state from loc_orc +explain select state from loc_orc PREHOOK: type: QUERY POSTHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL -explain extended select state from loc_orc +explain select state from loc_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - state - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1178,129 +364,18 @@ STAGE PLANS: Map Operator Tree: 
TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 730 Basic stats: COMPLETE Column stats: PARTIAL - GatherStats: false + Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string) outputColumnNames: _col0 Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: year=2001 - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - partition values: - year 2001 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 7 - partition_columns year - partition_columns.types string - rawDataSize 0 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 405 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - partition_columns year - partition_columns.types string - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc -#### A masked pattern was here #### - Partition - base file name: year=__HIVE_DEFAULT_PARTITION__ - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - partition values: - year __HIVE_DEFAULT_PARTITION__ - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 1 - partition_columns year - partition_columns.types string - rawDataSize 0 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 325 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - partition_columns year - partition_columns.types string - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde 
-#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc/year=2001 [loc_orc] - /loc_orc/year=__HIVE_DEFAULT_PARTITION__ [loc_orc] Stage: Stage-0 Fetch Operator @@ -1310,32 +385,12 @@ STAGE PLANS: PREHOOK: query: -- column statistics for __HIVE_DEFAULT_PARTITION__ is not supported yet. Hence colStatState reports PARTIAL -- basicStatState: COMPLETE colStatState: PARTIAL -explain extended select state,locid from loc_orc +explain select state,locid from loc_orc PREHOOK: type: QUERY POSTHOOK: query: -- column statistics for __HIVE_DEFAULT_PARTITION__ is not supported yet. Hence colStatState reports PARTIAL -- basicStatState: COMPLETE colStatState: PARTIAL -explain extended select state,locid from loc_orc +explain select state,locid from loc_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - state - TOK_SELEXPR - TOK_TABLE_OR_COL - locid - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1346,129 +401,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 730 Basic stats: COMPLETE Column stats: PARTIAL - GatherStats: false + Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: year=2001 - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - partition values: - year 2001 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 7 - partition_columns year - partition_columns.types string - rawDataSize 0 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 405 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - partition_columns year - partition_columns.types string - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc -#### A masked pattern was here #### - Partition - base file name: 
year=__HIVE_DEFAULT_PARTITION__ - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - partition values: - year __HIVE_DEFAULT_PARTITION__ - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 1 - partition_columns year - partition_columns.types string - rawDataSize 0 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 325 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - partition_columns year - partition_columns.types string - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc/year=2001 [loc_orc] - /loc_orc/year=__HIVE_DEFAULT_PARTITION__ [loc_orc] Stage: Stage-0 Fetch Operator @@ -1477,36 +421,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE -explain extended select state,locid from loc_orc where year='2001' +explain select state,locid from loc_orc where year='2001' PREHOOK: type: QUERY POSTHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE -explain extended select state,locid from loc_orc where year='2001' +explain select state,locid 
from loc_orc where year='2001' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - state - TOK_SELEXPR - TOK_TABLE_OR_COL - locid - TOK_WHERE - = - TOK_TABLE_OR_COL - year - '2001' - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1517,83 +436,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 7 Data size: 405 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false + Statistics: Num rows: 7 Data size: 399 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 630 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 7 Data size: 630 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: year=2001 - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - partition values: - year 2001 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns 
state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 7 - partition_columns year - partition_columns.types string - rawDataSize 0 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 405 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - partition_columns year - partition_columns.types string - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc/year=2001 [loc_orc] Stage: Stage-0 Fetch Operator @@ -1602,36 +456,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE -explain extended select state,locid from loc_orc where year!='2001' +explain select state,locid from loc_orc where year!='2001' PREHOOK: type: QUERY POSTHOOK: query: -- basicStatState: COMPLETE colStatState: NONE -explain extended select state,locid from loc_orc where year!='2001' +explain select state,locid from loc_orc where year!='2001' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - state - TOK_SELEXPR - TOK_TABLE_OR_COL - locid - TOK_WHERE - != - TOK_TABLE_OR_COL - year - '2001' - - 
STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1643,82 +472,17 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: NONE - GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: year=__HIVE_DEFAULT_PARTITION__ - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - partition values: - year __HIVE_DEFAULT_PARTITION__ - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 1 - partition_columns year - partition_columns.types string - rawDataSize 0 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 325 -#### A masked pattern was 
here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - partition_columns year - partition_columns.types string - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc/year=__HIVE_DEFAULT_PARTITION__ [loc_orc] Stage: Stage-0 Fetch Operator @@ -1727,27 +491,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL -explain extended select * from loc_orc +explain select * from loc_orc PREHOOK: type: QUERY POSTHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL -explain extended select * from loc_orc +explain select * from loc_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -1755,139 +503,24 @@ STAGE PLANS: Stage: Stage-0 Fetch Operator limit: -1 - Partition Description: - Partition - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - partition values: - year 2001 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 7 - partition_columns year - partition_columns.types string - rawDataSize 0 - 
serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 405 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - partition_columns year - partition_columns.types string - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Partition - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - partition values: - year __HIVE_DEFAULT_PARTITION__ - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 1 - partition_columns year - partition_columns.types string - rawDataSize 0 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 325 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - partition_columns year 
- partition_columns.types string - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 730 Basic stats: COMPLETE Column stats: PARTIAL - GatherStats: false + Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 730 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: -- This is to test filter expression evaluation on partition column -- numRows: 2 dataSize: 8 basicStatState: COMPLETE colStatState: COMPLETE -explain extended select locid from loc_orc where locid>0 and year='2001' +explain select locid from loc_orc where locid>0 and year='2001' PREHOOK: type: QUERY POSTHOOK: query: -- This is to test filter expression evaluation on partition column -- numRows: 2 dataSize: 8 basicStatState: COMPLETE colStatState: COMPLETE -explain extended select locid from loc_orc where locid>0 and year='2001' +explain select locid from loc_orc where locid>0 and year='2001' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - locid - TOK_WHERE - and - > - TOK_TABLE_OR_COL - locid - 0 - = - TOK_TABLE_OR_COL - year - '2001' - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1898,10 +531,8 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num 
rows: 7 Data size: 405 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false + Statistics: Num rows: 7 Data size: 399 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - isSamplingPred: false predicate: (locid > 0) (type: boolean) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -1910,75 +541,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: year=2001 - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - partition values: - year 2001 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 7 - partition_columns year - partition_columns.types string - rawDataSize 0 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 405 -#### A masked pattern was here #### 
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - partition_columns year - partition_columns.types string - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc/year=2001 [loc_orc] Stage: Stage-0 Fetch Operator @@ -1986,40 +553,10 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain extended select locid,year from loc_orc where locid>0 and year='2001' +PREHOOK: query: explain select locid,year from loc_orc where locid>0 and year='2001' PREHOOK: type: QUERY -POSTHOOK: query: explain extended select locid,year from loc_orc where locid>0 and year='2001' +POSTHOOK: query: explain select locid,year from loc_orc where locid>0 and year='2001' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - locid - TOK_SELEXPR - TOK_TABLE_OR_COL - year - TOK_WHERE - and - > - TOK_TABLE_OR_COL - locid - 0 - = - TOK_TABLE_OR_COL - year - '2001' - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2030,10 +567,8 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 7 Data size: 405 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false + Statistics: Num rows: 7 Data size: 399 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - isSamplingPred: false predicate: 
(locid > 0) (type: boolean) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -2042,75 +577,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types int:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: year=2001 - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - partition values: - year 2001 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 7 - partition_columns year - partition_columns.types string - rawDataSize 0 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 405 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - 
bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - partition_columns year - partition_columns.types string - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc/year=2001 [loc_orc] Stage: Stage-0 Fetch Operator @@ -2118,51 +589,10 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain extended select * from (select locid,year from loc_orc) test where locid>0 and year='2001' +PREHOOK: query: explain select * from (select locid,year from loc_orc) test where locid>0 and year='2001' PREHOOK: type: QUERY -POSTHOOK: query: explain extended select * from (select locid,year from loc_orc) test where locid>0 and year='2001' +POSTHOOK: query: explain select * from (select locid,year from loc_orc) test where locid>0 and year='2001' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_SUBQUERY - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - locid - TOK_SELEXPR - TOK_TABLE_OR_COL - year - test - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - and - > - TOK_TABLE_OR_COL - locid - 0 - = - TOK_TABLE_OR_COL - year - '2001' - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2173,10 +603,8 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 7 Data size: 405 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false + Statistics: Num rows: 7 Data size: 399 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - 
isSamplingPred: false predicate: (locid > 0) (type: boolean) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -2185,75 +613,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types int:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: year=2001 - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - partition values: - year 2001 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 7 - partition_columns year - partition_columns.types string - rawDataSize 0 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 405 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - bucket_count -1 - columns state,locid,zip - columns.comments - columns.types string:int:bigint -#### A masked pattern was here #### - name default.loc_orc - partition_columns year - partition_columns.types string - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc/year=2001 [test:loc_orc] Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/annotate_stats_select.q.out ql/src/test/results/clientpositive/annotate_stats_select.q.out index 8b06d0d..9d4cf98 100644 --- ql/src/test/results/clientpositive/annotate_stats_select.q.out +++ ql/src/test/results/clientpositive/annotate_stats_select.q.out @@ -88,27 +88,11 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE numRows: 2 rawDataSize: 1514 -explain extended select * from alltypes_orc +explain select * from alltypes_orc PREHOOK: type: QUERY POSTHOOK: query: -- basicStatState: COMPLETE colStatState: NONE numRows: 2 rawDataSize: 1514 -explain extended select * from alltypes_orc +explain select * from alltypes_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -120,7 +104,6 @@ STAGE PLANS: TableScan alias: alltypes_orc 
Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: NONE - GatherStats: false Select Operator expressions: bo1 (type: boolean), ti1 (type: tinyint), si1 (type: smallint), i1 (type: int), bi1 (type: bigint), f1 (type: float), d1 (type: double), de1 (type: decimal(10,0)), ts1 (type: timestamp), da1 (type: timestamp), s1 (type: string), vc1 (type: varchar(5)), m1 (type: map), l1 (type: array), st1 (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -138,27 +121,11 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypes_orc #### A masked pattern was here #### PREHOOK: query: -- numRows: 2 rawDataSize: 1514 -explain extended select * from alltypes_orc +explain select * from alltypes_orc PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 2 rawDataSize: 1514 -explain extended select * from alltypes_orc +explain select * from alltypes_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -170,7 +137,6 @@ STAGE PLANS: TableScan alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: PARTIAL - GatherStats: false Select Operator expressions: bo1 (type: boolean), ti1 (type: tinyint), si1 (type: smallint), i1 (type: int), bi1 (type: bigint), f1 (type: float), d1 (type: double), de1 (type: decimal(10,0)), ts1 (type: timestamp), da1 (type: timestamp), s1 (type: string), vc1 (type: varchar(5)), m1 (type: map), l1 (type: array), st1 (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -178,28 +144,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 2 rawDataSize: 8 -explain extended select bo1 from alltypes_orc +explain 
select bo1 from alltypes_orc PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 2 rawDataSize: 8 -explain extended select bo1 from alltypes_orc +explain select bo1 from alltypes_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - bo1 - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -211,87 +160,17 @@ STAGE PLANS: TableScan alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: bo1 (type: boolean) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types boolean - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: alltypes_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types 
boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.alltypes_orc - name: default.alltypes_orc - Truncated Path -> Alias: - /alltypes_orc [alltypes_orc] Stage: Stage-0 Fetch Operator @@ -301,30 +180,12 @@ STAGE PLANS: PREHOOK: query: -- col alias renaming -- numRows: 2 rawDataSize: 8 -explain extended select i1 as int1 from alltypes_orc +explain select i1 as int1 from alltypes_orc 
PREHOOK: type: QUERY POSTHOOK: query: -- col alias renaming -- numRows: 2 rawDataSize: 8 -explain extended select i1 as int1 from alltypes_orc +explain select i1 as int1 from alltypes_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - i1 - int1 - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -336,87 +197,17 @@ STAGE PLANS: TableScan alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: i1 (type: int) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: alltypes_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types 
boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.alltypes_orc - name: default.alltypes_orc - Truncated Path -> Alias: - /alltypes_orc [alltypes_orc] Stage: Stage-0 Fetch Operator @@ -425,28 +216,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 2 rawDataSize: 174 -explain extended select s1 from alltypes_orc +explain select s1 from alltypes_orc PREHOOK: type: QUERY POSTHOOK: 
query: -- numRows: 2 rawDataSize: 174 -explain extended select s1 from alltypes_orc +explain select s1 from alltypes_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - s1 - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -458,87 +232,17 @@ STAGE PLANS: TableScan alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: s1 (type: string) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: alltypes_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types 
boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.alltypes_orc - name: default.alltypes_orc - Truncated Path -> Alias: - /alltypes_orc [alltypes_orc] Stage: Stage-0 Fetch Operator @@ -548,29 +252,12 @@ STAGE PLANS: PREHOOK: query: -- column statistics for complex types unsupported and so statistics will not be updated -- numRows: 2 rawDataSize: 1514 -explain extended select m1 from 
alltypes_orc +explain select m1 from alltypes_orc PREHOOK: type: QUERY POSTHOOK: query: -- column statistics for complex types unsupported and so statistics will not be updated -- numRows: 2 rawDataSize: 1514 -explain extended select m1 from alltypes_orc +explain select m1 from alltypes_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - m1 - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -582,87 +269,17 @@ STAGE PLANS: TableScan alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: NONE - GatherStats: false Select Operator expressions: m1 (type: map) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types map - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: alltypes_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns 
bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.alltypes_orc - name: default.alltypes_orc - Truncated Path -> Alias: - /alltypes_orc [alltypes_orc] Stage: Stage-0 Fetch Operator @@ -671,49 +288,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 2 rawDataSize: 246 -explain extended select 
bo1, ti1, si1, i1, bi1, f1, d1,s1 from alltypes_orc +explain select bo1, ti1, si1, i1, bi1, f1, d1,s1 from alltypes_orc PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 2 rawDataSize: 246 -explain extended select bo1, ti1, si1, i1, bi1, f1, d1,s1 from alltypes_orc +explain select bo1, ti1, si1, i1, bi1, f1, d1,s1 from alltypes_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - bo1 - TOK_SELEXPR - TOK_TABLE_OR_COL - ti1 - TOK_SELEXPR - TOK_TABLE_OR_COL - si1 - TOK_SELEXPR - TOK_TABLE_OR_COL - i1 - TOK_SELEXPR - TOK_TABLE_OR_COL - bi1 - TOK_SELEXPR - TOK_TABLE_OR_COL - f1 - TOK_SELEXPR - TOK_TABLE_OR_COL - d1 - TOK_SELEXPR - TOK_TABLE_OR_COL - s1 - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -725,87 +304,17 @@ STAGE PLANS: TableScan alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: bo1 (type: boolean), ti1 (type: tinyint), si1 (type: smallint), i1 (type: int), bi1 (type: bigint), f1 (type: float), d1 (type: double), s1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 2 Data size: 246 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 246 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 - columns.types boolean:tinyint:smallint:int:bigint:float:double:string - escape.delim \ - hive.serialization.extend.nesting.levels 
true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: alltypes_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 
i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.alltypes_orc - name: default.alltypes_orc - Truncated Path -> Alias: - /alltypes_orc [alltypes_orc] Stage: Stage-0 Fetch Operator @@ -814,27 +323,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 2 rawDataSize: 0 -explain extended select null from alltypes_orc +explain select null from alltypes_orc PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 2 rawDataSize: 0 -explain extended select null from alltypes_orc +explain select null from alltypes_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_NULL - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -846,87 +339,17 @@ STAGE PLANS: TableScan alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: null (type: string) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: alltypes_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) 
de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.alltypes_orc - name: default.alltypes_orc - Truncated Path -> Alias: - /alltypes_orc [alltypes_orc] Stage: Stage-0 Fetch Operator @@ -935,27 +358,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 2 rawDataSize: 8 -explain extended select 11 from alltypes_orc +explain select 11 from alltypes_orc PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 2 rawDataSize: 8 -explain extended select 11 from alltypes_orc +explain select 11 from alltypes_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - 11 - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -967,87 +374,17 @@ STAGE PLANS: TableScan alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: 11 (type: int) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 
- TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: alltypes_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - 
serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.alltypes_orc - name: default.alltypes_orc - Truncated Path -> Alias: - /alltypes_orc [alltypes_orc] Stage: Stage-0 Fetch Operator @@ -1056,27 +393,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 2 rawDataSize: 16 -explain extended select 11L from alltypes_orc +explain select 11L from alltypes_orc PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 2 rawDataSize: 16 -explain extended select 11L from alltypes_orc +explain select 11L from alltypes_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - 11L - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1088,87 +409,17 @@ STAGE PLANS: TableScan alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: 11 (type: bigint) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> 
Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: alltypes_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - 
totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.alltypes_orc - name: default.alltypes_orc - Truncated Path -> Alias: - /alltypes_orc [alltypes_orc] Stage: Stage-0 Fetch Operator @@ -1177,27 +428,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 2 rawDataSize: 16 -explain extended select 11.0 from alltypes_orc +explain select 11.0 from alltypes_orc PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 2 rawDataSize: 16 -explain extended select 11.0 from alltypes_orc +explain select 11.0 from alltypes_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - 11.0 - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1209,87 +444,17 @@ STAGE PLANS: TableScan alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: 11.0 (type: double) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types double - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked 
pattern was here #### - Partition - base file name: alltypes_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: 
org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.alltypes_orc - name: default.alltypes_orc - Truncated Path -> Alias: - /alltypes_orc [alltypes_orc] Stage: Stage-0 Fetch Operator @@ -1298,27 +463,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 2 rawDataSize: 178 -explain extended select "hello" from alltypes_orc +explain select "hello" from alltypes_orc PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 2 rawDataSize: 178 -explain extended select "hello" from alltypes_orc +explain select "hello" from alltypes_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - "hello" - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1330,87 +479,17 @@ STAGE PLANS: TableScan alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: 'hello' (type: string) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base 
file name: alltypes_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: 
default.alltypes_orc - name: default.alltypes_orc - Truncated Path -> Alias: - /alltypes_orc [alltypes_orc] Stage: Stage-0 Fetch Operator @@ -1418,29 +497,10 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain extended select cast("hello" as char(5)) from alltypes_orc +PREHOOK: query: explain select cast("hello" as char(5)) from alltypes_orc PREHOOK: type: QUERY -POSTHOOK: query: explain extended select cast("hello" as char(5)) from alltypes_orc +POSTHOOK: query: explain select cast("hello" as char(5)) from alltypes_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_FUNCTION - TOK_CHAR - 5 - "hello" - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1452,87 +512,17 @@ STAGE PLANS: TableScan alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: CAST( 'hello' AS CHAR(5) (type: char(5)) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types char(5) - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### 
A masked pattern was here #### - Partition - base file name: alltypes_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: 
org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.alltypes_orc - name: default.alltypes_orc - Truncated Path -> Alias: - /alltypes_orc [alltypes_orc] Stage: Stage-0 Fetch Operator @@ -1540,29 +530,10 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain extended select cast("hello" as varchar(5)) from alltypes_orc +PREHOOK: query: explain select cast("hello" as varchar(5)) from alltypes_orc PREHOOK: type: QUERY -POSTHOOK: query: explain extended select cast("hello" as varchar(5)) from alltypes_orc +POSTHOOK: query: explain select cast("hello" as varchar(5)) from alltypes_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_FUNCTION - TOK_VARCHAR - 5 - "hello" - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1574,87 +545,17 @@ STAGE PLANS: TableScan alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: CAST( 'hello' AS varchar(5)) (type: varchar(5)) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types varchar(5) - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - 
Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: alltypes_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib 
org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.alltypes_orc - name: default.alltypes_orc - Truncated Path -> Alias: - /alltypes_orc [alltypes_orc] Stage: Stage-0 Fetch Operator @@ -1663,29 +564,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 2 rawDataSize: 96 -explain extended select unbase64("0xe23") from alltypes_orc +explain select unbase64("0xe23") from alltypes_orc PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 2 rawDataSize: 96 -explain extended select unbase64("0xe23") from alltypes_orc +explain select unbase64("0xe23") from alltypes_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_FUNCTION - unbase64 - "0xe23" - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1697,87 +580,17 @@ STAGE PLANS: TableScan alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: D317B6 (type: binary) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types binary - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - 
GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: alltypes_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - 
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.alltypes_orc - name: default.alltypes_orc - Truncated Path -> Alias: - /alltypes_orc [alltypes_orc] Stage: Stage-0 Fetch Operator @@ -1786,33 +599,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 2 rawDataSize: 16 -explain extended select cast("1" as TINYINT), cast("20" as SMALLINT) from alltypes_orc +explain select cast("1" as TINYINT), cast("20" as SMALLINT) from alltypes_orc PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 2 rawDataSize: 16 -explain extended select cast("1" as TINYINT), cast("20" as SMALLINT) from alltypes_orc +explain select cast("1" as TINYINT), cast("20" as SMALLINT) from alltypes_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_FUNCTION - TOK_TINYINT - "1" - TOK_SELEXPR - TOK_FUNCTION - TOK_SMALLINT - "20" - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1824,87 +615,17 @@ STAGE PLANS: TableScan alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: 1 (type: tinyint), 20 (type: smallint) outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types tinyint:smallint - escape.delim \ - 
hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: alltypes_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct 
alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.alltypes_orc - name: default.alltypes_orc - Truncated Path -> Alias: - /alltypes_orc [alltypes_orc] Stage: Stage-0 Fetch Operator @@ -1913,29 +634,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 2 rawDataSize: 80 -explain extended select cast("1970-12-31 15:59:58.174" as TIMESTAMP) from alltypes_orc +explain select cast("1970-12-31 15:59:58.174" as TIMESTAMP) from alltypes_orc PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 2 rawDataSize: 80 -explain extended select cast("1970-12-31 15:59:58.174" as TIMESTAMP) from alltypes_orc +explain select cast("1970-12-31 15:59:58.174" as TIMESTAMP) from alltypes_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_FUNCTION - TOK_TIMESTAMP - "1970-12-31 15:59:58.174" - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1947,87 +650,17 @@ STAGE PLANS: TableScan alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: 1970-12-31 15:59:58.174 (type: timestamp) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types timestamp - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: alltypes_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types 
boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.alltypes_orc - name: default.alltypes_orc - Truncated Path -> Alias: - /alltypes_orc [alltypes_orc] Stage: Stage-0 Fetch Operator @@ -2036,29 +669,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 2 rawDataSize: 112 -explain extended select cast("1970-12-31 15:59:58.174" as DATE) from alltypes_orc +explain select cast("1970-12-31 15:59:58.174" as DATE) from alltypes_orc PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 2 rawDataSize: 112 -explain extended select cast("1970-12-31 15:59:58.174" as DATE) from alltypes_orc +explain select cast("1970-12-31 15:59:58.174" as DATE) from alltypes_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_FUNCTION - TOK_DATE - "1970-12-31 15:59:58.174" - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2070,87 +685,17 @@ STAGE PLANS: TableScan alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: null (type: void) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - 
GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types date - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: alltypes_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - 
colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.alltypes_orc - name: default.alltypes_orc - Truncated Path -> Alias: - /alltypes_orc [alltypes_orc] Stage: Stage-0 Fetch Operator @@ -2159,29 +704,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 2 rawDataSize: 224 -explain extended select cast("58.174" as DECIMAL) from alltypes_orc +explain select cast("58.174" as DECIMAL) from alltypes_orc PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 2 rawDataSize: 224 -explain extended select cast("58.174" as DECIMAL) from alltypes_orc +explain select cast("58.174" as DECIMAL) from alltypes_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_FUNCTION - TOK_DECIMAL - "58.174" - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2193,87 +720,17 @@ STAGE PLANS: TableScan alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: CAST( '58.174' AS decimal(10,0)) (type: decimal(10,0)) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 224 Basic stats: 
COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types decimal(10,0) - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: alltypes_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.alltypes_orc - name: default.alltypes_orc - Truncated Path -> Alias: - /alltypes_orc [alltypes_orc] Stage: Stage-0 Fetch Operator @@ -2282,31 +739,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 2 rawDataSize: 112 -explain extended select array(1,2,3) from alltypes_orc +explain select array(1,2,3) from alltypes_orc PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 2 rawDataSize: 112 -explain extended select array(1,2,3) from alltypes_orc +explain select array(1,2,3) from alltypes_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_FUNCTION - array - 1 - 2 - 3 - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2318,87 +755,17 @@ STAGE PLANS: TableScan alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: array(1,2,3) (type: array) outputColumnNames: _col0 Statistics: Num rows: 2 
Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types array - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: alltypes_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.alltypes_orc - name: default.alltypes_orc - Truncated Path -> Alias: - /alltypes_orc [alltypes_orc] Stage: Stage-0 Fetch Operator @@ -2407,31 +774,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 2 rawDataSize: 1508 -explain extended select str_to_map("a=1 b=2 c=3", " ", "=") from alltypes_orc +explain select str_to_map("a=1 b=2 c=3", " ", "=") from alltypes_orc PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 2 rawDataSize: 1508 -explain extended select str_to_map("a=1 b=2 c=3", " ", "=") from alltypes_orc +explain select str_to_map("a=1 b=2 c=3", " ", "=") from alltypes_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_FUNCTION - str_to_map - "a=1 b=2 c=3" - " " - "=" - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2443,87 +790,17 @@ STAGE PLANS: TableScan alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - 
GatherStats: false Select Operator expressions: str_to_map('a=1 b=2 c=3',' ','=') (type: map) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 1508 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 1508 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types map - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: alltypes_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - 
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.alltypes_orc - name: default.alltypes_orc - Truncated Path -> Alias: - /alltypes_orc [alltypes_orc] Stage: Stage-0 Fetch Operator @@ -2532,32 +809,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 2 rawDataSize: 112 -explain extended select NAMED_STRUCT("a", 11, "b", 11) from alltypes_orc +explain select NAMED_STRUCT("a", 11, "b", 11) from alltypes_orc PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 2 rawDataSize: 112 -explain extended select NAMED_STRUCT("a", 11, "b", 11) from alltypes_orc +explain select NAMED_STRUCT("a", 11, "b", 11) from alltypes_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_FUNCTION - NAMED_STRUCT - "a" - 11 - "b" - 11 - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2569,87 +825,17 @@ STAGE PLANS: TableScan alias: 
alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: named_struct('a',11,'b',11) (type: struct) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types struct - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: alltypes_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib 
org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.alltypes_orc - name: default.alltypes_orc - Truncated Path -> Alias: - /alltypes_orc [alltypes_orc] Stage: Stage-0 Fetch Operator @@ -2658,30 +844,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 2 rawDataSize: 250 -explain extended select CREATE_UNION(0, "hello") from alltypes_orc +explain select CREATE_UNION(0, "hello") from alltypes_orc PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 2 rawDataSize: 250 -explain extended select CREATE_UNION(0, "hello") from alltypes_orc +explain select CREATE_UNION(0, "hello") from alltypes_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_FUNCTION - CREATE_UNION - 0 - "hello" - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: 
Stage-1 @@ -2693,87 +860,17 @@ STAGE PLANS: TableScan alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: create_union(0,'hello') (type: uniontype) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 250 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 250 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types uniontype - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: alltypes_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list 
l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.alltypes_orc - name: default.alltypes_orc - Truncated Path -> Alias: - /alltypes_orc [alltypes_orc] Stage: Stage-0 Fetch Operator @@ -2783,29 +880,12 @@ STAGE PLANS: PREHOOK: query: -- COUNT(*) is projected as new column. It is not projected as GenericUDF and so datasize estimate will be based on number of rows -- numRows: 1 rawDataSize: 8 -explain extended select count(*) from alltypes_orc +explain select count(*) from alltypes_orc PREHOOK: type: QUERY POSTHOOK: query: -- COUNT(*) is projected as new column. 
It is not projected as GenericUDF and so datasize estimate will be based on number of rows -- numRows: 1 rawDataSize: 8 -explain extended select count(*) from alltypes_orc +explain select count(*) from alltypes_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_FUNCTIONSTAR - count - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2817,7 +897,6 @@ STAGE PLANS: TableScan alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -2828,65 +907,7 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 value expressions: _col0 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: alltypes_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format 
| - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.alltypes_orc - name: default.alltypes_orc - Truncated Path -> Alias: - /alltypes_orc [alltypes_orc] - Needs Tagging: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2899,25 +920,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - 
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -2927,30 +934,12 @@ STAGE PLANS: PREHOOK: query: -- COUNT(1) is projected as new column. It is not projected as GenericUDF and so datasize estimate will be based on number of rows -- numRows: 1 rawDataSize: 8 -explain extended select count(1) from alltypes_orc +explain select count(1) from alltypes_orc PREHOOK: type: QUERY POSTHOOK: query: -- COUNT(1) is projected as new column. It is not projected as GenericUDF and so datasize estimate will be based on number of rows -- numRows: 1 rawDataSize: 8 -explain extended select count(1) from alltypes_orc +explain select count(1) from alltypes_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_FUNCTION - count - 1 - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2962,7 +951,6 @@ STAGE PLANS: TableScan alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -2973,65 +961,7 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 value expressions: _col0 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: alltypes_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - 
colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.alltypes_orc - name: default.alltypes_orc - Truncated Path -> Alias: - /alltypes_orc [alltypes_orc] - Needs Tagging: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -3044,25 +974,11 @@ 
STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -3072,30 +988,12 @@ STAGE PLANS: PREHOOK: query: -- column statistics for complex column types will be missing. data size will be calculated from available column statistics -- numRows: 2 rawDataSize: 254 -explain extended select *,11 from alltypes_orc +explain select *,11 from alltypes_orc PREHOOK: type: QUERY POSTHOOK: query: -- column statistics for complex column types will be missing. 
data size will be calculated from available column statistics -- numRows: 2 rawDataSize: 254 -explain extended select *,11 from alltypes_orc +explain select *,11 from alltypes_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_SELEXPR - 11 - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3107,87 +1005,17 @@ STAGE PLANS: TableScan alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: PARTIAL - GatherStats: false Select Operator expressions: bo1 (type: boolean), ti1 (type: tinyint), si1 (type: smallint), i1 (type: int), bi1 (type: bigint), f1 (type: float), d1 (type: double), de1 (type: decimal(10,0)), ts1 (type: timestamp), da1 (type: timestamp), s1 (type: string), vc1 (type: varchar(5)), m1 (type: map), l1 (type: array), st1 (type: struct), 11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 Statistics: Num rows: 2 Data size: 428 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 428 Basic stats: COMPLETE Column stats: PARTIAL -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15 - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: alltypes_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) 
de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.alltypes_orc - name: default.alltypes_orc - Truncated Path -> Alias: - /alltypes_orc [alltypes_orc] Stage: Stage-0 Fetch Operator @@ -3198,44 +1026,13 @@ STAGE PLANS: PREHOOK: query: -- subquery selects -- inner select - numRows: 2 rawDataSize: 8 -- outer select - numRows: 2 rawDataSize: 8 -explain extended select i1 from (select i1 from alltypes_orc limit 10) temp +explain select i1 from (select i1 from alltypes_orc limit 10) temp PREHOOK: type: QUERY POSTHOOK: query: -- subquery selects -- inner select - numRows: 2 rawDataSize: 8 -- outer select - numRows: 2 rawDataSize: 8 -explain extended select i1 from (select i1 from alltypes_orc limit 10) temp +explain select i1 from (select i1 from alltypes_orc limit 10) temp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_SUBQUERY - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - i1 - TOK_LIMIT - 10 - temp - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - i1 - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3247,7 +1044,6 @@ STAGE PLANS: TableScan alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: i1 (type: int) outputColumnNames: _col0 @@ -3258,65 +1054,7 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 value expressions: _col0 (type: int) - auto parallelism: false - Path -> Alias: -#### A masked 
pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: alltypes_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A 
masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.alltypes_orc - name: default.alltypes_orc - Truncated Path -> Alias: - /alltypes_orc [temp:alltypes_orc] - Needs Tagging: false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int) @@ -3331,25 +1069,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -3359,45 +1083,12 @@ STAGE PLANS: PREHOOK: query: -- inner select - numRows: 2 rawDataSize: 16 -- outer select - numRows: 2 rawDataSize: 8 -explain extended select i1 from (select i1,11 from alltypes_orc limit 10) temp +explain select i1 from (select i1,11 from alltypes_orc limit 10) temp PREHOOK: type: QUERY POSTHOOK: query: -- inner select - numRows: 2 rawDataSize: 16 -- outer select - numRows: 2 rawDataSize: 8 -explain extended select i1 from (select i1,11 from alltypes_orc limit 10) temp +explain select i1 from (select i1,11 from alltypes_orc limit 10) temp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_SUBQUERY - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - i1 - TOK_SELEXPR - 11 
- TOK_LIMIT - 10 - temp - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - i1 - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3409,7 +1100,6 @@ STAGE PLANS: TableScan alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: i1 (type: int) outputColumnNames: _col0 @@ -3420,65 +1110,7 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 value expressions: _col0 (type: int) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: alltypes_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - 
bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.alltypes_orc - name: default.alltypes_orc - Truncated Path -> Alias: - /alltypes_orc [temp:alltypes_orc] - Needs Tagging: false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int) @@ -3493,25 +1125,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -3521,47 +1139,12 @@ STAGE PLANS: PREHOOK: query: -- inner select - numRows: 2 rawDataSize: 16 -- outer select - numRows: 
2 rawDataSize: 186 -explain extended select i1,"hello" from (select i1,11 from alltypes_orc limit 10) temp +explain select i1,"hello" from (select i1,11 from alltypes_orc limit 10) temp PREHOOK: type: QUERY POSTHOOK: query: -- inner select - numRows: 2 rawDataSize: 16 -- outer select - numRows: 2 rawDataSize: 186 -explain extended select i1,"hello" from (select i1,11 from alltypes_orc limit 10) temp +explain select i1,"hello" from (select i1,11 from alltypes_orc limit 10) temp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_SUBQUERY - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - i1 - TOK_SELEXPR - 11 - TOK_LIMIT - 10 - temp - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - i1 - TOK_SELEXPR - "hello" - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3573,7 +1156,6 @@ STAGE PLANS: TableScan alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: i1 (type: int) outputColumnNames: _col0 @@ -3584,65 +1166,7 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 value expressions: _col0 (type: int) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: alltypes_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types 
boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.alltypes_orc - name: default.alltypes_orc - Truncated Path -> Alias: - /alltypes_orc [temp:alltypes_orc] - Needs Tagging: false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int) @@ -3657,25 +1181,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE File Output 
Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types int:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -3685,46 +1195,12 @@ STAGE PLANS: PREHOOK: query: -- inner select - numRows: 2 rawDataSize: 24 -- outer select - numRows: 2 rawDataSize: 16 -explain extended select x from (select i1,11.0 as x from alltypes_orc limit 10) temp +explain select x from (select i1,11.0 as x from alltypes_orc limit 10) temp PREHOOK: type: QUERY POSTHOOK: query: -- inner select - numRows: 2 rawDataSize: 24 -- outer select - numRows: 2 rawDataSize: 16 -explain extended select x from (select i1,11.0 as x from alltypes_orc limit 10) temp +explain select x from (select i1,11.0 as x from alltypes_orc limit 10) temp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_SUBQUERY - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - i1 - TOK_SELEXPR - 11.0 - x - TOK_LIMIT - 10 - temp - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - x - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3736,7 +1212,6 @@ STAGE PLANS: TableScan alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false 
Select Operator expressions: 11.0 (type: double) outputColumnNames: _col1 @@ -3747,65 +1222,7 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 value expressions: _col1 (type: double) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: alltypes_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - 
rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.alltypes_orc - name: default.alltypes_orc - Truncated Path -> Alias: - /alltypes_orc [temp:alltypes_orc] - Needs Tagging: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: double) @@ -3820,25 +1237,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types double - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -3848,51 +1251,12 @@ STAGE PLANS: PREHOOK: query: -- inner select - numRows: 2 rawDataSize: 104 -- outer select - numRows: 2 rawDataSize: 186 -explain extended select x,"hello" from (select i1 as x, unbase64("0xe23") as ub from alltypes_orc limit 10) temp +explain select x,"hello" from (select i1 as x, unbase64("0xe23") as ub from alltypes_orc limit 10) temp PREHOOK: type: QUERY POSTHOOK: query: -- inner select - numRows: 2 rawDataSize: 104 -- outer select - numRows: 2 rawDataSize: 186 
-explain extended select x,"hello" from (select i1 as x, unbase64("0xe23") as ub from alltypes_orc limit 10) temp +explain select x,"hello" from (select i1 as x, unbase64("0xe23") as ub from alltypes_orc limit 10) temp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_SUBQUERY - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - i1 - x - TOK_SELEXPR - TOK_FUNCTION - unbase64 - "0xe23" - ub - TOK_LIMIT - 10 - temp - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - x - TOK_SELEXPR - "hello" - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -3904,7 +1268,6 @@ STAGE PLANS: TableScan alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: i1 (type: int) outputColumnNames: _col0 @@ -3915,65 +1278,7 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 value expressions: _col0 (type: int) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: alltypes_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct 
alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.alltypes_orc - name: default.alltypes_orc - Truncated Path -> Alias: - /alltypes_orc [temp:alltypes_orc] - Needs Tagging: false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int) @@ -3988,25 +1293,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types int:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -4017,64 +1308,13 @@ STAGE PLANS: PREHOOK: query: -- inner select - numRows: 2 rawDataSize: 186 -- middle select - numRows: 2 rawDataSize: 178 -- outer select - numRows: 2 rawDataSize: 194 -explain extended select h, 11.0 from (select hell as h from (select i1, "hello" as hell from alltypes_orc limit 10) in1 limit 10) in2 +explain select h, 11.0 from (select hell as h from (select i1, "hello" as hell from alltypes_orc limit 10) in1 limit 10) in2 PREHOOK: type: QUERY POSTHOOK: query: -- inner select - numRows: 2 rawDataSize: 186 -- middle select - numRows: 2 rawDataSize: 178 -- outer select - numRows: 2 rawDataSize: 194 -explain extended select h, 11.0 from (select hell as h from (select i1, "hello" as hell from alltypes_orc limit 10) in1 limit 10) in2 +explain select h, 11.0 from (select hell as h from (select i1, "hello" as hell from alltypes_orc limit 10) in1 limit 10) in2 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_SUBQUERY - TOK_QUERY - TOK_FROM - TOK_SUBQUERY - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - i1 - TOK_SELEXPR - "hello" - hell - TOK_LIMIT - 10 - in1 - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - hell - h - TOK_LIMIT - 10 - in2 - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - h - TOK_SELEXPR - 11.0 - - STAGE 
DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -4087,7 +1327,6 @@ STAGE PLANS: TableScan alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: 'hello' (type: string) outputColumnNames: _col1 @@ -4098,65 +1337,7 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 value expressions: _col1 (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: alltypes_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types 
boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.alltypes_orc - name: default.alltypes_orc - Truncated Path -> Alias: - /alltypes_orc [in2:in1:alltypes_orc] - Needs Tagging: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string) @@ -4174,59 +1355,19 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - GatherStats: false Reduce Output Operator sort order: Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 value expressions: _col0 (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Truncated Path -> Alias: -#### A masked pattern was here #### - Needs Tagging: false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) @@ -4241,25 +1382,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:double - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -4269,32 +1396,12 @@ STAGE PLANS: PREHOOK: query: -- This test is for FILTER operator where filter expression is a boolean column -- numRows: 2 rawDataSize: 8 -explain extended select bo1 from alltypes_orc where bo1 +explain select bo1 from alltypes_orc where bo1 
PREHOOK: type: QUERY POSTHOOK: query: -- This test is for FILTER operator where filter expression is a boolean column -- numRows: 2 rawDataSize: 8 -explain extended select bo1 from alltypes_orc where bo1 +explain select bo1 from alltypes_orc where bo1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - bo1 - TOK_WHERE - TOK_TABLE_OR_COL - bo1 - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4306,9 +1413,7 @@ STAGE PLANS: TableScan alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Filter Operator - isSamplingPred: false predicate: bo1 (type: boolean) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -4317,80 +1422,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types boolean - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: alltypes_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.alltypes_orc - name: default.alltypes_orc - Truncated Path -> Alias: - /alltypes_orc [alltypes_orc] Stage: 
Stage-0 Fetch Operator @@ -4399,32 +1435,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 0 rawDataSize: 0 -explain extended select bo1 from alltypes_orc where !bo1 +explain select bo1 from alltypes_orc where !bo1 PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 0 rawDataSize: 0 -explain extended select bo1 from alltypes_orc where !bo1 +explain select bo1 from alltypes_orc where !bo1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - alltypes_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - bo1 - TOK_WHERE - ! - TOK_TABLE_OR_COL - bo1 - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4436,9 +1451,7 @@ STAGE PLANS: TableScan alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Filter Operator - isSamplingPred: false predicate: (not bo1) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Select Operator @@ -4447,80 +1460,11 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types boolean - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern 
was here #### - Partition - base file name: alltypes_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - colelction.delim , - columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,vc1,m1,l1,st1 - columns.comments - columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:varchar(5):map:array:struct - field.delim | -#### A masked pattern was here #### - mapkey.delim : - name default.alltypes_orc - numFiles 1 - numRows 2 - rawDataSize 1686 - serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, varchar(5) vc1, map m1, list l1, struct st1} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1475 -#### A masked pattern was here #### - serde: 
org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.alltypes_orc - name: default.alltypes_orc - Truncated Path -> Alias: - /alltypes_orc [alltypes_orc] Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/annotate_stats_table.q.out ql/src/test/results/clientpositive/annotate_stats_table.q.out index bb2d18c..c804700 100644 --- ql/src/test/results/clientpositive/annotate_stats_table.q.out +++ ql/src/test/results/clientpositive/annotate_stats_table.q.out @@ -27,27 +27,11 @@ POSTHOOK: type: ALTERTABLE_FILEFORMAT POSTHOOK: Input: default@emp_orc POSTHOOK: Output: default@emp_orc PREHOOK: query: -- basicStatState: NONE colStatState: NONE -explain extended select * from emp_orc +explain select * from emp_orc PREHOOK: type: QUERY POSTHOOK: query: -- basicStatState: NONE colStatState: NONE -explain extended select * from emp_orc +explain select * from emp_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - emp_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -59,7 +43,6 @@ STAGE PLANS: TableScan alias: emp_orc Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - GatherStats: false Select Operator expressions: lastname (type: string), deptid (type: int) outputColumnNames: _col0, _col1 @@ -87,29 +70,13 @@ POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema PREHOOK: query: -- stats are disabled. basic stats will report the file size but not raw data size. so initial statistics will be PARTIAL -- basicStatState: PARTIAL colStatState: NONE -explain extended select * from emp_orc +explain select * from emp_orc PREHOOK: type: QUERY POSTHOOK: query: -- stats are disabled. basic stats will report the file size but not raw data size. 
so initial statistics will be PARTIAL -- basicStatState: PARTIAL colStatState: NONE -explain extended select * from emp_orc +explain select * from emp_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - emp_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -120,12 +87,11 @@ STAGE PLANS: Processor Tree: TableScan alias: emp_orc - Statistics: Num rows: 3 Data size: 349 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + Statistics: Num rows: 3 Data size: 364 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: lastname (type: string), deptid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 349 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 364 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- table level analyze statistics @@ -139,27 +105,11 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@emp_orc POSTHOOK: Output: default@emp_orc PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE -explain extended select * from emp_orc +explain select * from emp_orc PREHOOK: type: QUERY POSTHOOK: query: -- basicStatState: COMPLETE colStatState: NONE -explain extended select * from emp_orc +explain select * from emp_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - emp_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -170,12 +120,11 @@ STAGE PLANS: Processor Tree: TableScan alias: emp_orc - Statistics: Num rows: 6 Data size: 349 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + Statistics: Num rows: 48 Data size: 364 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: lastname (type: string), deptid (type: int) 
outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 349 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 48 Data size: 364 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- column level partial statistics @@ -189,27 +138,11 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@emp_orc #### A masked pattern was here #### PREHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL -explain extended select * from emp_orc +explain select * from emp_orc PREHOOK: type: QUERY POSTHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL -explain extended select * from emp_orc +explain select * from emp_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - emp_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -220,39 +153,21 @@ STAGE PLANS: Processor Tree: TableScan alias: emp_orc - Statistics: Num rows: 6 Data size: 349 Basic stats: COMPLETE Column stats: PARTIAL - GatherStats: false + Statistics: Num rows: 48 Data size: 364 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: lastname (type: string), deptid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 349 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 48 Data size: 364 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: -- all selected columns have statistics -- basicStatState: COMPLETE colStatState: COMPLETE -explain extended select deptid from emp_orc +explain select deptid from emp_orc PREHOOK: type: QUERY POSTHOOK: query: -- all selected columns have statistics -- basicStatState: COMPLETE colStatState: COMPLETE -explain extended select deptid from emp_orc +explain select deptid from emp_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - emp_orc - TOK_INSERT - 
TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - deptid - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -263,84 +178,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: emp_orc - Statistics: Num rows: 6 Data size: 349 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false + Statistics: Num rows: 48 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: deptid (type: int) outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### + Statistics: Num rows: 48 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: emp_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns lastname,deptid - columns.comments - columns.types string:int - field.delim | -#### A masked pattern was here #### - name default.emp_orc - numFiles 1 - 
numRows 6 - rawDataSize 0 - serialization.ddl struct emp_orc { string lastname, i32 deptid} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 349 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns lastname,deptid - columns.comments - columns.types string:int - field.delim | -#### A masked pattern was here #### - name default.emp_orc - numFiles 1 - numRows 6 - rawDataSize 0 - serialization.ddl struct emp_orc { string lastname, i32 deptid} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 349 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.emp_orc - name: default.emp_orc - Truncated Path -> Alias: - /emp_orc [emp_orc] Stage: Stage-0 Fetch Operator @@ -359,27 +208,11 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@emp_orc #### A masked pattern was here #### PREHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE -explain extended select * from emp_orc +explain select * from emp_orc PREHOOK: type: QUERY POSTHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE -explain extended select * from emp_orc +explain select * from emp_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - emp_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -390,37 +223,19 @@ STAGE PLANS: Processor Tree: TableScan alias: emp_orc - Statistics: Num rows: 6 Data size: 349 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false + Statistics: Num rows: 48 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Select Operator 
expressions: lastname (type: string), deptid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 349 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE -explain extended select lastname from emp_orc +explain select lastname from emp_orc PREHOOK: type: QUERY POSTHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE -explain extended select lastname from emp_orc +explain select lastname from emp_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - emp_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - lastname - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -431,84 +246,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: emp_orc - Statistics: Num rows: 6 Data size: 349 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false + Statistics: Num rows: 48 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: lastname (type: string) outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 546 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 546 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### + Statistics: Num rows: 48 Data size: 4368 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types string - escape.delim \ - 
hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: emp_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns lastname,deptid - columns.comments - columns.types string:int - field.delim | -#### A masked pattern was here #### - name default.emp_orc - numFiles 1 - numRows 6 - rawDataSize 0 - serialization.ddl struct emp_orc { string lastname, i32 deptid} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 349 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns lastname,deptid - columns.comments - columns.types string:int - field.delim | -#### A masked pattern was here #### - name default.emp_orc - numFiles 1 - numRows 6 - rawDataSize 0 - serialization.ddl struct emp_orc { string lastname, i32 deptid} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 349 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.emp_orc - name: default.emp_orc - Truncated Path -> Alias: - /emp_orc [emp_orc] Stage: Stage-0 Fetch Operator @@ -517,28 +266,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE -explain extended select deptid from emp_orc +explain select deptid from 
emp_orc PREHOOK: type: QUERY POSTHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE -explain extended select deptid from emp_orc +explain select deptid from emp_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - emp_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - deptid - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -549,84 +281,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: emp_orc - Statistics: Num rows: 6 Data size: 349 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false + Statistics: Num rows: 48 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: deptid (type: int) outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### + Statistics: Num rows: 48 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: emp_orc - input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns lastname,deptid - columns.comments - columns.types string:int - field.delim | -#### A masked pattern was here #### - name default.emp_orc - numFiles 1 - numRows 6 - rawDataSize 0 - serialization.ddl struct emp_orc { string lastname, i32 deptid} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 349 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns lastname,deptid - columns.comments - columns.types string:int - field.delim | -#### A masked pattern was here #### - name default.emp_orc - numFiles 1 - numRows 6 - rawDataSize 0 - serialization.ddl struct emp_orc { string lastname, i32 deptid} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 349 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.emp_orc - name: default.emp_orc - Truncated Path -> Alias: - /emp_orc [emp_orc] Stage: Stage-0 Fetch Operator @@ -635,31 +301,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE -explain extended select lastname,deptid from emp_orc +explain select lastname,deptid from emp_orc PREHOOK: type: QUERY POSTHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE -explain extended select lastname,deptid from emp_orc +explain select lastname,deptid from emp_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - emp_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - 
lastname - TOK_SELEXPR - TOK_TABLE_OR_COL - deptid - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -670,84 +316,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: emp_orc - Statistics: Num rows: 6 Data size: 349 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false + Statistics: Num rows: 48 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: lastname (type: string), deptid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 566 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 4560 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 566 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### + Statistics: Num rows: 48 Data size: 4560 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: emp_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns lastname,deptid - columns.comments - columns.types string:int - field.delim | -#### A masked pattern was here #### - name default.emp_orc - numFiles 1 - 
numRows 6 - rawDataSize 0 - serialization.ddl struct emp_orc { string lastname, i32 deptid} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 349 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns lastname,deptid - columns.comments - columns.types string:int - field.delim | -#### A masked pattern was here #### - name default.emp_orc - numFiles 1 - numRows 6 - rawDataSize 0 - serialization.ddl struct emp_orc { string lastname, i32 deptid} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 349 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.emp_orc - name: default.emp_orc - Truncated Path -> Alias: - /emp_orc [emp_orc] Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/annotate_stats_union.q.out ql/src/test/results/clientpositive/annotate_stats_union.q.out index 6d179b6..8f0682d 100644 --- ql/src/test/results/clientpositive/annotate_stats_union.q.out +++ ql/src/test/results/clientpositive/annotate_stats_union.q.out @@ -59,28 +59,11 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc #### A masked pattern was here #### PREHOOK: query: -- numRows: 8 rawDataSize: 688 -explain extended select state from loc_orc +explain select state from loc_orc PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 8 rawDataSize: 688 -explain extended select state from loc_orc +explain select state from loc_orc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - state - - STAGE DEPENDENCIES: Stage-1 is a root stage 
Stage-0 depends on stages: Stage-1 @@ -92,83 +75,17 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: state (type: string) outputColumnNames: _col0 Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc [loc_orc] Stage: Stage-0 Fetch Operator @@ -177,53 +94,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 16 rawDataSize: 1376 -explain extended select * from (select state from loc_orc union all select state from loc_orc) tmp +explain select * from (select state from loc_orc union all select state from loc_orc) tmp PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 16 rawDataSize: 1376 -explain extended select * from (select state from loc_orc union all select state from loc_orc) tmp +explain select * from (select state from loc_orc union all select state from loc_orc) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_SUBQUERY - TOK_UNION - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - state - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - state - tmp - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -235,7 +110,6 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: 
COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: state (type: string) outputColumnNames: _col0 @@ -248,29 +122,14 @@ STAGE PLANS: Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: state (type: string) outputColumnNames: _col0 @@ -283,76 +142,11 @@ STAGE PLANS: Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false 
- Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc [null-subquery1:tmp-subquery1:loc_orc, null-subquery2:tmp-subquery2:loc_orc] Stage: Stage-0 Fetch Operator @@ -361,27 +155,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 8 rawDataSize: 796 -explain extended select * from loc_orc +explain select * from loc_orc PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 8 rawDataSize: 796 -explain extended select * from loc_orc +explain select * from loc_orc POSTHOOK: 
type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -393,7 +171,6 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 @@ -401,51 +178,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 16 rawDataSize: 1592 -explain extended select * from (select * from loc_orc union all select * from loc_orc) tmp +explain select * from (select * from loc_orc union all select * from loc_orc) tmp PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 16 rawDataSize: 1592 -explain extended select * from (select * from loc_orc union all select * from loc_orc) tmp +explain select * from (select * from loc_orc union all select * from loc_orc) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_SUBQUERY - TOK_UNION - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - tmp - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -457,7 +194,6 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 @@ 
-470,29 +206,14 @@ STAGE PLANS: Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:int:bigint:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 @@ -505,76 +226,11 @@ STAGE PLANS: Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:int:bigint:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - 
MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc - Truncated Path -> Alias: - /loc_orc [null-subquery1:tmp-subquery1:loc_orc, null-subquery2:tmp-subquery2:loc_orc] Stage: Stage-0 Fetch Operator @@ -667,55 +323,11 @@ POSTHOOK: type: QUERY POSTHOOK: Input: test@loc_orc #### A masked pattern was here #### PREHOOK: query: -- numRows: 16 rawDataSize: 1376 -explain extended select * from (select state from default.loc_orc union all select state from test.loc_orc) temp +explain select * 
from (select state from default.loc_orc union all select state from test.loc_orc) temp PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 16 rawDataSize: 1376 -explain extended select * from (select state from default.loc_orc union all select state from test.loc_orc) temp +explain select * from (select state from default.loc_orc union all select state from test.loc_orc) temp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_SUBQUERY - TOK_UNION - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - default - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - state - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - test - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - state - temp - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -727,7 +339,6 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: state (type: string) outputColumnNames: _col0 @@ -740,29 +351,14 @@ STAGE PLANS: Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: state (type: string) outputColumnNames: _col0 @@ -775,123 +371,11 @@ STAGE PLANS: Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name default.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.loc_orc - name: default.loc_orc -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name test.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name test.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib 
org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: test.loc_orc - name: test.loc_orc - Truncated Path -> Alias: - /loc_orc [null-subquery1:temp-subquery1:loc_orc] - /test.db/loc_orc [null-subquery2:temp-subquery2:loc_orc] Stage: Stage-0 Fetch Operator @@ -900,55 +384,11 @@ STAGE PLANS: ListSink PREHOOK: query: -- numRows: 16 rawDataSize: 1376 -explain extended select * from (select state from test.loc_staging union all select state from test.loc_orc) temp +explain select * from (select state from test.loc_staging union all select state from test.loc_orc) temp PREHOOK: type: QUERY POSTHOOK: query: -- numRows: 16 rawDataSize: 1376 -explain extended select * from (select state from test.loc_staging union all select state from test.loc_orc) temp +explain select * from (select state from test.loc_staging union all select state from test.loc_orc) temp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_SUBQUERY - TOK_UNION - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - test - loc_staging - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - state - TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - test - loc_orc - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - state - temp - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -960,7 +400,6 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: state (type: string) outputColumnNames: _col0 @@ -973,29 +412,14 @@ STAGE PLANS: Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: 
false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false TableScan alias: loc_staging - Statistics: Num rows: 8 Data size: 117 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false + Statistics: Num rows: 8 Data size: 109 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string) outputColumnNames: _col0 @@ -1008,123 +432,11 @@ STAGE PLANS: Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: loc_orc - input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name test.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name test.loc_orc - numFiles 1 - numRows 8 - rawDataSize 796 - serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 493 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: test.loc_orc - name: test.loc_orc -#### A masked pattern was here #### - Partition - base file name: loc_staging - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name test.loc_staging - numFiles 1 - numRows 8 - rawDataSize 117 - serialization.ddl struct loc_staging { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 125 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns state,locid,zip,year - columns.comments - columns.types string:int:bigint:int - field.delim | -#### A masked pattern was here #### - name test.loc_staging - numFiles 1 - numRows 8 - rawDataSize 117 - serialization.ddl struct loc_staging { string state, i32 locid, i64 zip, i32 year} - serialization.format | - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 125 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: test.loc_staging - name: test.loc_staging - Truncated Path -> Alias: - /test.db/loc_orc [null-subquery2:temp-subquery2:loc_orc] - /test.db/loc_staging [null-subquery1:temp-subquery1:loc_staging] Stage: Stage-0 Fetch Operator