diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyProcFactory.java index 02216de..0d668a7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyProcFactory.java @@ -20,6 +20,7 @@ import org.apache.hadoop.hive.ql.exec.FilterOperator; import org.apache.hadoop.hive.ql.exec.GroupByOperator; +import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; @@ -33,6 +34,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.FilterDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; import java.util.List; import java.util.Stack; @@ -164,17 +166,22 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, canApplyCtx.setGbyKeysFetchException(true); } for (ExprNodeDesc expr : keyList) { - checkExpression(canApplyCtx, expr); + checkExpression(canApplyCtx, expr, operator, topOp); } } return null; } - private void checkExpression(RewriteCanApplyCtx canApplyCtx, ExprNodeDesc expr){ + private void checkExpression(RewriteCanApplyCtx canApplyCtx, ExprNodeDesc expr, Operator operator, Operator topOp){ if(expr instanceof ExprNodeColumnDesc){ //Add the group-by keys to RewriteCanApplyCtx's gbKeyNameList list to check later //if all keys are from index columns - canApplyCtx.getGbKeyNameList().addAll(expr.getCols()); + try { + expr = ExprNodeDescUtils.backtrack(expr, operator, topOp); + } catch (SemanticException e) { + e.printStackTrace(); + } + canApplyCtx.getGbKeyNameList().addAll(expr.getCols()); }else if(expr instanceof ExprNodeGenericFuncDesc){ ExprNodeGenericFuncDesc funcExpr = 
(ExprNodeGenericFuncDesc)expr; List childExprs = funcExpr.getChildren(); @@ -183,7 +190,7 @@ private void checkExpression(RewriteCanApplyCtx canApplyCtx, ExprNodeDesc expr){ canApplyCtx.getGbKeyNameList().addAll(expr.getCols()); canApplyCtx.getSelectColumnsList().add(((ExprNodeColumnDesc) childExpr).getColumn()); }else if(childExpr instanceof ExprNodeGenericFuncDesc){ - checkExpression(canApplyCtx, childExpr); + checkExpression(canApplyCtx, childExpr, operator, topOp); } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteGBUsingIndex.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteGBUsingIndex.java index 0f06ec9..40fa847 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteGBUsingIndex.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteGBUsingIndex.java @@ -378,7 +378,7 @@ private void rewriteOriginalQuery() throws SemanticException { RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx = RewriteQueryUsingAggregateIndexCtx.getInstance(parseContext, hiveDb, canApplyCtx.getIndexTableName(), canApplyCtx.getAlias(), - canApplyCtx.getAllColumns(), canApplyCtx.getAggFunction()); + canApplyCtx.getAllColumns(), canApplyCtx.getAggFunction(), canApplyCtx.getGbKeyNameList()); rewriteQueryCtx.invokeRewriteQueryProc(topOp); parseContext = rewriteQueryCtx.getParseContext(); parseContext.setOpParseCtx((LinkedHashMap, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndex.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndex.java index 74614f3..0efb710 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndex.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndex.java @@ -74,18 +74,27 @@ private RewriteQueryUsingAggregateIndex() { //this prevents the class from getting instantiated } + // for SEL1-SEL2-GBY-...-SEL3 + // we need to 
modify SelectOperator which precedes the GroupByOperator, e.g., SEL1, SEL2 + // and keep SelectOperator which comes after the GroupByOperator, e.g., SEL3 + private static boolean precedeGroupbyOp(Stack stack) { + for (Node node : stack) { + if (node instanceof GroupByOperator) + return false; + } + return true; + } + private static class NewQuerySelectSchemaProc implements NodeProcessor { public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException { SelectOperator operator = (SelectOperator)nd; RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx = (RewriteQueryUsingAggregateIndexCtx)ctx; - List> childOps = operator.getChildOperators(); - Operator childOp = childOps.iterator().next(); //we need to set the colList, outputColumnNames, colExprMap, // rowSchema for only that SelectOperator which precedes the GroupByOperator // count(indexed_key_column) needs to be replaced by sum(`_count_of_indexed_key_column`) - if (childOp instanceof GroupByOperator){ + if (precedeGroupbyOp(stack)) { List selColList = operator.getConf().getColList(); selColList.add(rewriteQueryCtx.getAggrExprNode()); @@ -94,6 +103,9 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, operator.getConf().getOutputColumnNames(); selOutputColNames.add(rewriteQueryCtx.getAggrExprNode().getColumn()); + operator.getColumnExprMap().put(rewriteQueryCtx.getAggrExprNode().getColumn(), + rewriteQueryCtx.getAggrExprNode()); + RowSchema selRS = operator.getSchema(); List selRSSignature = selRS.getSignature(); @@ -229,22 +241,13 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, //We need to replace the GroupByOperator which is in //groupOpToInputTables map with the new GroupByOperator - if(rewriteQueryCtx.getParseContext().getGroupOpToInputTables().containsKey(operator)){ - List gbyKeyList = operator.getConf().getKeys(); - String gbyKeys = null; - Iterator gbyKeyListItr = gbyKeyList.iterator(); - while(gbyKeyListItr.hasNext()){ 
- ExprNodeDesc expr = gbyKeyListItr.next().clone(); - if(expr instanceof ExprNodeColumnDesc){ - ExprNodeColumnDesc colExpr = (ExprNodeColumnDesc)expr; - gbyKeys = colExpr.getColumn(); - if(gbyKeyListItr.hasNext()){ - gbyKeys = gbyKeys + ","; - } - } + if (rewriteQueryCtx.getParseContext().getGroupOpToInputTables().containsKey(operator)) { + Iterator gbyKeyListItr = rewriteQueryCtx.getGbKeyNameList().iterator(); + String gbyKeys = gbyKeyListItr.next(); + while (gbyKeyListItr.hasNext()) { + gbyKeys = gbyKeys + "," + gbyKeyListItr.next(); } - //the query contains the sum aggregation GenericUDAF String selReplacementCommand = "select sum(`" + rewriteQueryCtx.getAggregateFunction() + "`)" diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndexCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndexCtx.java index d699308..77b3ce5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndexCtx.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndexCtx.java @@ -55,7 +55,8 @@ public final class RewriteQueryUsingAggregateIndexCtx implements NodeProcessorCtx { private RewriteQueryUsingAggregateIndexCtx(ParseContext parseContext, Hive hiveDb, - String indexTableName, String alias, Set columns, String aggregateFunction) { + String indexTableName, String alias, Set columns, String aggregateFunction, + Set gbKeyNameList) { this.parseContext = parseContext; this.hiveDb = hiveDb; this.indexTableName = indexTableName; @@ -63,13 +64,14 @@ private RewriteQueryUsingAggregateIndexCtx(ParseContext parseContext, Hive hiveD this.aggregateFunction = aggregateFunction; this.columns = columns; this.opc = parseContext.getOpParseCtx(); + this.gbKeyNameList = gbKeyNameList; } public static RewriteQueryUsingAggregateIndexCtx getInstance(ParseContext parseContext, Hive hiveDb, String indexTableName, String alias, - Set columns, String 
aggregateFunction) { + Set columns, String aggregateFunction, Set gbKeyNameList) { return new RewriteQueryUsingAggregateIndexCtx( - parseContext, hiveDb, indexTableName, alias, columns, aggregateFunction); + parseContext, hiveDb, indexTableName, alias, columns, aggregateFunction, gbKeyNameList); } @@ -84,6 +86,7 @@ public static RewriteQueryUsingAggregateIndexCtx getInstance(ParseContext parseC private final String aggregateFunction; private final Set columns; private ExprNodeColumnDesc aggrExprNode = null; + private final Set gbKeyNameList; public Map, OpParseContext> getOpc() { return opc; @@ -116,6 +119,10 @@ public void setAggrExprNode(ExprNodeColumnDesc aggrExprNode) { public ExprNodeColumnDesc getAggrExprNode() { return aggrExprNode; } + + public Set getGbKeyNameList() { + return gbKeyNameList; + } /** * Walk the original operator tree using the {@link DefaultGraphWalker} using the rules. diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java index b15df0f..56eb928 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java @@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; +import java.util.Arrays; +import java.util.List; import org.apache.commons.lang.builder.HashCodeBuilder; import org.apache.hadoop.hive.serde.serdeConstants; @@ -154,4 +156,8 @@ public int hashCode() { builder.append(value); return builder.toHashCode(); } + + public List getCols() { + return Arrays.asList(foldedFromCol); + } } diff --git a/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx_cbo.q b/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx_cbo.q new file mode 100644 index 0000000..9ce7d85 --- /dev/null +++ b/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx_cbo.q @@ -0,0 +1,173 @@ +set hive.stats.dbclass=fs; +set hive.stats.autogather=true; +set 
hive.cbo.enable=true; + +DROP TABLE IF EXISTS lineitem_ix; +CREATE TABLE lineitem_ix (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|'; + +LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_ix; + +CREATE INDEX lineitem_ix_lshipdate_idx ON TABLE lineitem_ix(l_shipdate) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(l_shipdate)"); +ALTER INDEX lineitem_ix_lshipdate_idx ON lineitem_ix REBUILD; + +explain select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate; + +select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate +order by l_shipdate; + +set hive.optimize.index.groupby=true; + +explain select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate; + +select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate +order by l_shipdate; + +set hive.optimize.index.groupby=false; + + +explain select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month; + +select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month; + +set hive.optimize.index.groupby=true; + +explain select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month; + +select year(l_shipdate) as year, + month(l_shipdate) as month, + 
count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month; + +explain select lastyear.month, + thisyear.month, + (thisyear.monthly_shipments - lastyear.monthly_shipments) / +lastyear.monthly_shipments as monthly_shipments_delta + from (select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments + from lineitem_ix + where year(l_shipdate) = 1997 + group by year(l_shipdate), month(l_shipdate) + ) lastyear join + (select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments + from lineitem_ix + where year(l_shipdate) = 1998 + group by year(l_shipdate), month(l_shipdate) + ) thisyear + on lastyear.month = thisyear.month; + +explain select l_shipdate, cnt +from (select l_shipdate, count(l_shipdate) as cnt from lineitem_ix group by l_shipdate +union all +select l_shipdate, l_orderkey as cnt +from lineitem_ix) dummy; + +CREATE TABLE tbl(key int, value int); +CREATE INDEX tbl_key_idx ON TABLE tbl(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)"); +ALTER INDEX tbl_key_idx ON tbl REBUILD; + +EXPLAIN select key, count(key) from tbl where key = 1 group by key; +EXPLAIN select key, count(key) from tbl group by key; + +EXPLAIN select count(1) from tbl; +EXPLAIN select count(key) from tbl; + +EXPLAIN select key FROM tbl GROUP BY key; +EXPLAIN select key FROM tbl GROUP BY value, key; +EXPLAIN select key FROM tbl WHERE key = 3 GROUP BY key; +EXPLAIN select key FROM tbl WHERE value = 2 GROUP BY key; +EXPLAIN select key FROM tbl GROUP BY key, substr(key,2,3); + +EXPLAIN select key, value FROM tbl GROUP BY value, key; +EXPLAIN select key, value FROM tbl WHERE value = 1 GROUP BY key, value; + +EXPLAIN select DISTINCT key FROM tbl; +EXPLAIN select DISTINCT key FROM tbl; +EXPLAIN select DISTINCT key FROM tbl; +EXPLAIN select DISTINCT key, value FROM tbl; 
+EXPLAIN select DISTINCT key, value FROM tbl WHERE value = 2; +EXPLAIN select DISTINCT key, value FROM tbl WHERE value = 2 AND key = 3; +EXPLAIN select DISTINCT key, value FROM tbl WHERE value = key; +EXPLAIN select DISTINCT key, substr(value,2,3) FROM tbl WHERE value = key; +EXPLAIN select DISTINCT key, substr(value,2,3) FROM tbl; + +EXPLAIN select * FROM (select DISTINCT key, value FROM tbl) v1 WHERE v1.value = 2; + +DROP TABLE tbl; + +CREATE TABLE tblpart (key int, value string) PARTITIONED BY (ds string, hr int); +INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-08', hr=11) SELECT key, value FROM srcpart WHERE ds = '2008-04-08' AND hr = 11; +INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-08', hr=12) SELECT key, value FROM srcpart WHERE ds = '2008-04-08' AND hr = 12; +INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-09', hr=11) SELECT key, value FROM srcpart WHERE ds = '2008-04-09' AND hr = 11; +INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-09', hr=12) SELECT key, value FROM srcpart WHERE ds = '2008-04-09' AND hr = 12; + +CREATE INDEX tbl_part_index ON TABLE tblpart(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)"); + +ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-08', hr=11) REBUILD; +EXPLAIN SELECT key, count(key) FROM tblpart WHERE ds='2008-04-09' AND hr=12 AND key < 10 GROUP BY key; + +ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-08', hr=12) REBUILD; +ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-09', hr=11) REBUILD; +ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-09', hr=12) REBUILD; +EXPLAIN SELECT key, count(key) FROM tblpart WHERE ds='2008-04-09' AND hr=12 AND key < 10 GROUP BY key; + +DROP INDEX tbl_part_index on tblpart; +DROP TABLE tblpart; + +CREATE TABLE tbl(key int, value int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'; +LOAD DATA LOCAL INPATH '../../data/files/tbl.txt' OVERWRITE INTO 
TABLE tbl; + +CREATE INDEX tbl_key_idx ON TABLE tbl(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)"); +ALTER INDEX tbl_key_idx ON tbl REBUILD; + +set hive.optimize.index.groupby=false; +explain select key, count(key) from tbl group by key order by key; +select key, count(key) from tbl group by key order by key; +set hive.optimize.index.groupby=true; +explain select key, count(key) from tbl group by key order by key; +select key, count(key) from tbl group by key order by key; +DROP TABLE tbl; \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx_cbo.q.out b/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx_cbo.q.out new file mode 100644 index 0000000..77324b4 --- /dev/null +++ b/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx_cbo.q.out @@ -0,0 +1,2657 @@ +PREHOOK: query: DROP TABLE IF EXISTS lineitem_ix +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS lineitem_ix +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE lineitem_ix (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@lineitem_ix +POSTHOOK: query: CREATE TABLE lineitem_ix (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' 
+POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@lineitem_ix +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_ix +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@lineitem_ix +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_ix +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@lineitem_ix +PREHOOK: query: CREATE INDEX lineitem_ix_lshipdate_idx ON TABLE lineitem_ix(l_shipdate) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(l_shipdate)") +PREHOOK: type: CREATEINDEX +PREHOOK: Input: default@lineitem_ix +POSTHOOK: query: CREATE INDEX lineitem_ix_lshipdate_idx ON TABLE lineitem_ix(l_shipdate) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(l_shipdate)") +POSTHOOK: type: CREATEINDEX +POSTHOOK: Input: default@lineitem_ix +POSTHOOK: Output: default@default__lineitem_ix_lineitem_ix_lshipdate_idx__ +PREHOOK: query: ALTER INDEX lineitem_ix_lshipdate_idx ON lineitem_ix REBUILD +PREHOOK: type: ALTERINDEX_REBUILD +PREHOOK: Input: default@lineitem_ix +PREHOOK: Output: default@default__lineitem_ix_lineitem_ix_lshipdate_idx__ +POSTHOOK: query: ALTER INDEX lineitem_ix_lshipdate_idx ON lineitem_ix REBUILD +POSTHOOK: type: ALTERINDEX_REBUILD +POSTHOOK: Input: default@lineitem_ix +POSTHOOK: Output: default@default__lineitem_ix_lineitem_ix_lshipdate_idx__ +POSTHOOK: Lineage: default__lineitem_ix_lineitem_ix_lshipdate_idx__._bucketname SIMPLE [(lineitem_ix)lineitem_ix.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__lineitem_ix_lineitem_ix_lshipdate_idx__._count_of_l_shipdate EXPRESSION [(lineitem_ix)lineitem_ix.FieldSchema(name:l_shipdate, type:string, comment:null), ] +POSTHOOK: Lineage: 
default__lineitem_ix_lineitem_ix_lshipdate_idx__._offsets EXPRESSION [(lineitem_ix)lineitem_ix.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__lineitem_ix_lineitem_ix_lshipdate_idx__.l_shipdate SIMPLE [(lineitem_ix)lineitem_ix.FieldSchema(name:l_shipdate, type:string, comment:null), ] +PREHOOK: query: explain select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate +PREHOOK: type: QUERY +POSTHOOK: query: explain select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_ix + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_shipdate (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate +order by l_shipdate +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate +order by l_shipdate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +1992-04-27 1 +1992-07-02 1 +1992-07-10 1 +1992-07-21 1 +1993-04-01 1 +1993-04-13 1 +1993-05-14 1 +1993-10-29 2 +1993-11-09 2 +1993-12-04 1 +1993-12-09 2 +1993-12-14 1 +1994-01-12 1 +1994-01-16 1 +1994-01-26 2 +1994-02-02 1 +1994-02-13 1 +1994-02-19 1 +1994-02-21 1 +1994-03-03 1 +1994-03-17 1 +1994-06-03 1 +1994-06-06 1 +1994-07-02 1 +1994-07-19 1 +1994-07-31 1 +1994-08-08 1 +1994-08-17 1 +1994-08-24 1 +1994-09-30 1 +1994-10-03 1 +1994-10-16 1 +1994-10-31 1 +1994-12-01 1 +1994-12-24 1 +1994-12-30 1 +1995-04-20 1 +1995-07-06 1 +1995-07-17 1 +1995-07-21 1 +1995-08-04 1 +1995-08-07 1 +1995-08-14 1 +1995-08-28 1 +1995-10-23 1 +1995-11-08 1 +1995-11-26 1 +1996-01-10 1 +1996-01-15 1 +1996-01-16 1 +1996-01-19 1 +1996-01-22 1 +1996-01-29 1 +1996-01-30 1 +1996-02-01 2 +1996-02-03 1 +1996-02-10 1 +1996-02-11 1 +1996-02-21 1 +1996-03-13 1 +1996-03-21 1 +1996-03-30 1 +1996-04-12 1 +1996-04-21 1 +1996-05-07 1 +1996-09-26 1 +1996-09-29 1 +1996-10-02 1 +1996-10-17 1 +1996-11-04 1 +1996-11-14 1 +1996-12-08 1 +1997-01-25 1 +1997-01-27 1 +1997-01-28 1 +1997-02-20 1 +1997-03-18 1 +1997-04-17 1 +1997-04-19 1 +1998-01-29 1 +1998-02-23 1 +1998-03-05 1 +1998-04-10 1 +1998-04-12 1 +1998-05-23 1 +1998-06-19 1 
+1998-06-24 1 +1998-06-26 1 +1998-06-27 1 +1998-07-04 1 +1998-08-11 1 +1998-08-13 1 +1998-10-09 1 +1998-10-23 1 +1998-10-30 1 +PREHOOK: query: explain select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate +PREHOOK: type: QUERY +POSTHOOK: query: explain select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: $hdt$_0:$hdt$_0:default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + Statistics: Num rows: 95 Data size: 10575 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint) + outputColumnNames: _col0, _count_of_l_shipdate + Statistics: Num rows: 95 Data size: 10575 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_l_shipdate) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 95 Data size: 10575 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 95 Data size: 10575 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 5231 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 5231 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 47 Data size: 5231 Basic stats: COMPLETE Column stats: NONE + table: + 
input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate +order by l_shipdate +PREHOOK: type: QUERY +PREHOOK: Input: default@default__lineitem_ix_lineitem_ix_lshipdate_idx__ +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate +order by l_shipdate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__lineitem_ix_lineitem_ix_lshipdate_idx__ +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +1992-04-27 1 +1992-07-02 1 +1992-07-10 1 +1992-07-21 1 +1993-04-01 1 +1993-04-13 1 +1993-05-14 1 +1993-10-29 2 +1993-11-09 2 +1993-12-04 1 +1993-12-09 2 +1993-12-14 1 +1994-01-12 1 +1994-01-16 1 +1994-01-26 2 +1994-02-02 1 +1994-02-13 1 +1994-02-19 1 +1994-02-21 1 +1994-03-03 1 +1994-03-17 1 +1994-06-03 1 +1994-06-06 1 +1994-07-02 1 +1994-07-19 1 +1994-07-31 1 +1994-08-08 1 +1994-08-17 1 +1994-08-24 1 +1994-09-30 1 +1994-10-03 1 +1994-10-16 1 +1994-10-31 1 +1994-12-01 1 +1994-12-24 1 +1994-12-30 1 +1995-04-20 1 +1995-07-06 1 +1995-07-17 1 +1995-07-21 1 +1995-08-04 1 +1995-08-07 1 +1995-08-14 1 +1995-08-28 1 +1995-10-23 1 +1995-11-08 1 +1995-11-26 1 +1996-01-10 1 +1996-01-15 1 +1996-01-16 1 +1996-01-19 1 +1996-01-22 1 +1996-01-29 1 +1996-01-30 1 +1996-02-01 2 +1996-02-03 1 +1996-02-10 1 +1996-02-11 1 +1996-02-21 1 +1996-03-13 1 +1996-03-21 1 +1996-03-30 1 +1996-04-12 1 +1996-04-21 1 +1996-05-07 1 +1996-09-26 1 +1996-09-29 1 +1996-10-02 1 +1996-10-17 1 +1996-11-04 1 +1996-11-14 1 +1996-12-08 1 +1997-01-25 1 +1997-01-27 1 +1997-01-28 1 +1997-02-20 1 +1997-03-18 1 +1997-04-17 1 +1997-04-19 1 +1998-01-29 1 +1998-02-23 1 +1998-03-05 1 
+1998-04-10 1 +1998-04-12 1 +1998-05-23 1 +1998-06-19 1 +1998-06-24 1 +1998-06-26 1 +1998-06-27 1 +1998-07-04 1 +1998-08-11 1 +1998-08-13 1 +1998-10-09 1 +1998-10-23 1 +1998-10-30 1 +PREHOOK: query: explain select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month +PREHOOK: type: QUERY +POSTHOOK: query: explain select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_ix + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: year(l_shipdate) (type: int), month(l_shipdate) (type: int), l_shipdate (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col2) + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE 
Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month +POSTHOOK: type: QUERY 
+POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +1992 4 1 +1992 7 3 +1993 4 2 +1993 5 1 +1993 10 2 +1993 11 2 +1993 12 4 +1994 1 4 +1994 2 4 +1994 3 2 +1994 6 2 +1994 7 3 +1994 8 3 +1994 9 1 +1994 10 3 +1994 12 3 +1995 4 1 +1995 7 3 +1995 8 4 +1995 10 1 +1995 11 2 +1996 1 7 +1996 2 6 +1996 3 3 +1996 4 2 +1996 5 1 +1996 9 2 +1996 10 2 +1996 11 2 +1996 12 1 +1997 1 3 +1997 2 1 +1997 3 1 +1997 4 2 +1998 1 1 +1998 2 1 +1998 3 1 +1998 4 2 +1998 5 1 +1998 6 4 +1998 7 1 +1998 8 2 +1998 10 3 +PREHOOK: query: explain select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month +PREHOOK: type: QUERY +POSTHOOK: query: explain select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: $hdt$_0:$hdt$_0:default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + Statistics: Num rows: 95 Data size: 10575 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: year(l_shipdate) (type: int), month(l_shipdate) (type: int), l_shipdate (type: string), _count_of_l_shipdate (type: bigint) + outputColumnNames: _col0, _col1, _col2, _count_of_l_shipdate + Statistics: Num rows: 95 Data size: 10575 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_l_shipdate) + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 95 Data size: 10575 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort 
order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 95 Data size: 10575 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 47 Data size: 5231 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 47 Data size: 5231 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 47 Data size: 5231 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 47 Data size: 5231 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 47 Data size: 5231 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select year(l_shipdate) as year, + month(l_shipdate) as 
month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month +PREHOOK: type: QUERY +PREHOOK: Input: default@default__lineitem_ix_lineitem_ix_lshipdate_idx__ +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__lineitem_ix_lineitem_ix_lshipdate_idx__ +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +1992 4 1 +1992 7 3 +1993 4 2 +1993 5 1 +1993 10 2 +1993 11 2 +1993 12 4 +1994 1 4 +1994 2 4 +1994 3 2 +1994 6 2 +1994 7 3 +1994 8 3 +1994 9 1 +1994 10 3 +1994 12 3 +1995 4 1 +1995 7 3 +1995 8 4 +1995 10 1 +1995 11 2 +1996 1 7 +1996 2 6 +1996 3 3 +1996 4 2 +1996 5 1 +1996 9 2 +1996 10 2 +1996 11 2 +1996 12 1 +1997 1 3 +1997 2 1 +1997 3 1 +1997 4 2 +1998 1 1 +1998 2 1 +1998 3 1 +1998 4 2 +1998 5 1 +1998 6 4 +1998 7 1 +1998 8 2 +1998 10 3 +PREHOOK: query: explain select lastyear.month, + thisyear.month, + (thisyear.monthly_shipments - lastyear.monthly_shipments) / +lastyear.monthly_shipments as monthly_shipments_delta + from (select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments + from lineitem_ix + where year(l_shipdate) = 1997 + group by year(l_shipdate), month(l_shipdate) + ) lastyear join + (select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments + from lineitem_ix + where year(l_shipdate) = 1998 + group by year(l_shipdate), month(l_shipdate) + ) thisyear + on lastyear.month = thisyear.month +PREHOOK: type: QUERY +POSTHOOK: query: explain select lastyear.month, + thisyear.month, + (thisyear.monthly_shipments - lastyear.monthly_shipments) / +lastyear.monthly_shipments as monthly_shipments_delta + 
from (select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments + from lineitem_ix + where year(l_shipdate) = 1997 + group by year(l_shipdate), month(l_shipdate) + ) lastyear join + (select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments + from lineitem_ix + where year(l_shipdate) = 1998 + group by year(l_shipdate), month(l_shipdate) + ) thisyear + on lastyear.month = thisyear.month +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_ix + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (year(l_shipdate) = 1998) (type: boolean) + Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_shipdate (type: string) + outputColumnNames: l_shipdate + Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(l_shipdate) + keys: year(l_shipdate) (type: int), month(l_shipdate) (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 3024 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 15 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 15 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 
(type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 7 Data size: 705 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col2 (type: bigint) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 7 Data size: 705 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 7 Data size: 705 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 7 Data size: 705 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col1} + 1 {KEY.reducesinkkey0} {VALUE._col1} + outputColumnNames: _col1, _col2, _col4, _col5 + Statistics: Num rows: 7 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col4 (type: int), ((_col5 - _col2) / _col2) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 7 Data size: 775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_ix + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (year(l_shipdate) = 1997) (type: boolean) + Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_shipdate (type: string) + outputColumnNames: l_shipdate + Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(l_shipdate) + keys: year(l_shipdate) (type: int), month(l_shipdate) (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 3024 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 15 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 15 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 7 Data size: 705 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col2 (type: bigint) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 7 Data size: 705 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: 
Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select l_shipdate, cnt +from (select l_shipdate, count(l_shipdate) as cnt from lineitem_ix group by l_shipdate +union all +select l_shipdate, l_orderkey as cnt +from lineitem_ix) dummy +PREHOOK: type: QUERY +POSTHOOK: query: explain select l_shipdate, cnt +from (select l_shipdate, count(l_shipdate) as cnt from lineitem_ix group by l_shipdate +union all +select l_shipdate, l_orderkey as cnt +from lineitem_ix) dummy +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: null-subquery1:$hdt$_0-subquery1:$hdt$_0:$hdt$_0:default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + Statistics: Num rows: 95 Data size: 10575 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint) + outputColumnNames: _col0, _count_of_l_shipdate + Statistics: Num rows: 95 Data size: 10575 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_l_shipdate) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 95 Data size: 10575 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 95 Data size: 10575 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 5231 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: 
_col0, _col1 + Statistics: Num rows: 47 Data size: 5231 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 163 Data size: 17330 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 163 Data size: 17330 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 163 Data size: 17330 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + alias: lineitem_ix + Statistics: Num rows: 116 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_shipdate (type: string), UDFToLong(l_orderkey) (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 116 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 163 Data size: 17330 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 163 Data size: 17330 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 163 Data size: 17330 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 
+ Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: CREATE TABLE tbl(key int, value int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl +POSTHOOK: query: CREATE TABLE tbl(key int, value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl +PREHOOK: query: CREATE INDEX tbl_key_idx ON TABLE tbl(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)") +PREHOOK: type: CREATEINDEX +PREHOOK: Input: default@tbl +POSTHOOK: query: CREATE INDEX tbl_key_idx ON TABLE tbl(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)") +POSTHOOK: type: CREATEINDEX +POSTHOOK: Input: default@tbl +POSTHOOK: Output: default@default__tbl_tbl_key_idx__ +PREHOOK: query: ALTER INDEX tbl_key_idx ON tbl REBUILD +PREHOOK: type: ALTERINDEX_REBUILD +PREHOOK: Input: default@tbl +PREHOOK: Output: default@default__tbl_tbl_key_idx__ +POSTHOOK: query: ALTER INDEX tbl_key_idx ON tbl REBUILD +POSTHOOK: type: ALTERINDEX_REBUILD +POSTHOOK: Input: default@tbl +POSTHOOK: Output: default@default__tbl_tbl_key_idx__ +POSTHOOK: Lineage: default__tbl_tbl_key_idx__._bucketname SIMPLE [(tbl)tbl.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__tbl_tbl_key_idx__._count_of_key EXPRESSION [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: default__tbl_tbl_key_idx__._offsets EXPRESSION [(tbl)tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__tbl_tbl_key_idx__.key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: EXPLAIN select key, count(key) from tbl where key = 1 group by key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select key, count(key) from tbl where key = 1 group by key +POSTHOOK: 
type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (key = 1) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count(_col0) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select key, count(key) from tbl group by key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select key, count(key) from tbl group by key +POSTHOOK: type: QUERY 
+STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: $hdt$_0:$hdt$_0:default.default__tbl_tbl_key_idx__ + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int), _count_of_key (type: bigint) + outputColumnNames: _col0, _count_of_key + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_key) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select count(1) from tbl +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select count(1) from tbl +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + 
+STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Group By Operator + aggregations: count(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select count(key) from tbl +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select count(key) from tbl +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE 
+ Group By Operator + aggregations: count(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select key FROM tbl GROUP BY key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select key FROM tbl GROUP BY key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 
Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select key FROM tbl GROUP BY value, key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select key FROM tbl GROUP BY value, key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: value (type: int), key (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num 
rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select key FROM tbl WHERE key = 3 GROUP BY key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select key FROM tbl WHERE key = 3 GROUP BY key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (key = 3) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: 3 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num 
rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select key FROM tbl WHERE value = 2 GROUP BY key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select key FROM tbl WHERE value = 2 GROUP BY key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (value = 2) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic 
stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select key FROM tbl GROUP BY key, substr(key,2,3) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select key FROM tbl GROUP BY key, substr(key,2,3) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int), substr(key, 2, 3) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select key, value FROM tbl GROUP BY value, key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select key, value FROM tbl GROUP BY value, key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: value (type: int), key (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + 
Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select key, value FROM tbl WHERE value = 1 GROUP BY key, value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select key, value FROM tbl WHERE value = 1 GROUP BY key, value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (value = 1) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int), 1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select DISTINCT key FROM tbl +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select DISTINCT key FROM tbl +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select DISTINCT key FROM tbl +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select DISTINCT key FROM tbl +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a 
root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select DISTINCT key FROM tbl +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select DISTINCT key FROM tbl +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data 
size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select DISTINCT key, value FROM tbl +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select DISTINCT key, value FROM tbl +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 
(type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select DISTINCT key, value FROM tbl WHERE value = 2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select DISTINCT key, value FROM tbl WHERE value = 2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (value = 2) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int), 2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 
(type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select DISTINCT key, value FROM tbl WHERE value = 2 AND key = 3 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select DISTINCT key, value FROM tbl WHERE value = 2 AND key = 3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: ((value = 2) and (key = 3)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: 3 (type: int), 2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce 
Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select DISTINCT key, value FROM tbl WHERE value = key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select DISTINCT key, value FROM tbl WHERE value = key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (value = key) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: 
NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select DISTINCT key, substr(value,2,3) FROM tbl WHERE value = key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select DISTINCT key, substr(value,2,3) FROM tbl WHERE value = key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (value = key) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int), substr(value, 2, 3) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: 
Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select DISTINCT key, substr(value,2,3) FROM tbl +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select DISTINCT key, substr(value,2,3) FROM tbl +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int), substr(value, 2, 3) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: 
_col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select * FROM (select DISTINCT key, value FROM tbl) v1 WHERE v1.value = 2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select * FROM (select DISTINCT key, value FROM tbl) v1 WHERE v1.value = 2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (value = 2) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int), 2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: DROP TABLE tbl +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tbl +PREHOOK: Output: default@tbl +POSTHOOK: query: DROP TABLE tbl +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tbl +POSTHOOK: Output: default@tbl +PREHOOK: query: CREATE TABLE tblpart (key int, value string) PARTITIONED BY (ds string, hr int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tblpart +POSTHOOK: query: CREATE TABLE tblpart (key int, value string) PARTITIONED BY (ds string, hr int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tblpart +PREHOOK: query: INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-08', hr=11) SELECT key, value FROM srcpart WHERE ds = '2008-04-08' AND hr = 11 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Output: 
default@tblpart@ds=2008-04-08/hr=11 +POSTHOOK: query: INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-08', hr=11) SELECT key, value FROM srcpart WHERE ds = '2008-04-08' AND hr = 11 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@tblpart@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-08,hr=11).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-08', hr=12) SELECT key, value FROM srcpart WHERE ds = '2008-04-08' AND hr = 12 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@tblpart@ds=2008-04-08/hr=12 +POSTHOOK: query: INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-08', hr=12) SELECT key, value FROM srcpart WHERE ds = '2008-04-08' AND hr = 12 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@tblpart@ds=2008-04-08/hr=12 +POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-08,hr=12).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-09', hr=11) SELECT key, value FROM srcpart WHERE ds = '2008-04-09' AND hr = 11 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Output: default@tblpart@ds=2008-04-09/hr=11 +POSTHOOK: query: INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-09', hr=11) SELECT key, value FROM srcpart 
WHERE ds = '2008-04-09' AND hr = 11 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@tblpart@ds=2008-04-09/hr=11 +POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-09,hr=11).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-09', hr=12) SELECT key, value FROM srcpart WHERE ds = '2008-04-09' AND hr = 12 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@tblpart@ds=2008-04-09/hr=12 +POSTHOOK: query: INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-09', hr=12) SELECT key, value FROM srcpart WHERE ds = '2008-04-09' AND hr = 12 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@tblpart@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-09,hr=12).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: CREATE INDEX tbl_part_index ON TABLE tblpart(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)") +PREHOOK: type: CREATEINDEX +PREHOOK: Input: default@tblpart +POSTHOOK: query: CREATE INDEX tbl_part_index ON TABLE tblpart(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)") +POSTHOOK: type: CREATEINDEX +POSTHOOK: Input: default@tblpart +POSTHOOK: Output: default@default__tblpart_tbl_part_index__ +PREHOOK: query: 
ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-08', hr=11) REBUILD +PREHOOK: type: ALTERINDEX_REBUILD +PREHOOK: Input: default@tblpart +PREHOOK: Input: default@tblpart@ds=2008-04-08/hr=11 +PREHOOK: Output: default@default__tblpart_tbl_part_index__@ds=2008-04-08/hr=11 +POSTHOOK: query: ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-08', hr=11) REBUILD +POSTHOOK: type: ALTERINDEX_REBUILD +POSTHOOK: Input: default@tblpart +POSTHOOK: Input: default@tblpart@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@default__tblpart_tbl_part_index__@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-08,hr=11)._bucketname SIMPLE [(tblpart)tblpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-08,hr=11)._count_of_key EXPRESSION [(tblpart)tblpart.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-08,hr=11)._offsets EXPRESSION [(tblpart)tblpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(tblpart)tblpart.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: EXPLAIN SELECT key, count(key) FROM tblpart WHERE ds='2008-04-09' AND hr=12 AND key < 10 GROUP BY key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT key, count(key) FROM tblpart WHERE ds='2008-04-09' AND hr=12 AND key < 10 GROUP BY key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tblpart + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + 
Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-08', hr=12) REBUILD +PREHOOK: type: ALTERINDEX_REBUILD +PREHOOK: Input: default@tblpart +PREHOOK: Input: default@tblpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@default__tblpart_tbl_part_index__@ds=2008-04-08/hr=12 +POSTHOOK: query: ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-08', hr=12) REBUILD +POSTHOOK: type: ALTERINDEX_REBUILD +POSTHOOK: Input: default@tblpart +POSTHOOK: Input: default@tblpart@ds=2008-04-08/hr=12 +POSTHOOK: 
Output: default@default__tblpart_tbl_part_index__@ds=2008-04-08/hr=12 +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-08,hr=12)._bucketname SIMPLE [(tblpart)tblpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-08,hr=12)._count_of_key EXPRESSION [(tblpart)tblpart.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-08,hr=12)._offsets EXPRESSION [(tblpart)tblpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(tblpart)tblpart.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-09', hr=11) REBUILD +PREHOOK: type: ALTERINDEX_REBUILD +PREHOOK: Input: default@tblpart +PREHOOK: Input: default@tblpart@ds=2008-04-09/hr=11 +PREHOOK: Output: default@default__tblpart_tbl_part_index__@ds=2008-04-09/hr=11 +POSTHOOK: query: ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-09', hr=11) REBUILD +POSTHOOK: type: ALTERINDEX_REBUILD +POSTHOOK: Input: default@tblpart +POSTHOOK: Input: default@tblpart@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@default__tblpart_tbl_part_index__@ds=2008-04-09/hr=11 +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-09,hr=11)._bucketname SIMPLE [(tblpart)tblpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-09,hr=11)._count_of_key EXPRESSION [(tblpart)tblpart.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-09,hr=11)._offsets EXPRESSION [(tblpart)tblpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: 
default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(tblpart)tblpart.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-09', hr=12) REBUILD +PREHOOK: type: ALTERINDEX_REBUILD +PREHOOK: Input: default@tblpart +PREHOOK: Input: default@tblpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@default__tblpart_tbl_part_index__@ds=2008-04-09/hr=12 +POSTHOOK: query: ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-09', hr=12) REBUILD +POSTHOOK: type: ALTERINDEX_REBUILD +POSTHOOK: Input: default@tblpart +POSTHOOK: Input: default@tblpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@default__tblpart_tbl_part_index__@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(tblpart)tblpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-09,hr=12)._count_of_key EXPRESSION [(tblpart)tblpart.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(tblpart)tblpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(tblpart)tblpart.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: EXPLAIN SELECT key, count(key) FROM tblpart WHERE ds='2008-04-09' AND hr=12 AND key < 10 GROUP BY key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT key, count(key) FROM tblpart WHERE ds='2008-04-09' AND hr=12 AND key < 10 GROUP BY key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tblpart + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE + Filter Operator + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: DROP INDEX tbl_part_index on tblpart +PREHOOK: type: DROPINDEX +PREHOOK: Input: default@tblpart +POSTHOOK: query: DROP INDEX tbl_part_index on tblpart +POSTHOOK: type: DROPINDEX +POSTHOOK: Input: default@tblpart +PREHOOK: query: DROP TABLE tblpart +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tblpart +PREHOOK: Output: default@tblpart +POSTHOOK: 
query: DROP TABLE tblpart +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tblpart +POSTHOOK: Output: default@tblpart +PREHOOK: query: CREATE TABLE tbl(key int, value int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl +POSTHOOK: query: CREATE TABLE tbl(key int, value int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tbl.txt' OVERWRITE INTO TABLE tbl +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tbl +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tbl.txt' OVERWRITE INTO TABLE tbl +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tbl +PREHOOK: query: CREATE INDEX tbl_key_idx ON TABLE tbl(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)") +PREHOOK: type: CREATEINDEX +PREHOOK: Input: default@tbl +POSTHOOK: query: CREATE INDEX tbl_key_idx ON TABLE tbl(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)") +POSTHOOK: type: CREATEINDEX +POSTHOOK: Input: default@tbl +POSTHOOK: Output: default@default__tbl_tbl_key_idx__ +PREHOOK: query: ALTER INDEX tbl_key_idx ON tbl REBUILD +PREHOOK: type: ALTERINDEX_REBUILD +PREHOOK: Input: default@tbl +PREHOOK: Output: default@default__tbl_tbl_key_idx__ +POSTHOOK: query: ALTER INDEX tbl_key_idx ON tbl REBUILD +POSTHOOK: type: ALTERINDEX_REBUILD +POSTHOOK: Input: default@tbl +POSTHOOK: Output: default@default__tbl_tbl_key_idx__ +POSTHOOK: Lineage: default__tbl_tbl_key_idx__._bucketname SIMPLE [(tbl)tbl.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__tbl_tbl_key_idx__._count_of_key EXPRESSION 
[(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: default__tbl_tbl_key_idx__._offsets EXPRESSION [(tbl)tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__tbl_tbl_key_idx__.key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: explain select key, count(key) from tbl group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, count(key) from tbl group by key order by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from tbl group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from tbl group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl +#### A masked pattern was here #### +1 1 +2 3 +3 2 +4 2 +6 1 +7 1 +PREHOOK: query: explain select key, count(key) from tbl group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, count(key) from tbl group by key order by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: $hdt$_0:$hdt$_0:default.default__tbl_tbl_key_idx__ + Statistics: Num rows: 6 Data size: 532 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: 
int), _count_of_key (type: bigint) + outputColumnNames: _col0, _count_of_key + Statistics: Num rows: 6 Data size: 532 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_key) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 532 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 532 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 266 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 3 Data size: 266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 266 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from tbl group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@default__tbl_tbl_key_idx__ +PREHOOK: Input: default@tbl +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from tbl group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__tbl_tbl_key_idx__ +POSTHOOK: Input: default@tbl +#### A masked pattern was here #### +1 1 +2 3 +3 2 +4 2 +6 1 +7 1 +PREHOOK: query: DROP TABLE tbl +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tbl +PREHOOK: Output: default@tbl +POSTHOOK: query: DROP TABLE tbl +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tbl +POSTHOOK: Output: default@tbl