diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyCtx.java
index 9ffa708..bfe8c00 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyCtx.java
@@ -21,16 +21,12 @@
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
-import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 import java.util.Stack;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.metastore.api.Index;
-import org.apache.hadoop.hive.ql.exec.FilterOperator;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
@@ -66,20 +62,16 @@ public static RewriteCanApplyCtx getInstance(ParseContext parseContext){
   private int aggFuncCnt = 0;
   private boolean queryHasGroupBy = false;
   private boolean aggFuncIsNotCount = false;
+  private boolean aggParameterNotSupport = false;
   private boolean aggFuncColsFetchException = false;
-  private boolean whrClauseColsFetchException = false;
   private boolean selClauseColsFetchException = false;
   private boolean gbyKeysFetchException = false;
   private boolean countOnAllCols = false;
   private boolean countOfOne = false;
   private boolean queryHasMultipleTables = false;
 
-  //Data structures that are populated in the RewriteCanApplyProcFactory
-  //methods to check if the index key meets all criteria
-  private Set<String> selectColumnsList = new LinkedHashSet<String>();
-  private Set<String> predicateColumnsList = new LinkedHashSet<String>();
-  private Set<String> gbKeyNameList = new LinkedHashSet<String>();
-  private Set<String> aggFuncColList = new LinkedHashSet<String>();
+  //The most important field: the single index key column
+  private String indexKey;
 
   private final ParseContext parseContext;
   private String alias;
@@ -91,19 +83,16 @@ void resetCanApplyCtx(){
     setAggFuncCnt(0);
     setQueryHasGroupBy(false);
     setAggFuncIsNotCount(false);
+    setAggParameterNotSupport(false);
     setAggFuncColsFetchException(false);
-    setWhrClauseColsFetchException(false);
     setSelClauseColsFetchException(false);
     setGbyKeysFetchException(false);
     setCountOnAllCols(false);
     setCountOfOne(false);
     setQueryHasMultipleTables(false);
-    selectColumnsList.clear();
-    predicateColumnsList.clear();
-    gbKeyNameList.clear();
-    aggFuncColList.clear();
     setBaseTableName("");
     setAggFunction("");
+    setIndexKey("");
   }
 
   public boolean isQueryHasGroupBy() {
@@ -122,6 +111,14 @@ public void setAggFuncIsNotCount(boolean aggFuncIsNotCount) {
     this.aggFuncIsNotCount = aggFuncIsNotCount;
   }
 
+  public boolean isAggParameterNotSupport() {
+    return aggParameterNotSupport;
+  }
+
+  public void setAggParameterNotSupport(boolean aggParameterNotSupport) {
+    this.aggParameterNotSupport = aggParameterNotSupport;
+  }
+
   public Map<String, String> getBaseToIdxTableMap() {
     return baseToIdxTableMap;
   }
@@ -142,14 +139,6 @@ public boolean isAggFuncColsFetchException() {
     return aggFuncColsFetchException;
   }
 
-  public void setWhrClauseColsFetchException(boolean whrClauseColsFetchException) {
-    this.whrClauseColsFetchException = whrClauseColsFetchException;
-  }
-
-  public boolean isWhrClauseColsFetchException() {
-    return whrClauseColsFetchException;
-  }
-
   public void setSelClauseColsFetchException(boolean selClauseColsFetchException) {
     this.selClauseColsFetchException = selClauseColsFetchException;
   }
@@ -190,38 +179,6 @@
 public boolean isQueryHasMultipleTables() {
     return queryHasMultipleTables;
   }
 
-  public Set<String> getSelectColumnsList() {
-    return selectColumnsList;
-  }
-
-  public void setSelectColumnsList(Set<String> selectColumnsList) {
-    this.selectColumnsList = selectColumnsList;
-  }
-
-  public Set<String> getPredicateColumnsList() {
-    return predicateColumnsList;
-  }
-
-  public void setPredicateColumnsList(Set<String> predicateColumnsList) {
-    this.predicateColumnsList = predicateColumnsList;
-  }
-
-  public Set<String> getGbKeyNameList() {
-    return gbKeyNameList;
-  }
-
-  public void setGbKeyNameList(Set<String> gbKeyNameList) {
-    this.gbKeyNameList = gbKeyNameList;
-  }
-
-  public Set<String> getAggFuncColList() {
-    return aggFuncColList;
-  }
-
-  public void setAggFuncColList(Set<String> aggFuncColList) {
-    this.aggFuncColList = aggFuncColList;
-  }
-
   public int getAggFuncCnt() {
     return aggFuncCnt;
   }
@@ -258,15 +215,6 @@ public ParseContext getParseContext() {
     return parseContext;
   }
 
-  public Set<String> getAllColumns() {
-    Set<String> allColumns = new LinkedHashSet<String>(selectColumnsList);
-    allColumns.addAll(predicateColumnsList);
-    allColumns.addAll(gbKeyNameList);
-    allColumns.addAll(aggFuncColList);
-    return allColumns;
-  }
-
-
   /**
    * This method walks all the nodes starting from topOp TableScanOperator node
    * and invokes methods from {@link RewriteCanApplyProcFactory} for each of the rules
@@ -282,8 +230,8 @@ public ParseContext getParseContext() {
   void populateRewriteVars(TableScanOperator topOp) throws SemanticException{
     Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
-    opRules.put(new RuleRegExp("R1", FilterOperator.getOperatorName() + "%"),
-        RewriteCanApplyProcFactory.canApplyOnFilterOperator(topOp));
+    opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%"),
+        RewriteCanApplyProcFactory.canApplyOnTableScanOperator(topOp));
     opRules.put(new RuleRegExp("R2", GroupByOperator.getOperatorName() + "%"),
         RewriteCanApplyProcFactory.canApplyOnGroupByOperator(topOp));
@@ -323,67 +271,21 @@ public Object process(Node nd, Stack stack,
   //Map for base table to index table mapping
   //TableScan operator for base table will be modified to read from index table
-  private final Map<String, String> baseToIdxTableMap =
-      new HashMap<String, String>();;
-
+  private final Map<String, String> baseToIdxTableMap = new HashMap<String, String>();
   public void addTable(String baseTableName, String indexTableName) {
-    baseToIdxTableMap.put(baseTableName, indexTableName);
-  }
-
-  public String findBaseTable(String baseTableName) {
-    return baseToIdxTableMap.get(baseTableName);
-  }
-
-  boolean isIndexUsableForQueryBranchRewrite(Index index, Set<String> indexKeyNames){
-
-    //--------------------------------------------
-    //Check if all columns in select list are part of index key columns
-    if (!indexKeyNames.containsAll(selectColumnsList)) {
-      LOG.info("Select list has non index key column : " +
-          " Cannot use index " + index.getIndexName());
-      return false;
-    }
-
-    //--------------------------------------------
-    // Check if all columns in where predicate are part of index key columns
-    if (!indexKeyNames.containsAll(predicateColumnsList)) {
-      LOG.info("Predicate column ref list has non index key column : " +
-          " Cannot use index " + index.getIndexName());
-      return false;
-    }
-
-    //--------------------------------------------
-    // For group by, we need to check if all keys are from index columns
-    // itself. Here GB key order can be different than index columns but that does
-    // not really matter for final result.
- if (!indexKeyNames.containsAll(gbKeyNameList)) { - LOG.info("Group by key has some non-indexed columns, " + - " Cannot use index " + index.getIndexName()); - return false; - } + public String findBaseTable(String baseTableName) { + return baseToIdxTableMap.get(baseTableName); + } - // If we have agg function (currently only COUNT is supported), check if its inputs are - // from index. we currently support only that. - if (aggFuncColList.size() > 0) { - if (!indexKeyNames.containsAll(aggFuncColList)){ - LOG.info("Agg Func input is not present in index key columns. Currently " + - "only agg func on index columns are supported by rewrite optimization"); - return false; - } - } + public String getIndexKey() { + return indexKey; + } - //Now that we are good to do this optimization, set parameters in context - //which would be used by transformation procedure as inputs. - if(queryHasGroupBy - && aggFuncCnt == 1 - && !aggFuncIsNotCount){ - addTable(baseTableName, index.getIndexTableName()); - }else{ - LOG.info("No valid criteria met to apply rewrite."); - return false; - } - return true; + public void setIndexKey(String indexKey) { + this.indexKey = indexKey; } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyProcFactory.java index 02216de..e917a7f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyProcFactory.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.ql.optimizer.index; -import org.apache.hadoop.hive.ql.exec.FilterOperator; import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.lib.Node; @@ -30,8 +29,6 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; -import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.plan.FilterDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; import java.util.List; @@ -43,154 +40,133 @@ * */ public final class RewriteCanApplyProcFactory { + + public static CheckTableScanProc canApplyOnTableScanOperator(TableScanOperator topOp) { + return new CheckTableScanProc(topOp); + } /** - * Check for conditions in FilterOperator that do not meet rewrite criteria. + * Check for conditions in TableScanOperator that do not meet rewrite + * criteria. + * */ - private static class CheckFilterProc implements NodeProcessor { + private static class CheckTableScanProc implements NodeProcessor { private TableScanOperator topOp; - public CheckFilterProc(TableScanOperator topOp) { + public CheckTableScanProc(TableScanOperator topOp) { this.topOp = topOp; } - public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, - Object... nodeOutputs) throws SemanticException { - FilterOperator operator = (FilterOperator)nd; - RewriteCanApplyCtx canApplyCtx = (RewriteCanApplyCtx)ctx; - FilterDesc conf = operator.getConf(); - //The filter operator should have a predicate of ExprNodeGenericFuncDesc type. 
- //This represents the comparison operator - ExprNodeDesc oldengfd = conf.getPredicate(); - if(oldengfd == null){ - canApplyCtx.setWhrClauseColsFetchException(true); - return null; - } - ExprNodeDesc backtrack = ExprNodeDescUtils.backtrack(oldengfd, operator, topOp); - if (backtrack == null) { - canApplyCtx.setWhrClauseColsFetchException(true); - return null; - } - //Add the predicate columns to RewriteCanApplyCtx's predColRefs list to check later - //if index keys contain all filter predicate columns and vice-a-versa - for (String col : backtrack.getCols()) { - canApplyCtx.getPredicateColumnsList().add(col); + public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, Object... nodeOutputs) + throws SemanticException { + TableScanOperator operator = (TableScanOperator) nd; + RewriteCanApplyCtx canApplyCtx = (RewriteCanApplyCtx) ctx; + // check ReferencedColumns to make sure that only the index column is + // selected for the following operators. + List selectColumns = operator.getConf().getReferencedColumns(); + if (selectColumns == null || selectColumns.size() != 1) { + canApplyCtx.setSelClauseColsFetchException(true); + } else { + canApplyCtx.setIndexKey(selectColumns.get(0)); } return null; } } - public static CheckFilterProc canApplyOnFilterOperator(TableScanOperator topOp) { - return new CheckFilterProc(topOp); + public static CheckGroupByProc canApplyOnGroupByOperator(TableScanOperator topOp) { + return new CheckGroupByProc(topOp); } - /** + /** * Check for conditions in GroupByOperator that do not meet rewrite criteria. * */ private static class CheckGroupByProc implements NodeProcessor { - private TableScanOperator topOp; - - public CheckGroupByProc(TableScanOperator topOp) { - this.topOp = topOp; - } - - public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, - Object... 
nodeOutputs) throws SemanticException { - GroupByOperator operator = (GroupByOperator)nd; - RewriteCanApplyCtx canApplyCtx = (RewriteCanApplyCtx)ctx; - //for each group-by clause in query, only one GroupByOperator of the - //GBY-RS-GBY sequence is stored in getGroupOpToInputTables - //we need to process only this operator - //Also, we do not rewrite for cases when same query branch has multiple group-by constructs - if(canApplyCtx.getParseContext().getGroupOpToInputTables().containsKey(operator) && - !canApplyCtx.isQueryHasGroupBy()){ - - canApplyCtx.setQueryHasGroupBy(true); - GroupByDesc conf = operator.getConf(); - List aggrList = conf.getAggregators(); - if(aggrList != null && aggrList.size() > 0){ - for (AggregationDesc aggregationDesc : aggrList) { - canApplyCtx.setAggFuncCnt(canApplyCtx.getAggFuncCnt() + 1); - //In the current implementation, we do not support more than 1 agg funcs in group-by - if(canApplyCtx.getAggFuncCnt() > 1) { - return false; - } - String aggFunc = aggregationDesc.getGenericUDAFName(); - if(!("count".equals(aggFunc))){ - canApplyCtx.setAggFuncIsNotCount(true); - return false; - } - List para = aggregationDesc.getParameters(); - //for a valid aggregation, it needs to have non-null parameter list - if (para == null) { - canApplyCtx.setAggFuncColsFetchException(true); - } else if (para.size() == 0) { - //count(*) case - canApplyCtx.setCountOnAllCols(true); - canApplyCtx.setAggFunction("_count_of_all"); - } else if (para.size() == 1) { - ExprNodeDesc expr = ExprNodeDescUtils.backtrack(para.get(0), operator, topOp); - if (expr instanceof ExprNodeColumnDesc){ - //Add the columns to RewriteCanApplyCtx's selectColumnsList list - //to check later if index keys contain all select clause columns - //and vice-a-versa. We get the select column 'actual' names only here - //if we have a agg func along with group-by - //SelectOperator has internal names in its colList data structure - canApplyCtx.getSelectColumnsList().add( - ((ExprNodeColumnDesc) expr).getColumn()); - //Add the columns to RewriteCanApplyCtx's aggFuncColList list to check later - //if columns contained in agg func are index key columns - canApplyCtx.getAggFuncColList().add( - ((ExprNodeColumnDesc) expr).getColumn()); - canApplyCtx.setAggFunction("_count_of_" + - ((ExprNodeColumnDesc) expr).getColumn() + ""); - } else if(expr instanceof ExprNodeConstantDesc) { - //count(1) case - canApplyCtx.setCountOfOne(true); - canApplyCtx.setAggFunction("_count_of_1"); - } - } else { - throw new SemanticException("Invalid number of arguments for count"); - } - } - } + private TableScanOperator topOp; - //we need to have non-null group-by keys for a valid group-by operator - List keyList = conf.getKeys(); - if(keyList == null || keyList.size() == 0){ - canApplyCtx.setGbyKeysFetchException(true); - } - for (ExprNodeDesc expr : keyList) { - checkExpression(canApplyCtx, expr); - } - } - return null; - } + public CheckGroupByProc(TableScanOperator topOp) { + this.topOp = topOp; + } - private void checkExpression(RewriteCanApplyCtx canApplyCtx, ExprNodeDesc expr){ - if(expr instanceof ExprNodeColumnDesc){ - //Add the group-by keys to RewriteCanApplyCtx's gbKeyNameList list to check later - //if all keys are from index columns - canApplyCtx.getGbKeyNameList().addAll(expr.getCols()); - }else if(expr instanceof ExprNodeGenericFuncDesc){ - ExprNodeGenericFuncDesc funcExpr = (ExprNodeGenericFuncDesc)expr; - List childExprs = funcExpr.getChildren(); - for (ExprNodeDesc childExpr : childExprs) { - if(childExpr instanceof 
ExprNodeColumnDesc){
-            canApplyCtx.getGbKeyNameList().addAll(expr.getCols());
-            canApplyCtx.getSelectColumnsList().add(((ExprNodeColumnDesc) childExpr).getColumn());
-          }else if(childExpr instanceof ExprNodeGenericFuncDesc){
-            checkExpression(canApplyCtx, childExpr);
-          }
-        }
-      }
-    }
-  }
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
+        throws SemanticException {
+      GroupByOperator operator = (GroupByOperator) nd;
+      RewriteCanApplyCtx canApplyCtx = (RewriteCanApplyCtx) ctx;
+      // for each group-by clause in the query, only one GroupByOperator of the
+      // GBY-RS-GBY sequence is stored in getGroupOpToInputTables;
+      // we need to process only this operator.
+      // Also, we do not rewrite when the same query branch has multiple
+      // group-by constructs
+      if (canApplyCtx.getParseContext().getGroupOpToInputTables().containsKey(operator)
+          && !canApplyCtx.isQueryHasGroupBy()) {
+        canApplyCtx.setQueryHasGroupBy(true);
+        GroupByDesc conf = operator.getConf();
+        List<AggregationDesc> aggrList = conf.getAggregators();
+        if (aggrList != null) {
+          // the current implementation supports exactly one agg func in the group-by
+          canApplyCtx.setAggFuncCnt(aggrList.size());
+          if (canApplyCtx.getAggFuncCnt() != 1) {
+            return false;
+          }
+          // and no agg func other than count
+          AggregationDesc aggregationDesc = aggrList.get(0);
+          String aggFunc = aggregationDesc.getGenericUDAFName();
+          if (!("count".equals(aggFunc))) {
+            canApplyCtx.setAggFuncIsNotCount(true);
+            return false;
+          }
+          List<ExprNodeDesc> para = aggregationDesc.getParameters();
+          // a valid aggregation needs a non-null parameter list
+          if (para == null) {
+            canApplyCtx.setAggFuncColsFetchException(true);
+          } else if (para.size() == 0) {
+            // the count(*) case is not supported
+            canApplyCtx.setCountOnAllCols(true);
+            return false;
+          } else if (para.size() == 1) {
+            ExprNodeDesc expr = ExprNodeDescUtils.backtrack(para.get(0), operator, topOp);
+            if (expr instanceof ExprNodeColumnDesc) {
+              // make sure that the parameter column is exactly the indexKey
+              String paraColumn = ((ExprNodeColumnDesc) expr).getColumn();
+              if (!canApplyCtx.getIndexKey().equals(paraColumn)) {
+                canApplyCtx.setAggParameterNotSupport(true);
+                return false;
+              }
+            } else if (expr instanceof ExprNodeConstantDesc) {
+              // constant parameter: we must differentiate count(1), which should not
+              // use the index, from count(key) where key = 1, which should
+              // (count(key+1) where key = 1 likewise folds to a constant)
+              String colName = ((ExprNodeConstantDesc) expr).getFoldedFromCol();
+              if (colName == null || !canApplyCtx.getIndexKey().equals(colName)) {
+                canApplyCtx.setCountOfOne(true);
+                return false;
+              }
+            } else {
+              // other cases, such as count(function(col)), are not supported
+              canApplyCtx.setAggParameterNotSupport(true);
+              return false;
+            }
+          } else {
+            throw new SemanticException("Invalid number of arguments for count");
+          }
+        }
+
+        // we need non-null group-by keys for a valid group-by operator,
+        // and at this point every ExprNodeDesc in keyList is known to come from
+        // indexKey, whether it is a column, a constant, or even a function.
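
The COUNT-parameter check above reduces to a small decision rule. A minimal standalone sketch of that rule, relying on the ExprNodeConstantDesc.getFoldedFromCol() accessor used in the patch (the class and helper names here are illustrative only, not part of the patch):

  import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
  import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
  import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;

  final class CountParameterRule {
    // Returns true when count(expr) can be served by an aggregate index on indexKey.
    static boolean isCountOverIndexKey(ExprNodeDesc expr, String indexKey) {
      if (expr instanceof ExprNodeColumnDesc) {
        // count(key): the parameter must be exactly the index key column
        return indexKey.equals(((ExprNodeColumnDesc) expr).getColumn());
      }
      if (expr instanceof ExprNodeConstantDesc) {
        // count(1) vs. count(key) under a key = 1 predicate: constant folding
        // turns both into a literal, but only the latter remembers its source column
        String foldedFrom = ((ExprNodeConstantDesc) expr).getFoldedFromCol();
        return foldedFrom != null && indexKey.equals(foldedFrom);
      }
      // count(*), count(function(col)) and other shapes are not rewritten
      return false;
    }
  }
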
+        List<ExprNodeDesc> keyList = conf.getKeys();
+        if (keyList == null || keyList.size() == 0) {
+          canApplyCtx.setGbyKeysFetchException(true);
+        }
+      }
+      return null;
+    }
+  }
-
-  public static CheckGroupByProc canApplyOnGroupByOperator(TableScanOperator topOp) {
-    return new CheckGroupByProc(topOp);
-  }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteGBUsingIndex.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteGBUsingIndex.java
index 0f06ec9..4fa6b52 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteGBUsingIndex.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteGBUsingIndex.java
@@ -24,7 +24,6 @@
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
-import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -49,7 +48,6 @@
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
-import org.apache.hadoop.util.StringUtils;
 
 /**
@@ -218,21 +216,18 @@ private boolean checkIfRewriteCanBeApplied(String alias, TableScanOperator topOp
     canApplyCtx.setBaseTableName(baseTable.getTableName());
     canApplyCtx.populateRewriteVars(topOp);
 
-    Map<Index, Set<String>> indexTableMap = getIndexToKeysMap(indexes);
-    for (Map.Entry<Index, Set<String>> entry : indexTableMap.entrySet()) {
+    Map<Index, String> indexTableMap = getIndexToKeysMap(indexes);
+    for (Map.Entry<Index, String> entry : indexTableMap.entrySet()) {
       //we rewrite the original query using the first valid index encountered
       //this can be changed if we have a better mechanism to
       //decide which index will produce a better rewrite
       Index index = entry.getKey();
-      Set<String> indexKeyNames = entry.getValue();
+      String indexKeyName = entry.getValue();
       //break here if any valid index is found to apply rewrite
-      if (canApplyCtx.isIndexUsableForQueryBranchRewrite(index, indexKeyNames) &&
-          checkIfAllRewriteCriteriaIsMet(canApplyCtx)) {
-        //check if aggregation function is set.
-        //If not, set it using the only indexed column
-        if (canApplyCtx.getAggFunction() == null) {
-          canApplyCtx.setAggFunction("_count_of_" + StringUtils.join(",", indexKeyNames) + "");
-        }
+      if (canApplyCtx.getIndexKey() != null && canApplyCtx.getIndexKey().equals(indexKeyName)
+          && checkIfAllRewriteCriteriaIsMet(canApplyCtx)) {
+        canApplyCtx.setAggFunction("_count_of_" + indexKeyName + "");
+        canApplyCtx.addTable(canApplyCtx.getBaseTableName(), index.getIndexTableName());
         canApplyCtx.setIndexTableName(index.getIndexTableName());
         tsOpToProcess.put(alias, canApplyCtx);
         return true;
@@ -319,19 +314,16 @@ private boolean checkIfIndexBuiltOnAllTablePartitions(TableScanOperator tableSca
    * @return
    * @throws SemanticException
    */
-  Map<Index, Set<String>> getIndexToKeysMap(List<Index> indexTables) throws SemanticException{
+  Map<Index, String> getIndexToKeysMap(List<Index> indexTables) throws SemanticException{
     Hive hiveInstance = hiveDb;
-    Map<Index, Set<String>> indexToKeysMap = new LinkedHashMap<Index, Set<String>>();
+    Map<Index, String> indexToKeysMap = new LinkedHashMap<Index, String>();
     for (int idxCtr = 0; idxCtr < indexTables.size(); idxCtr++) {
-      final Set<String> indexKeyNames = new LinkedHashSet<String>();
       Index index = indexTables.get(idxCtr);
       //Getting index key columns
       StorageDescriptor sd = index.getSd();
       List<FieldSchema> idxColList = sd.getCols();
-      for (FieldSchema fieldSchema : idxColList) {
-        indexKeyNames.add(fieldSchema.getName());
-      }
-      assert indexKeyNames.size()==1;
+      assert idxColList.size()==1;
+      String indexKeyName = idxColList.get(0).getName();
       // Check that the index schema is as expected.
This code block should
      // catch problems of this rewrite breaking when the AggregateIndexHandler
      // index is changed.
@@ -355,7 +347,7 @@ private boolean checkIfIndexBuiltOnAllTablePartitions(TableScanOperator tableSca
       // and defer the decision of using a particular index for later
       // this is to allow choosing a index if a better mechanism is
       // designed later to chose a better rewrite
-      indexToKeysMap.put(index, indexKeyNames);
+      indexToKeysMap.put(index, indexKeyName);
     }
     return indexToKeysMap;
   }
@@ -378,7 +370,7 @@ private void rewriteOriginalQuery() throws SemanticException {
       RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx =
         RewriteQueryUsingAggregateIndexCtx.getInstance(parseContext, hiveDb,
             canApplyCtx.getIndexTableName(), canApplyCtx.getAlias(),
-            canApplyCtx.getAllColumns(), canApplyCtx.getAggFunction());
+            canApplyCtx.getAggFunction(), canApplyCtx.getIndexKey());
       rewriteQueryCtx.invokeRewriteQueryProc(topOp);
       parseContext = rewriteQueryCtx.getParseContext();
       parseContext.setOpParseCtx((LinkedHashMap<Operator<? extends OperatorDesc>,
@@ -393,8 +385,8 @@
    * @return
    */
   boolean checkIfAllRewriteCriteriaIsMet(RewriteCanApplyCtx canApplyCtx){
-    if (canApplyCtx.getAggFuncCnt() > 1){
-      LOG.debug("More than 1 agg funcs: " +
+    if (canApplyCtx.getAggFuncCnt() != 1){
+      LOG.debug("Zero or more than one agg func: " +
         "Not supported by " + getName() + " optimization.");
       return false;
     }
@@ -418,11 +410,7 @@ boolean checkIfAllRewriteCriteriaIsMet(RewriteCanApplyCtx canApplyCtx){
         "of agg func, skipping " + getName() + " optimization.");
       return false;
     }
-    if (canApplyCtx.isWhrClauseColsFetchException()){
-      LOG.debug("Got exception while locating child col refs for where clause, "
-          + "skipping " + getName() + " optimization.");
-      return false;
-    }
+
     if (canApplyCtx.isSelClauseColsFetchException()){
       LOG.debug("Got exception while locating child col refs for select list, "
         + "skipping " + getName() + " optimization.");
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndex.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndex.java
index 74614f3..4d95421 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndex.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndex.java
@@ -19,8 +19,8 @@
 package org.apache.hadoop.hive.ql.optimizer.index;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashMap;
-import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
@@ -74,18 +74,27 @@
   private RewriteQueryUsingAggregateIndex() {
     //this prevents the class from getting instantiated
   }
 
+  // for a SEL1-SEL2-GBY-...-SEL3 chain we need to modify the SelectOperators
+  // that precede the GroupByOperator (SEL1, SEL2) and keep the SelectOperator
+  // that comes after it (SEL3); see the sketch below
+  private static boolean precedeGroupbyOp(Stack<Node> stack) {
+    for (Node node : stack) {
+      if (node instanceof GroupByOperator) {
+        return false;
+      }
+    }
+    return true;
+  }
+
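The new precedeGroupbyOp helper decides which SelectOperators to rewrite by scanning the graph walker's stack rather than peeking at child operators. A self-contained sketch of that stack test, with operator names as stand-in strings (PrecedeGroupByDemo and its contents are illustrative, not part of the patch):

  import java.util.Stack;

  public class PrecedeGroupByDemo {
    // Mirrors precedeGroupbyOp: anything named "GBY" already on the walk
    // stack means the current node comes after the group-by.
    static boolean precedesGroupBy(Stack<String> stack) {
      for (String node : stack) {
        if (node.equals("GBY")) {
          return false;
        }
      }
      return true;
    }

    public static void main(String[] args) {
      Stack<String> path = new Stack<String>();
      path.push("TS"); path.push("SEL1");
      System.out.println(precedesGroupBy(path)); // true:  SEL1 is rewritten
      path.push("SEL2");
      System.out.println(precedesGroupBy(path)); // true:  SEL2 is rewritten
      path.push("GBY"); path.push("SEL3");
      System.out.println(precedesGroupBy(path)); // false: SEL3 is kept as-is
    }
  }

   private static class NewQuerySelectSchemaProc implements NodeProcessor {
     public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object...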
nodeOutputs) throws SemanticException {
       SelectOperator operator = (SelectOperator)nd;
       RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx = (RewriteQueryUsingAggregateIndexCtx)ctx;
-      List<Operator<? extends OperatorDesc>> childOps = operator.getChildOperators();
-      Operator<? extends OperatorDesc> childOp = childOps.iterator().next();
 
       //we need to set the colList, outputColumnNames, colExprMap,
       // rowSchema for only that SelectOperator which precedes the GroupByOperator
       // count(indexed_key_column) needs to be replaced by sum(`_count_of_indexed_key_column`)
-      if (childOp instanceof GroupByOperator){
+      if (precedeGroupbyOp(stack)) {
         List<ExprNodeDesc> selColList = operator.getConf().getColList();
         selColList.add(rewriteQueryCtx.getAggrExprNode());
 
@@ -94,6 +103,9 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx,
             operator.getConf().getOutputColumnNames();
         selOutputColNames.add(rewriteQueryCtx.getAggrExprNode().getColumn());
 
+        operator.getColumnExprMap().put(rewriteQueryCtx.getAggrExprNode().getColumn(),
+            rewriteQueryCtx.getAggrExprNode());
+
         RowSchema selRS = operator.getSchema();
         List<ColumnInfo> selRSSignature = selRS.getSignature();
 
@@ -167,12 +179,10 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx,
       try {
         StructObjectInspector rowObjectInspector =
           (StructObjectInspector) indexTableHandle.getDeserializer().getObjectInspector();
-        for (String column : rewriteQueryCtx.getColumns()) {
-          StructField field = rowObjectInspector.getStructFieldRef(column);
+        StructField field = rowObjectInspector.getStructFieldRef(rewriteQueryCtx.getIndexKey());
           rr.put(indexTableName, field.getFieldName(), new ColumnInfo(field.getFieldName(),
               TypeInfoUtils.getTypeInfoFromObjectInspector(field.getFieldObjectInspector()),
               indexTableName, false));
-        }
       } catch (SerDeException e) {
         LOG.error("Error while creating the RowResolver for new TableScanOperator.");
         LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
@@ -202,7 +212,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx,
           (LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext>) opParseContext);
 
       ColumnPrunerProcFactory.setupNeededColumns(scanOperator, rr,
-          new ArrayList<String>(rewriteQueryCtx.getColumns()));
+          Arrays.asList(rewriteQueryCtx.getIndexKey()));
 
       return null;
     }
@@ -229,27 +239,14 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx,
       //We need to replace the GroupByOperator which is in
       //groupOpToInputTables map with the new GroupByOperator
-      if(rewriteQueryCtx.getParseContext().getGroupOpToInputTables().containsKey(operator)){
-        List<ExprNodeDesc> gbyKeyList = operator.getConf().getKeys();
-        String gbyKeys = null;
-        Iterator<ExprNodeDesc> gbyKeyListItr = gbyKeyList.iterator();
-        while(gbyKeyListItr.hasNext()){
-          ExprNodeDesc expr = gbyKeyListItr.next().clone();
-          if(expr instanceof ExprNodeColumnDesc){
-            ExprNodeColumnDesc colExpr = (ExprNodeColumnDesc)expr;
-            gbyKeys = colExpr.getColumn();
-            if(gbyKeyListItr.hasNext()){
-              gbyKeys = gbyKeys + ",";
-            }
-          }
-        }
-
+      if (rewriteQueryCtx.getParseContext().getGroupOpToInputTables().containsKey(operator)) {
+        //the rewritten query aggregates with the sum GenericUDAF
         String selReplacementCommand = "select sum(`" + rewriteQueryCtx.getAggregateFunction() + "`)"
            + " from " + rewriteQueryCtx.getIndexName()
-            + " group by " + gbyKeys + " ";
+            + " group by " + rewriteQueryCtx.getIndexKey() + " ";
         //create a new ParseContext for the query to retrieve its operator tree,
         //and the required GroupByOperator from it
         ParseContext newDAGContext = RewriteParseContextGenerator.generateOperatorTree(
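
selReplacementCommand above is the crux of the whole optimization: the group-by branch is re-parsed against the index table instead of the base table. A rough illustration of the string it builds, using the tbl table and tbl_key_idx index from the tests below (names taken from the .q files; the resulting SQL shown is illustrative):

  // For: select key, count(key) from tbl group by key
  // with an aggregate index stored in default.default__tbl_tbl_key_idx__:
  String aggregateFunction = "_count_of_key";
  String indexName = "default.default__tbl_tbl_key_idx__";
  String indexKey = "key";
  String selReplacementCommand = "select sum(`" + aggregateFunction + "`)"
      + " from " + indexName
      + " group by " + indexKey + " ";
  // => "select sum(`_count_of_key`) from default.default__tbl_tbl_key_idx__ group by key "
  // i.e. per-row counting over the base table becomes summing the
  // pre-aggregated per-key counts the aggregate index already stores.

diff --git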
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndexCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndexCtx.java index d699308..8f6c6cb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndexCtx.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndexCtx.java @@ -22,7 +22,6 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.Stack; import org.apache.hadoop.hive.ql.exec.Operator; @@ -55,21 +54,22 @@ public final class RewriteQueryUsingAggregateIndexCtx implements NodeProcessorCtx { private RewriteQueryUsingAggregateIndexCtx(ParseContext parseContext, Hive hiveDb, - String indexTableName, String alias, Set columns, String aggregateFunction) { + String indexTableName, String alias, String aggregateFunction, + String indexKey) { this.parseContext = parseContext; this.hiveDb = hiveDb; this.indexTableName = indexTableName; this.alias = alias; this.aggregateFunction = aggregateFunction; - this.columns = columns; this.opc = parseContext.getOpParseCtx(); + this.indexKey = indexKey; } public static RewriteQueryUsingAggregateIndexCtx getInstance(ParseContext parseContext, Hive hiveDb, String indexTableName, String alias, - Set columns, String aggregateFunction) { + String aggregateFunction, String indexKey) { return new RewriteQueryUsingAggregateIndexCtx( - parseContext, hiveDb, indexTableName, alias, columns, aggregateFunction); + parseContext, hiveDb, indexTableName, alias, aggregateFunction, indexKey); } @@ -82,8 +82,8 @@ public static RewriteQueryUsingAggregateIndexCtx getInstance(ParseContext parseC private final String indexTableName; private final String alias; private final String aggregateFunction; - private final Set columns; private ExprNodeColumnDesc aggrExprNode = null; + private String indexKey; public Map, OpParseContext> getOpc() { return opc; @@ -173,7 +173,12 @@ public String getAggregateFunction() { return aggregateFunction; } - public Set getColumns() { - return columns; + public String getIndexKey() { + return indexKey; } + + public void setIndexKey(String indexKey) { + this.indexKey = indexKey; + } + } diff --git a/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx_cbo_1.q b/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx_cbo_1.q new file mode 100644 index 0000000..9ce7d85 --- /dev/null +++ b/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx_cbo_1.q @@ -0,0 +1,173 @@ +set hive.stats.dbclass=fs; +set hive.stats.autogather=true; +set hive.cbo.enable=true; + +DROP TABLE IF EXISTS lineitem_ix; +CREATE TABLE lineitem_ix (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|'; + +LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_ix; + +CREATE INDEX lineitem_ix_lshipdate_idx ON TABLE lineitem_ix(l_shipdate) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(l_shipdate)"); +ALTER INDEX lineitem_ix_lshipdate_idx ON lineitem_ix REBUILD; + +explain select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate; + +select 
l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate +order by l_shipdate; + +set hive.optimize.index.groupby=true; + +explain select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate; + +select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate +order by l_shipdate; + +set hive.optimize.index.groupby=false; + + +explain select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month; + +select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month; + +set hive.optimize.index.groupby=true; + +explain select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month; + +select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month; + +explain select lastyear.month, + thisyear.month, + (thisyear.monthly_shipments - lastyear.monthly_shipments) / +lastyear.monthly_shipments as monthly_shipments_delta + from (select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments + from lineitem_ix + where year(l_shipdate) = 1997 + group by year(l_shipdate), month(l_shipdate) + ) lastyear join + (select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments + from lineitem_ix + where year(l_shipdate) = 1998 + group by year(l_shipdate), month(l_shipdate) + ) thisyear + on lastyear.month = thisyear.month; + +explain select l_shipdate, cnt +from (select l_shipdate, count(l_shipdate) as cnt from lineitem_ix group by l_shipdate +union all +select l_shipdate, l_orderkey as cnt +from lineitem_ix) dummy; + +CREATE TABLE tbl(key int, value int); +CREATE INDEX tbl_key_idx ON TABLE tbl(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)"); +ALTER INDEX tbl_key_idx ON tbl REBUILD; + +EXPLAIN select key, count(key) from tbl where key = 1 group by key; +EXPLAIN select key, count(key) from tbl group by key; + +EXPLAIN select count(1) from tbl; +EXPLAIN select count(key) from tbl; + +EXPLAIN select key FROM tbl GROUP BY key; +EXPLAIN select key FROM tbl GROUP BY value, key; +EXPLAIN select key FROM tbl WHERE key = 3 GROUP BY key; +EXPLAIN select key FROM tbl WHERE value = 2 GROUP BY key; +EXPLAIN select key FROM tbl GROUP BY key, substr(key,2,3); + +EXPLAIN select key, value FROM tbl GROUP BY value, key; +EXPLAIN select key, value FROM tbl WHERE value = 1 GROUP BY key, value; + +EXPLAIN select DISTINCT key FROM tbl; +EXPLAIN select DISTINCT key FROM tbl; +EXPLAIN select DISTINCT key FROM tbl; +EXPLAIN select DISTINCT key, value FROM tbl; +EXPLAIN select DISTINCT key, value FROM tbl WHERE value = 2; +EXPLAIN select DISTINCT key, value FROM tbl WHERE value = 2 AND key = 3; +EXPLAIN select DISTINCT key, value FROM tbl WHERE value = key; +EXPLAIN select DISTINCT key, substr(value,2,3) FROM tbl WHERE value = key; +EXPLAIN select DISTINCT key, substr(value,2,3) FROM tbl; + +EXPLAIN select * FROM (select DISTINCT key, value FROM tbl) v1 WHERE v1.value = 2; + +DROP TABLE tbl; + +CREATE TABLE tblpart (key int, value string) 
PARTITIONED BY (ds string, hr int); +INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-08', hr=11) SELECT key, value FROM srcpart WHERE ds = '2008-04-08' AND hr = 11; +INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-08', hr=12) SELECT key, value FROM srcpart WHERE ds = '2008-04-08' AND hr = 12; +INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-09', hr=11) SELECT key, value FROM srcpart WHERE ds = '2008-04-09' AND hr = 11; +INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-09', hr=12) SELECT key, value FROM srcpart WHERE ds = '2008-04-09' AND hr = 12; + +CREATE INDEX tbl_part_index ON TABLE tblpart(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)"); + +ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-08', hr=11) REBUILD; +EXPLAIN SELECT key, count(key) FROM tblpart WHERE ds='2008-04-09' AND hr=12 AND key < 10 GROUP BY key; + +ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-08', hr=12) REBUILD; +ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-09', hr=11) REBUILD; +ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-09', hr=12) REBUILD; +EXPLAIN SELECT key, count(key) FROM tblpart WHERE ds='2008-04-09' AND hr=12 AND key < 10 GROUP BY key; + +DROP INDEX tbl_part_index on tblpart; +DROP TABLE tblpart; + +CREATE TABLE tbl(key int, value int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'; +LOAD DATA LOCAL INPATH '../../data/files/tbl.txt' OVERWRITE INTO TABLE tbl; + +CREATE INDEX tbl_key_idx ON TABLE tbl(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)"); +ALTER INDEX tbl_key_idx ON tbl REBUILD; + +set hive.optimize.index.groupby=false; +explain select key, count(key) from tbl group by key order by key; +select key, count(key) from tbl group by key order by key; +set hive.optimize.index.groupby=true; +explain select key, count(key) from tbl group by key order by key; +select key, count(key) from tbl group by key order by key; +DROP TABLE tbl; \ No newline at end of file diff --git a/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx_cbo_2.q b/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx_cbo_2.q new file mode 100644 index 0000000..99670bd --- /dev/null +++ b/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx_cbo_2.q @@ -0,0 +1,148 @@ +set hive.stats.dbclass=fs; +set hive.stats.autogather=true; +set hive.cbo.enable=true; + +DROP TABLE IF EXISTS lineitem_ix; +CREATE TABLE lineitem_ix (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|'; + +LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_ix; + +CREATE INDEX lineitem_ix_L_ORDERKEY_idx ON TABLE lineitem_ix(L_ORDERKEY) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(L_ORDERKEY)"); +ALTER INDEX lineitem_ix_L_ORDERKEY_idx ON lineitem_ix REBUILD; + +CREATE INDEX lineitem_ix_L_PARTKEY_idx ON TABLE lineitem_ix(L_PARTKEY) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(L_PARTKEY)"); +ALTER INDEX lineitem_ix_L_PARTKEY_idx ON lineitem_ix 
REBUILD; + +set hive.optimize.index.groupby=true; + +explain select L_ORDERKEY+L_PARTKEY as keysum, +count(L_ORDERKEY), count(L_PARTKEY) +from lineitem_ix +group by L_ORDERKEY, L_PARTKEY; + +select L_ORDERKEY+L_PARTKEY as keysum, +count(L_ORDERKEY), count(L_PARTKEY) +from lineitem_ix +group by L_ORDERKEY, L_PARTKEY; + +explain +select L_ORDERKEY, count(L_ORDERKEY) +from lineitem_ix +where L_ORDERKEY = 7 +group by L_ORDERKEY; + +select L_ORDERKEY, count(L_ORDERKEY) +from lineitem_ix +where L_ORDERKEY = 7 +group by L_ORDERKEY; + +explain +select L_ORDERKEY, count(keysum), sum(keysum) +from +(select L_ORDERKEY, L_ORDERKEY+L_PARTKEY as keysum from lineitem_ix) tabA +group by L_ORDERKEY; + +select L_ORDERKEY, count(keysum), sum(keysum) +from +(select L_ORDERKEY, L_ORDERKEY+L_PARTKEY as keysum from lineitem_ix) tabA +group by L_ORDERKEY; + + +explain +select L_ORDERKEY, count(L_ORDERKEY), sum(L_ORDERKEY) +from lineitem_ix +group by L_ORDERKEY; + +select L_ORDERKEY, count(L_ORDERKEY), sum(L_ORDERKEY) +from lineitem_ix +group by L_ORDERKEY; + +explain +select colA, count(colA) +from (select L_ORDERKEY as colA from lineitem_ix) tabA +group by colA; + +select colA, count(colA) +from (select L_ORDERKEY as colA from lineitem_ix) tabA +group by colA; + +explain +select keysum, count(keysum) +from +(select L_ORDERKEY+L_PARTKEY as keysum from lineitem_ix) tabA +group by keysum; + +select keysum, count(keysum) +from +(select L_ORDERKEY+L_PARTKEY as keysum from lineitem_ix) tabA +group by keysum; + +explain +select keysum, count(keysum) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix) tabA +group by keysum; + +select keysum, count(keysum) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix) tabA +group by keysum; + + +explain +select keysum, count(1) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix) tabA +group by keysum; + +select keysum, count(1) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix) tabA +group by keysum; + + +explain +select keysum, count(keysum) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix where L_ORDERKEY = 7) tabA +group by keysum; + +select keysum, count(keysum) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix where L_ORDERKEY = 7) tabA +group by keysum; + + +explain +select ckeysum, count(ckeysum) +from +(select keysum, count(keysum) as ckeysum +from + (select L_ORDERKEY+1 as keysum from lineitem_ix where L_ORDERKEY = 7) tabA +group by keysum) tabB +group by ckeysum; + +select ckeysum, count(ckeysum) +from +(select keysum, count(keysum) as ckeysum +from + (select L_ORDERKEY+1 as keysum from lineitem_ix where L_ORDERKEY = 7) tabA +group by keysum) tabB +group by ckeysum; \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out b/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out index 81f0822..203589b 100644 --- a/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out +++ b/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out @@ -1024,26 +1024,30 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: tbl + alias: default.default__tbl_tbl_key_idx__ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: (key = 1) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(key) - keys: key (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Select Operator + expressions: 1 (type: int), _count_of_key (type: bigint) + outputColumnNames: key, _count_of_key 
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + aggregations: sum(_count_of_key) + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: sum(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 diff --git a/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx_cbo_1.q.out b/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx_cbo_1.q.out new file mode 100644 index 0000000..e5ad0bd --- /dev/null +++ b/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx_cbo_1.q.out @@ -0,0 +1,2637 @@ +PREHOOK: query: DROP TABLE IF EXISTS lineitem_ix +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS lineitem_ix +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE lineitem_ix (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@lineitem_ix +POSTHOOK: query: CREATE TABLE lineitem_ix (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@lineitem_ix +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_ix +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@lineitem_ix +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_ix +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@lineitem_ix +PREHOOK: query: CREATE INDEX lineitem_ix_lshipdate_idx ON TABLE lineitem_ix(l_shipdate) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(l_shipdate)") +PREHOOK: type: CREATEINDEX +PREHOOK: Input: default@lineitem_ix +POSTHOOK: query: CREATE INDEX lineitem_ix_lshipdate_idx ON TABLE lineitem_ix(l_shipdate) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(l_shipdate)") +POSTHOOK: type: CREATEINDEX +POSTHOOK: Input: default@lineitem_ix +POSTHOOK: Output: default@default__lineitem_ix_lineitem_ix_lshipdate_idx__ +PREHOOK: query: ALTER INDEX lineitem_ix_lshipdate_idx ON lineitem_ix REBUILD +PREHOOK: type: ALTERINDEX_REBUILD 
+PREHOOK: Input: default@lineitem_ix +PREHOOK: Output: default@default__lineitem_ix_lineitem_ix_lshipdate_idx__ +POSTHOOK: query: ALTER INDEX lineitem_ix_lshipdate_idx ON lineitem_ix REBUILD +POSTHOOK: type: ALTERINDEX_REBUILD +POSTHOOK: Input: default@lineitem_ix +POSTHOOK: Output: default@default__lineitem_ix_lineitem_ix_lshipdate_idx__ +POSTHOOK: Lineage: default__lineitem_ix_lineitem_ix_lshipdate_idx__._bucketname SIMPLE [(lineitem_ix)lineitem_ix.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__lineitem_ix_lineitem_ix_lshipdate_idx__._count_of_l_shipdate EXPRESSION [(lineitem_ix)lineitem_ix.FieldSchema(name:l_shipdate, type:string, comment:null), ] +POSTHOOK: Lineage: default__lineitem_ix_lineitem_ix_lshipdate_idx__._offsets EXPRESSION [(lineitem_ix)lineitem_ix.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__lineitem_ix_lineitem_ix_lshipdate_idx__.l_shipdate SIMPLE [(lineitem_ix)lineitem_ix.FieldSchema(name:l_shipdate, type:string, comment:null), ] +PREHOOK: query: explain select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate +PREHOOK: type: QUERY +POSTHOOK: query: explain select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_ix + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_shipdate (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate +order by l_shipdate +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate +order by l_shipdate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +1992-04-27 1 +1992-07-02 1 +1992-07-10 1 +1992-07-21 
1 +1993-04-01 1 +1993-04-13 1 +1993-05-14 1 +1993-10-29 2 +1993-11-09 2 +1993-12-04 1 +1993-12-09 2 +1993-12-14 1 +1994-01-12 1 +1994-01-16 1 +1994-01-26 2 +1994-02-02 1 +1994-02-13 1 +1994-02-19 1 +1994-02-21 1 +1994-03-03 1 +1994-03-17 1 +1994-06-03 1 +1994-06-06 1 +1994-07-02 1 +1994-07-19 1 +1994-07-31 1 +1994-08-08 1 +1994-08-17 1 +1994-08-24 1 +1994-09-30 1 +1994-10-03 1 +1994-10-16 1 +1994-10-31 1 +1994-12-01 1 +1994-12-24 1 +1994-12-30 1 +1995-04-20 1 +1995-07-06 1 +1995-07-17 1 +1995-07-21 1 +1995-08-04 1 +1995-08-07 1 +1995-08-14 1 +1995-08-28 1 +1995-10-23 1 +1995-11-08 1 +1995-11-26 1 +1996-01-10 1 +1996-01-15 1 +1996-01-16 1 +1996-01-19 1 +1996-01-22 1 +1996-01-29 1 +1996-01-30 1 +1996-02-01 2 +1996-02-03 1 +1996-02-10 1 +1996-02-11 1 +1996-02-21 1 +1996-03-13 1 +1996-03-21 1 +1996-03-30 1 +1996-04-12 1 +1996-04-21 1 +1996-05-07 1 +1996-09-26 1 +1996-09-29 1 +1996-10-02 1 +1996-10-17 1 +1996-11-04 1 +1996-11-14 1 +1996-12-08 1 +1997-01-25 1 +1997-01-27 1 +1997-01-28 1 +1997-02-20 1 +1997-03-18 1 +1997-04-17 1 +1997-04-19 1 +1998-01-29 1 +1998-02-23 1 +1998-03-05 1 +1998-04-10 1 +1998-04-12 1 +1998-05-23 1 +1998-06-19 1 +1998-06-24 1 +1998-06-26 1 +1998-06-27 1 +1998-07-04 1 +1998-08-11 1 +1998-08-13 1 +1998-10-09 1 +1998-10-23 1 +1998-10-30 1 +PREHOOK: query: explain select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate +PREHOOK: type: QUERY +POSTHOOK: query: explain select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: $hdt$_0:$hdt$_0:default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + Statistics: Num rows: 95 Data size: 10575 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint) + outputColumnNames: _col0, _count_of_l_shipdate + Statistics: Num rows: 95 Data size: 10575 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_l_shipdate) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 95 Data size: 10575 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 95 Data size: 10575 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 5231 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 5231 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 47 Data size: 5231 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate +order by l_shipdate +PREHOOK: type: QUERY 
+PREHOOK: Input: default@default__lineitem_ix_lineitem_ix_lshipdate_idx__ +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate +order by l_shipdate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__lineitem_ix_lineitem_ix_lshipdate_idx__ +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +1992-04-27 1 +1992-07-02 1 +1992-07-10 1 +1992-07-21 1 +1993-04-01 1 +1993-04-13 1 +1993-05-14 1 +1993-10-29 2 +1993-11-09 2 +1993-12-04 1 +1993-12-09 2 +1993-12-14 1 +1994-01-12 1 +1994-01-16 1 +1994-01-26 2 +1994-02-02 1 +1994-02-13 1 +1994-02-19 1 +1994-02-21 1 +1994-03-03 1 +1994-03-17 1 +1994-06-03 1 +1994-06-06 1 +1994-07-02 1 +1994-07-19 1 +1994-07-31 1 +1994-08-08 1 +1994-08-17 1 +1994-08-24 1 +1994-09-30 1 +1994-10-03 1 +1994-10-16 1 +1994-10-31 1 +1994-12-01 1 +1994-12-24 1 +1994-12-30 1 +1995-04-20 1 +1995-07-06 1 +1995-07-17 1 +1995-07-21 1 +1995-08-04 1 +1995-08-07 1 +1995-08-14 1 +1995-08-28 1 +1995-10-23 1 +1995-11-08 1 +1995-11-26 1 +1996-01-10 1 +1996-01-15 1 +1996-01-16 1 +1996-01-19 1 +1996-01-22 1 +1996-01-29 1 +1996-01-30 1 +1996-02-01 2 +1996-02-03 1 +1996-02-10 1 +1996-02-11 1 +1996-02-21 1 +1996-03-13 1 +1996-03-21 1 +1996-03-30 1 +1996-04-12 1 +1996-04-21 1 +1996-05-07 1 +1996-09-26 1 +1996-09-29 1 +1996-10-02 1 +1996-10-17 1 +1996-11-04 1 +1996-11-14 1 +1996-12-08 1 +1997-01-25 1 +1997-01-27 1 +1997-01-28 1 +1997-02-20 1 +1997-03-18 1 +1997-04-17 1 +1997-04-19 1 +1998-01-29 1 +1998-02-23 1 +1998-03-05 1 +1998-04-10 1 +1998-04-12 1 +1998-05-23 1 +1998-06-19 1 +1998-06-24 1 +1998-06-26 1 +1998-06-27 1 +1998-07-04 1 +1998-08-11 1 +1998-08-13 1 +1998-10-09 1 +1998-10-23 1 +1998-10-30 1 +PREHOOK: query: explain select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month +PREHOOK: type: QUERY +POSTHOOK: query: explain select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_ix + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: year(l_shipdate) (type: int), month(l_shipdate) (type: int), l_shipdate (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col2) + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 120 Data 
size: 12099 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +1992 4 1 +1992 7 3 +1993 4 2 +1993 5 1 +1993 10 2 +1993 11 2 +1993 12 4 +1994 1 4 +1994 2 4 +1994 3 2 +1994 6 2 +1994 7 3 +1994 8 3 +1994 9 1 +1994 10 3 +1994 12 3 +1995 4 1 +1995 7 3 +1995 8 4 +1995 10 1 +1995 11 2 +1996 1 7 +1996 2 6 +1996 3 3 +1996 4 2 +1996 5 1 +1996 9 2 +1996 10 2 +1996 11 2 +1996 12 1 +1997 1 3 +1997 2 1 +1997 3 1 +1997 4 2 +1998 1 1 +1998 2 1 +1998 3 1 +1998 4 2 +1998 5 1 +1998 6 4 +1998 7 1 +1998 8 2 +1998 10 3 +PREHOOK: query: explain select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month +PREHOOK: type: QUERY +POSTHOOK: query: explain select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: $hdt$_0:$hdt$_0:default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + Statistics: Num rows: 95 Data size: 10575 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: year(l_shipdate) (type: int), month(l_shipdate) (type: int), l_shipdate (type: string), _count_of_l_shipdate (type: bigint) + outputColumnNames: _col0, _col1, _col2, _count_of_l_shipdate + Statistics: Num rows: 95 Data size: 10575 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_l_shipdate) + keys: 
_col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 95 Data size: 10575 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 95 Data size: 10575 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 95 Data size: 10575 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 95 Data size: 10575 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 95 Data size: 10575 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 95 Data size: 10575 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month +PREHOOK: type: QUERY +PREHOOK: Input: default@default__lineitem_ix_lineitem_ix_lshipdate_idx__ +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__lineitem_ix_lineitem_ix_lshipdate_idx__ +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +1992 4 1 +1992 7 3 +1993 4 2 +1993 5 1 +1993 10 2 +1993 11 2 +1993 12 4 +1994 1 4 +1994 2 4 +1994 3 2 +1994 6 2 +1994 7 3 +1994 8 3 +1994 9 1 +1994 10 3 +1994 12 3 +1995 4 1 +1995 7 3 +1995 8 4 +1995 10 1 +1995 11 2 +1996 1 7 +1996 2 6 +1996 3 3 +1996 4 2 +1996 5 1 +1996 9 2 +1996 10 2 +1996 11 2 +1996 12 1 +1997 1 3 +1997 2 1 +1997 3 1 +1997 4 2 +1998 1 1 +1998 2 1 +1998 3 1 +1998 4 2 +1998 5 1 +1998 6 4 +1998 7 1 +1998 8 2 +1998 10 3 +PREHOOK: query: explain select lastyear.month, + thisyear.month, + (thisyear.monthly_shipments - lastyear.monthly_shipments) / +lastyear.monthly_shipments as monthly_shipments_delta + from (select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments + from lineitem_ix + where year(l_shipdate) = 1997 + group by year(l_shipdate), 
month(l_shipdate) + ) lastyear join + (select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments + from lineitem_ix + where year(l_shipdate) = 1998 + group by year(l_shipdate), month(l_shipdate) + ) thisyear + on lastyear.month = thisyear.month +PREHOOK: type: QUERY +POSTHOOK: query: explain select lastyear.month, + thisyear.month, + (thisyear.monthly_shipments - lastyear.monthly_shipments) / +lastyear.monthly_shipments as monthly_shipments_delta + from (select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments + from lineitem_ix + where year(l_shipdate) = 1997 + group by year(l_shipdate), month(l_shipdate) + ) lastyear join + (select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments + from lineitem_ix + where year(l_shipdate) = 1998 + group by year(l_shipdate), month(l_shipdate) + ) thisyear + on lastyear.month = thisyear.month +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lastyear:default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + Statistics: Num rows: 95 Data size: 10575 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (year(l_shipdate) = 1997) (type: boolean) + Statistics: Num rows: 47 Data size: 5231 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint) + outputColumnNames: l_shipdate, _count_of_l_shipdate + Statistics: Num rows: 47 Data size: 5231 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_l_shipdate) + keys: year(l_shipdate) (type: int), month(l_shipdate) (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 23 Data size: 2559 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 12 Data size: 1335 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 12 Data size: 1335 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 667 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col2 (type: bigint) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 6 Data size: 667 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6 Data size: 667 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + TableScan + 
Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6 Data size: 667 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col1} + 1 {KEY.reducesinkkey0} {VALUE._col1} + outputColumnNames: _col1, _col2, _col4, _col5 + Statistics: Num rows: 6 Data size: 733 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col4 (type: int), ((_col5 - _col2) / _col2) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 733 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 733 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: thisyear:default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + Statistics: Num rows: 95 Data size: 10575 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (year(l_shipdate) = 1998) (type: boolean) + Statistics: Num rows: 47 Data size: 5231 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint) + outputColumnNames: l_shipdate, _count_of_l_shipdate + Statistics: Num rows: 47 Data size: 5231 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_l_shipdate) + keys: year(l_shipdate) (type: int), month(l_shipdate) (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 23 Data size: 2559 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 12 Data size: 1335 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 12 Data size: 1335 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 667 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col2 (type: bigint) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 6 Data size: 667 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select l_shipdate, cnt +from (select l_shipdate, count(l_shipdate) as cnt from lineitem_ix group by l_shipdate +union all +select l_shipdate, l_orderkey as cnt +from lineitem_ix) dummy +PREHOOK: type: QUERY +POSTHOOK: query: explain select l_shipdate, cnt +from (select l_shipdate, 
count(l_shipdate) as cnt from lineitem_ix group by l_shipdate +union all +select l_shipdate, l_orderkey as cnt +from lineitem_ix) dummy +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: null-subquery1:$hdt$_0-subquery1:$hdt$_0:$hdt$_0:default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + Statistics: Num rows: 95 Data size: 10575 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint) + outputColumnNames: _col0, _count_of_l_shipdate + Statistics: Num rows: 95 Data size: 10575 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_l_shipdate) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 95 Data size: 10575 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 95 Data size: 10575 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 5231 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 163 Data size: 17330 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 163 Data size: 17330 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 163 Data size: 17330 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + alias: lineitem_ix + Statistics: Num rows: 116 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_shipdate (type: string), UDFToLong(l_orderkey) (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 116 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 163 Data size: 17330 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 163 Data size: 17330 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 163 Data size: 17330 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: CREATE TABLE tbl(key int, value 
int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl +POSTHOOK: query: CREATE TABLE tbl(key int, value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl +PREHOOK: query: CREATE INDEX tbl_key_idx ON TABLE tbl(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)") +PREHOOK: type: CREATEINDEX +PREHOOK: Input: default@tbl +POSTHOOK: query: CREATE INDEX tbl_key_idx ON TABLE tbl(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)") +POSTHOOK: type: CREATEINDEX +POSTHOOK: Input: default@tbl +POSTHOOK: Output: default@default__tbl_tbl_key_idx__ +PREHOOK: query: ALTER INDEX tbl_key_idx ON tbl REBUILD +PREHOOK: type: ALTERINDEX_REBUILD +PREHOOK: Input: default@tbl +PREHOOK: Output: default@default__tbl_tbl_key_idx__ +POSTHOOK: query: ALTER INDEX tbl_key_idx ON tbl REBUILD +POSTHOOK: type: ALTERINDEX_REBUILD +POSTHOOK: Input: default@tbl +POSTHOOK: Output: default@default__tbl_tbl_key_idx__ +POSTHOOK: Lineage: default__tbl_tbl_key_idx__._bucketname SIMPLE [(tbl)tbl.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__tbl_tbl_key_idx__._count_of_key EXPRESSION [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: default__tbl_tbl_key_idx__._offsets EXPRESSION [(tbl)tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__tbl_tbl_key_idx__.key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: EXPLAIN select key, count(key) from tbl where key = 1 group by key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select key, count(key) from tbl where key = 1 group by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: $hdt$_0:$hdt$_0:default.default__tbl_tbl_key_idx__ + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (key = 1) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: 1 (type: int), _count_of_key (type: bigint) + outputColumnNames: _col0, _count_of_key + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_key) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select key, count(key) from tbl group by key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select key, count(key) from tbl group by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: $hdt$_0:$hdt$_0:default.default__tbl_tbl_key_idx__ + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int), _count_of_key (type: bigint) + outputColumnNames: _col0, _count_of_key + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_key) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select count(1) from tbl +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select count(1) from tbl +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Group By Operator + aggregations: count(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data 
size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select count(key) from tbl +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select count(key) from tbl +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select key FROM tbl GROUP BY key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select key FROM tbl GROUP BY key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select key FROM tbl GROUP BY value, key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select key FROM tbl GROUP BY value, key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: value (type: int), key (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select key FROM tbl WHERE key = 3 GROUP BY key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select key FROM tbl WHERE key = 3 GROUP BY key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (key = 3) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: 3 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select key FROM tbl WHERE value = 2 GROUP BY key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select key FROM tbl WHERE value = 2 GROUP BY key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (value = 2) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select key FROM tbl GROUP BY key, substr(key,2,3) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select key FROM tbl GROUP BY key, substr(key,2,3) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int), substr(key, 2, 3) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE 
Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select key, value FROM tbl GROUP BY value, key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select key, value FROM tbl GROUP BY value, key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: value (type: int), key (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select key, value FROM tbl WHERE value = 1 GROUP BY key, value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select key, value FROM tbl WHERE value = 1 GROUP BY key, value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (value = 1) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int), 1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 
(type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select DISTINCT key FROM tbl +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select DISTINCT key FROM tbl +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select DISTINCT key FROM tbl +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select DISTINCT key FROM tbl +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select 
Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select DISTINCT key FROM tbl +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select DISTINCT key FROM tbl +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select DISTINCT key, value FROM tbl +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select DISTINCT key, value FROM tbl +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select DISTINCT key, value FROM tbl WHERE value = 2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select DISTINCT key, value FROM tbl WHERE value = 2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (value = 2) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int), 2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select DISTINCT key, value FROM tbl WHERE value = 2 AND key = 3 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select DISTINCT key, value FROM tbl WHERE value = 2 AND key = 3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: ((value = 2) and (key = 3)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: 3 (type: int), 2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: 
int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select DISTINCT key, value FROM tbl WHERE value = key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select DISTINCT key, value FROM tbl WHERE value = key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (value = key) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select DISTINCT key, substr(value,2,3) FROM tbl WHERE value = key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select DISTINCT key, substr(value,2,3) FROM tbl WHERE value = key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (value = key) (type: boolean) + 
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int), substr(value, 2, 3) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select DISTINCT key, substr(value,2,3) FROM tbl +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select DISTINCT key, substr(value,2,3) FROM tbl +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int), substr(value, 2, 3) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select * FROM (select DISTINCT key, value FROM tbl) v1 WHERE v1.value = 2 +PREHOOK: type: QUERY +POSTHOOK: query: 
EXPLAIN select * FROM (select DISTINCT key, value FROM tbl) v1 WHERE v1.value = 2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (value = 2) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int), 2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: DROP TABLE tbl +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tbl +PREHOOK: Output: default@tbl +POSTHOOK: query: DROP TABLE tbl +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tbl +POSTHOOK: Output: default@tbl +PREHOOK: query: CREATE TABLE tblpart (key int, value string) PARTITIONED BY (ds string, hr int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tblpart +POSTHOOK: query: CREATE TABLE tblpart (key int, value string) PARTITIONED BY (ds string, hr int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tblpart +PREHOOK: query: INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-08', hr=11) SELECT key, value FROM srcpart WHERE ds = '2008-04-08' AND hr = 11 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Output: default@tblpart@ds=2008-04-08/hr=11 +POSTHOOK: query: INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-08', hr=11) SELECT key, value FROM srcpart WHERE ds = '2008-04-08' AND hr = 11 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@tblpart@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-08,hr=11).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-08', hr=12) SELECT key, 
value FROM srcpart WHERE ds = '2008-04-08' AND hr = 12 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@tblpart@ds=2008-04-08/hr=12 +POSTHOOK: query: INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-08', hr=12) SELECT key, value FROM srcpart WHERE ds = '2008-04-08' AND hr = 12 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@tblpart@ds=2008-04-08/hr=12 +POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-08,hr=12).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-09', hr=11) SELECT key, value FROM srcpart WHERE ds = '2008-04-09' AND hr = 11 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Output: default@tblpart@ds=2008-04-09/hr=11 +POSTHOOK: query: INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-09', hr=11) SELECT key, value FROM srcpart WHERE ds = '2008-04-09' AND hr = 11 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@tblpart@ds=2008-04-09/hr=11 +POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-09,hr=11).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-09', hr=12) SELECT key, value FROM srcpart WHERE ds = '2008-04-09' AND hr = 12 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@tblpart@ds=2008-04-09/hr=12 +POSTHOOK: query: INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-09', hr=12) SELECT key, value FROM srcpart WHERE ds = '2008-04-09' AND hr = 12 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@tblpart@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-09,hr=12).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: CREATE INDEX tbl_part_index ON TABLE tblpart(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)") +PREHOOK: type: CREATEINDEX +PREHOOK: Input: default@tblpart +POSTHOOK: query: CREATE INDEX tbl_part_index ON TABLE tblpart(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)") +POSTHOOK: type: CREATEINDEX +POSTHOOK: Input: default@tblpart +POSTHOOK: Output: default@default__tblpart_tbl_part_index__ +PREHOOK: query: ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-08', hr=11) REBUILD +PREHOOK: type: ALTERINDEX_REBUILD +PREHOOK: Input: default@tblpart +PREHOOK: Input: default@tblpart@ds=2008-04-08/hr=11 +PREHOOK: Output: default@default__tblpart_tbl_part_index__@ds=2008-04-08/hr=11 +POSTHOOK: query: 
ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-08', hr=11) REBUILD +POSTHOOK: type: ALTERINDEX_REBUILD +POSTHOOK: Input: default@tblpart +POSTHOOK: Input: default@tblpart@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@default__tblpart_tbl_part_index__@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-08,hr=11)._bucketname SIMPLE [(tblpart)tblpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-08,hr=11)._count_of_key EXPRESSION [(tblpart)tblpart.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-08,hr=11)._offsets EXPRESSION [(tblpart)tblpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(tblpart)tblpart.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: EXPLAIN SELECT key, count(key) FROM tblpart WHERE ds='2008-04-09' AND hr=12 AND key < 10 GROUP BY key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT key, count(key) FROM tblpart WHERE ds='2008-04-09' AND hr=12 AND key < 10 GROUP BY key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tblpart + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-08', hr=12) REBUILD +PREHOOK: type: ALTERINDEX_REBUILD +PREHOOK: Input: default@tblpart +PREHOOK: Input: default@tblpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@default__tblpart_tbl_part_index__@ds=2008-04-08/hr=12 +POSTHOOK: query: ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-08', hr=12) 
REBUILD +POSTHOOK: type: ALTERINDEX_REBUILD +POSTHOOK: Input: default@tblpart +POSTHOOK: Input: default@tblpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@default__tblpart_tbl_part_index__@ds=2008-04-08/hr=12 +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-08,hr=12)._bucketname SIMPLE [(tblpart)tblpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-08,hr=12)._count_of_key EXPRESSION [(tblpart)tblpart.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-08,hr=12)._offsets EXPRESSION [(tblpart)tblpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(tblpart)tblpart.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-09', hr=11) REBUILD +PREHOOK: type: ALTERINDEX_REBUILD +PREHOOK: Input: default@tblpart +PREHOOK: Input: default@tblpart@ds=2008-04-09/hr=11 +PREHOOK: Output: default@default__tblpart_tbl_part_index__@ds=2008-04-09/hr=11 +POSTHOOK: query: ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-09', hr=11) REBUILD +POSTHOOK: type: ALTERINDEX_REBUILD +POSTHOOK: Input: default@tblpart +POSTHOOK: Input: default@tblpart@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@default__tblpart_tbl_part_index__@ds=2008-04-09/hr=11 +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-09,hr=11)._bucketname SIMPLE [(tblpart)tblpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-09,hr=11)._count_of_key EXPRESSION [(tblpart)tblpart.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-09,hr=11)._offsets EXPRESSION [(tblpart)tblpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(tblpart)tblpart.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-09', hr=12) REBUILD +PREHOOK: type: ALTERINDEX_REBUILD +PREHOOK: Input: default@tblpart +PREHOOK: Input: default@tblpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@default__tblpart_tbl_part_index__@ds=2008-04-09/hr=12 +POSTHOOK: query: ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-09', hr=12) REBUILD +POSTHOOK: type: ALTERINDEX_REBUILD +POSTHOOK: Input: default@tblpart +POSTHOOK: Input: default@tblpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@default__tblpart_tbl_part_index__@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(tblpart)tblpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-09,hr=12)._count_of_key EXPRESSION [(tblpart)tblpart.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(tblpart)tblpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(tblpart)tblpart.FieldSchema(name:key, 
type:int, comment:null), ] +PREHOOK: query: EXPLAIN SELECT key, count(key) FROM tblpart WHERE ds='2008-04-09' AND hr=12 AND key < 10 GROUP BY key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT key, count(key) FROM tblpart WHERE ds='2008-04-09' AND hr=12 AND key < 10 GROUP BY key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: $hdt$_0:$hdt$_0:default.default__tblpart_tbl_part_index__ + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), _count_of_key (type: bigint) + outputColumnNames: _col0, _count_of_key + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_key) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: DROP INDEX tbl_part_index on tblpart +PREHOOK: type: DROPINDEX +PREHOOK: Input: default@tblpart +POSTHOOK: query: DROP INDEX tbl_part_index on tblpart +POSTHOOK: type: DROPINDEX +POSTHOOK: Input: default@tblpart +PREHOOK: query: DROP TABLE tblpart +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tblpart +PREHOOK: Output: default@tblpart +POSTHOOK: query: DROP TABLE tblpart +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tblpart +POSTHOOK: Output: default@tblpart +PREHOOK: query: CREATE TABLE tbl(key int, value int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl +POSTHOOK: query: CREATE TABLE tbl(key int, value int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tbl.txt' OVERWRITE INTO TABLE tbl +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tbl +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tbl.txt' OVERWRITE INTO TABLE tbl +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tbl +PREHOOK: query: CREATE 
INDEX tbl_key_idx ON TABLE tbl(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)") +PREHOOK: type: CREATEINDEX +PREHOOK: Input: default@tbl +POSTHOOK: query: CREATE INDEX tbl_key_idx ON TABLE tbl(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)") +POSTHOOK: type: CREATEINDEX +POSTHOOK: Input: default@tbl +POSTHOOK: Output: default@default__tbl_tbl_key_idx__ +PREHOOK: query: ALTER INDEX tbl_key_idx ON tbl REBUILD +PREHOOK: type: ALTERINDEX_REBUILD +PREHOOK: Input: default@tbl +PREHOOK: Output: default@default__tbl_tbl_key_idx__ +POSTHOOK: query: ALTER INDEX tbl_key_idx ON tbl REBUILD +POSTHOOK: type: ALTERINDEX_REBUILD +POSTHOOK: Input: default@tbl +POSTHOOK: Output: default@default__tbl_tbl_key_idx__ +POSTHOOK: Lineage: default__tbl_tbl_key_idx__._bucketname SIMPLE [(tbl)tbl.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__tbl_tbl_key_idx__._count_of_key EXPRESSION [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: default__tbl_tbl_key_idx__._offsets EXPRESSION [(tbl)tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__tbl_tbl_key_idx__.key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: explain select key, count(key) from tbl group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, count(key) from tbl group by key order by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
compressed: false + Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from tbl group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from tbl group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl +#### A masked pattern was here #### +1 1 +2 3 +3 2 +4 2 +6 1 +7 1 +PREHOOK: query: explain select key, count(key) from tbl group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, count(key) from tbl group by key order by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: $hdt$_0:$hdt$_0:default.default__tbl_tbl_key_idx__ + Statistics: Num rows: 6 Data size: 532 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), _count_of_key (type: bigint) + outputColumnNames: _col0, _count_of_key + Statistics: Num rows: 6 Data size: 532 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_key) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 532 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 532 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 532 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 6 Data size: 532 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 532 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 532 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from tbl group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@default__tbl_tbl_key_idx__ +PREHOOK: Input: default@tbl +#### A 
masked pattern was here #### +POSTHOOK: query: select key, count(key) from tbl group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__tbl_tbl_key_idx__ +POSTHOOK: Input: default@tbl +#### A masked pattern was here #### +1 1 +2 3 +3 2 +4 2 +6 1 +7 1 +PREHOOK: query: DROP TABLE tbl +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tbl +PREHOOK: Output: default@tbl +POSTHOOK: query: DROP TABLE tbl +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tbl +POSTHOOK: Output: default@tbl diff --git a/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx_cbo_2.q.out b/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx_cbo_2.q.out new file mode 100644 index 0000000..8d645c3 --- /dev/null +++ b/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx_cbo_2.q.out @@ -0,0 +1,1257 @@ +PREHOOK: query: DROP TABLE IF EXISTS lineitem_ix +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS lineitem_ix +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE lineitem_ix (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@lineitem_ix +POSTHOOK: query: CREATE TABLE lineitem_ix (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@lineitem_ix +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_ix +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@lineitem_ix +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_ix +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@lineitem_ix +PREHOOK: query: CREATE INDEX lineitem_ix_L_ORDERKEY_idx ON TABLE lineitem_ix(L_ORDERKEY) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(L_ORDERKEY)") +PREHOOK: type: CREATEINDEX +PREHOOK: Input: default@lineitem_ix +POSTHOOK: query: CREATE INDEX lineitem_ix_L_ORDERKEY_idx ON TABLE lineitem_ix(L_ORDERKEY) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(L_ORDERKEY)") +POSTHOOK: type: CREATEINDEX +POSTHOOK: Input: default@lineitem_ix +POSTHOOK: Output: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +PREHOOK: query: ALTER INDEX lineitem_ix_L_ORDERKEY_idx ON lineitem_ix REBUILD +PREHOOK: type: ALTERINDEX_REBUILD +PREHOOK: Input: default@lineitem_ix +PREHOOK: Output: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +POSTHOOK: query: ALTER INDEX lineitem_ix_L_ORDERKEY_idx ON lineitem_ix REBUILD +POSTHOOK: type: ALTERINDEX_REBUILD +POSTHOOK: Input: default@lineitem_ix +POSTHOOK: Output: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +POSTHOOK: Lineage: 
default__lineitem_ix_lineitem_ix_l_orderkey_idx__._bucketname SIMPLE [(lineitem_ix)lineitem_ix.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__lineitem_ix_lineitem_ix_l_orderkey_idx__._count_of_l_orderkey EXPRESSION [(lineitem_ix)lineitem_ix.FieldSchema(name:l_orderkey, type:int, comment:null), ] +POSTHOOK: Lineage: default__lineitem_ix_lineitem_ix_l_orderkey_idx__._offsets EXPRESSION [(lineitem_ix)lineitem_ix.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__lineitem_ix_lineitem_ix_l_orderkey_idx__.l_orderkey SIMPLE [(lineitem_ix)lineitem_ix.FieldSchema(name:l_orderkey, type:int, comment:null), ] +PREHOOK: query: CREATE INDEX lineitem_ix_L_PARTKEY_idx ON TABLE lineitem_ix(L_PARTKEY) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(L_PARTKEY)") +PREHOOK: type: CREATEINDEX +PREHOOK: Input: default@lineitem_ix +POSTHOOK: query: CREATE INDEX lineitem_ix_L_PARTKEY_idx ON TABLE lineitem_ix(L_PARTKEY) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(L_PARTKEY)") +POSTHOOK: type: CREATEINDEX +POSTHOOK: Input: default@lineitem_ix +POSTHOOK: Output: default@default__lineitem_ix_lineitem_ix_l_partkey_idx__ +PREHOOK: query: ALTER INDEX lineitem_ix_L_PARTKEY_idx ON lineitem_ix REBUILD +PREHOOK: type: ALTERINDEX_REBUILD +PREHOOK: Input: default@lineitem_ix +PREHOOK: Output: default@default__lineitem_ix_lineitem_ix_l_partkey_idx__ +POSTHOOK: query: ALTER INDEX lineitem_ix_L_PARTKEY_idx ON lineitem_ix REBUILD +POSTHOOK: type: ALTERINDEX_REBUILD +POSTHOOK: Input: default@lineitem_ix +POSTHOOK: Output: default@default__lineitem_ix_lineitem_ix_l_partkey_idx__ +POSTHOOK: Lineage: default__lineitem_ix_lineitem_ix_l_partkey_idx__._bucketname SIMPLE [(lineitem_ix)lineitem_ix.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__lineitem_ix_lineitem_ix_l_partkey_idx__._count_of_l_partkey EXPRESSION [(lineitem_ix)lineitem_ix.FieldSchema(name:l_partkey, type:int, comment:null), ] +POSTHOOK: Lineage: default__lineitem_ix_lineitem_ix_l_partkey_idx__._offsets EXPRESSION [(lineitem_ix)lineitem_ix.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__lineitem_ix_lineitem_ix_l_partkey_idx__.l_partkey SIMPLE [(lineitem_ix)lineitem_ix.FieldSchema(name:l_partkey, type:int, comment:null), ] +PREHOOK: query: explain select L_ORDERKEY+L_PARTKEY as keysum, +count(L_ORDERKEY), count(L_PARTKEY) +from lineitem_ix +group by L_ORDERKEY, L_PARTKEY +PREHOOK: type: QUERY +POSTHOOK: query: explain select L_ORDERKEY+L_PARTKEY as keysum, +count(L_ORDERKEY), count(L_PARTKEY) +from lineitem_ix +group by L_ORDERKEY, L_PARTKEY +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_ix + Statistics: Num rows: 1512 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int), l_partkey (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1512 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0), count(_col1) + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1512 Data size: 12099 Basic stats: 
COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1512 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 756 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 + _col1) (type: int), _col2 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 756 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 756 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select L_ORDERKEY+L_PARTKEY as keysum, +count(L_ORDERKEY), count(L_PARTKEY) +from lineitem_ix +group by L_ORDERKEY, L_PARTKEY +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select L_ORDERKEY+L_PARTKEY as keysum, +count(L_ORDERKEY), count(L_PARTKEY) +from lineitem_ix +group by L_ORDERKEY, L_PARTKEY +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +2133 1 1 +15636 1 1 +24028 1 1 +63701 1 1 +67311 1 1 +155191 1 1 +106172 1 1 +4300 1 1 +19039 1 1 +29383 1 1 +62146 1 1 +128452 1 1 +183098 1 1 +88039 1 1 +37536 1 1 +108575 1 1 +123932 1 1 +139642 1 1 +79258 1 1 +94787 1 1 +145250 1 1 +151901 1 1 +157245 1 1 +163080 1 1 +182059 1 1 +2775 1 1 +11647 1 1 +44193 1 1 +82736 1 1 +85843 1 1 +197953 1 1 +33951 1 1 +60552 1 1 +61369 1 1 +137502 1 1 +88396 1 1 +89448 1 1 +169578 1 1 +485 1 1 +30797 1 1 +85210 1 1 +119952 1 1 +120931 1 1 +161975 1 1 +119803 1 1 +12940 1 1 +22667 1 1 +126819 1 1 +175877 1 1 +2359 1 1 +20629 1 1 +54558 1 1 +67870 1 1 +94407 1 1 +186621 1 1 +86015 1 1 +1453 1 1 +59759 1 1 +73880 1 1 +115184 1 1 +173555 1 1 +20260 1 1 +21703 1 1 +40680 1 1 +87581 1 1 +173667 1 1 +178373 1 1 +7136 1 1 +35048 1 1 +82826 1 1 +94796 1 1 +102629 1 1 +139315 1 1 +175248 1 1 +18573 1 1 +37571 1 1 +92139 1 1 +104249 1 1 +115278 1 1 +137336 1 1 +37201 1 1 +45804 1 1 +55725 1 1 +64198 1 1 +179879 1 1 +196226 1 1 +34503 1 1 +62002 1 1 +65987 1 1 +96716 1 1 +103326 1 1 +195706 1 1 +123172 1 1 +135486 1 1 +49665 1 1 +77796 1 1 +119574 1 1 +40314 1 1 +44804 1 1 +109841 1 1 +PREHOOK: query: explain +select L_ORDERKEY, count(L_ORDERKEY) +from lineitem_ix +where L_ORDERKEY = 7 +group by L_ORDERKEY +PREHOOK: type: QUERY +POSTHOOK: query: explain +select L_ORDERKEY, count(L_ORDERKEY) +from lineitem_ix +where L_ORDERKEY = 7 +group by L_ORDERKEY +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: $hdt$_0:$hdt$_0:default.default__lineitem_ix_lineitem_ix_l_orderkey_idx__ + Statistics: Num rows: 26 Data size: 3046 Basic stats: COMPLETE Column stats: NONE + Filter Operator + 
predicate: (l_orderkey = 7) (type: boolean) + Statistics: Num rows: 13 Data size: 1523 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 7 (type: int), _count_of_l_orderkey (type: bigint) + outputColumnNames: _col0, _count_of_l_orderkey + Statistics: Num rows: 13 Data size: 1523 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_l_orderkey) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1523 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1523 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 702 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 702 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 702 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select L_ORDERKEY, count(L_ORDERKEY) +from lineitem_ix +where L_ORDERKEY = 7 +group by L_ORDERKEY +PREHOOK: type: QUERY +PREHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select L_ORDERKEY, count(L_ORDERKEY) +from lineitem_ix +where L_ORDERKEY = 7 +group by L_ORDERKEY +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +7 7 +PREHOOK: query: explain +select L_ORDERKEY, count(keysum), sum(keysum) +from +(select L_ORDERKEY, L_ORDERKEY+L_PARTKEY as keysum from lineitem_ix) tabA +group by L_ORDERKEY +PREHOOK: type: QUERY +POSTHOOK: query: explain +select L_ORDERKEY, count(keysum), sum(keysum) +from +(select L_ORDERKEY, L_ORDERKEY+L_PARTKEY as keysum from lineitem_ix) tabA +group by L_ORDERKEY +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_ix + Statistics: Num rows: 1512 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int), (l_orderkey + l_partkey) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1512 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1), sum(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1512 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1512 Data size: 12099 Basic stats: COMPLETE Column 
stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 756 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 756 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 756 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select L_ORDERKEY, count(keysum), sum(keysum) +from +(select L_ORDERKEY, L_ORDERKEY+L_PARTKEY as keysum from lineitem_ix) tabA +group by L_ORDERKEY +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select L_ORDERKEY, count(keysum), sum(keysum) +from +(select L_ORDERKEY, L_ORDERKEY+L_PARTKEY as keysum from lineitem_ix) tabA +group by L_ORDERKEY +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +1 6 328000 +2 1 106172 +3 6 426418 +4 1 88039 +5 3 270043 +6 1 139642 +7 7 973580 +32 6 425147 +33 4 293374 +34 3 347422 +35 6 519350 +36 1 119803 +37 3 162426 +38 1 175877 +39 6 426444 +64 1 86015 +65 3 135092 +66 2 288739 +67 6 522264 +68 7 636998 +69 6 505146 +70 6 579033 +71 6 558240 +96 2 258658 +97 3 247035 +98 3 194959 +PREHOOK: query: explain +select L_ORDERKEY, count(L_ORDERKEY), sum(L_ORDERKEY) +from lineitem_ix +group by L_ORDERKEY +PREHOOK: type: QUERY +POSTHOOK: query: explain +select L_ORDERKEY, count(L_ORDERKEY), sum(L_ORDERKEY) +from lineitem_ix +group by L_ORDERKEY +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_ix + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0), sum(_col0) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column 
stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select L_ORDERKEY, count(L_ORDERKEY), sum(L_ORDERKEY) +from lineitem_ix +group by L_ORDERKEY +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select L_ORDERKEY, count(L_ORDERKEY), sum(L_ORDERKEY) +from lineitem_ix +group by L_ORDERKEY +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +1 6 6 +2 1 2 +3 6 18 +4 1 4 +5 3 15 +6 1 6 +7 7 49 +32 6 192 +33 4 132 +34 3 102 +35 6 210 +36 1 36 +37 3 111 +38 1 38 +39 6 234 +64 1 64 +65 3 195 +66 2 132 +67 6 402 +68 7 476 +69 6 414 +70 6 420 +71 6 426 +96 2 192 +97 3 291 +98 3 294 +PREHOOK: query: explain +select colA, count(colA) +from (select L_ORDERKEY as colA from lineitem_ix) tabA +group by colA +PREHOOK: type: QUERY +POSTHOOK: query: explain +select colA, count(colA) +from (select L_ORDERKEY as colA from lineitem_ix) tabA +group by colA +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: $hdt$_0:$hdt$_0:$hdt$_0:default.default__lineitem_ix_lineitem_ix_l_orderkey_idx__ + Statistics: Num rows: 26 Data size: 3046 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int), _count_of_l_orderkey (type: bigint) + outputColumnNames: _col0, _count_of_l_orderkey + Statistics: Num rows: 26 Data size: 3046 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_l_orderkey) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3046 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 3046 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1523 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1523 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1523 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select colA, count(colA) +from (select L_ORDERKEY as colA from lineitem_ix) tabA +group by colA +PREHOOK: type: QUERY +PREHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: 
select colA, count(colA) +from (select L_ORDERKEY as colA from lineitem_ix) tabA +group by colA +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +1 6 +2 1 +3 6 +4 1 +5 3 +6 1 +7 7 +32 6 +33 4 +34 3 +35 6 +36 1 +37 3 +38 1 +39 6 +64 1 +65 3 +66 2 +67 6 +68 7 +69 6 +70 6 +71 6 +96 2 +97 3 +98 3 +PREHOOK: query: explain +select keysum, count(keysum) +from +(select L_ORDERKEY+L_PARTKEY as keysum from lineitem_ix) tabA +group by keysum +PREHOOK: type: QUERY +POSTHOOK: query: explain +select keysum, count(keysum) +from +(select L_ORDERKEY+L_PARTKEY as keysum from lineitem_ix) tabA +group by keysum +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_ix + Statistics: Num rows: 1512 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (l_orderkey + l_partkey) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1512 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1512 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1512 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 756 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 756 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 756 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select keysum, count(keysum) +from +(select L_ORDERKEY+L_PARTKEY as keysum from lineitem_ix) tabA +group by keysum +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select keysum, count(keysum) +from +(select L_ORDERKEY+L_PARTKEY as keysum from lineitem_ix) tabA +group by keysum +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +485 1 +1453 1 +2133 1 +2359 1 +2775 1 +4300 1 +7136 1 +11647 1 +12940 1 +15636 1 +18573 1 +19039 1 +20260 1 +20629 1 +21703 1 +22667 1 +24028 1 +29383 1 +30797 1 +33951 1 +34503 1 +35048 1 +37201 1 +37536 1 +37571 1 +40314 1 +40680 1 +44193 1 +44804 1 +45804 1 +49665 1 +54558 1 +55725 1 +59759 1 +60552 1 +61369 1 +62002 1 +62146 1 +63701 1 +64198 1 +65987 1 +67311 1 +67870 1 +73880 1 +77796 1 +79258 1 +82736 1 +82826 1 +85210 1 +85843 1 +86015 1 +87581 1 +88039 1 +88396 1 +89448 1 +92139 1 +94407 1 +94787 1 +94796 1 +96716 1 +102629 1 +103326 1 +104249 1 +106172 1 
+108575 1 +109841 1 +115184 1 +115278 1 +119574 1 +119803 1 +119952 1 +120931 1 +123172 1 +123932 1 +126819 1 +128452 1 +135486 1 +137336 1 +137502 1 +139315 1 +139642 1 +145250 1 +151901 1 +155191 1 +157245 1 +161975 1 +163080 1 +169578 1 +173555 1 +173667 1 +175248 1 +175877 1 +178373 1 +179879 1 +182059 1 +183098 1 +186621 1 +195706 1 +196226 1 +197953 1 +PREHOOK: query: explain +select keysum, count(keysum) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix) tabA +group by keysum +PREHOOK: type: QUERY +POSTHOOK: query: explain +select keysum, count(keysum) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix) tabA +group by keysum +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: $hdt$_0:$hdt$_0:$hdt$_0:default.default__lineitem_ix_lineitem_ix_l_orderkey_idx__ + Statistics: Num rows: 26 Data size: 3046 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (l_orderkey + 1) (type: int), _count_of_l_orderkey (type: bigint) + outputColumnNames: _col0, _count_of_l_orderkey + Statistics: Num rows: 26 Data size: 3046 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_l_orderkey) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3046 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 3046 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1523 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1523 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1523 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select keysum, count(keysum) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix) tabA +group by keysum +PREHOOK: type: QUERY +PREHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select keysum, count(keysum) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix) tabA +group by keysum +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +2 6 +3 1 +4 6 +5 1 +6 3 +7 1 +8 7 +33 6 +34 4 +35 3 +36 6 +37 1 +38 3 +39 1 +40 6 +65 1 +66 3 +67 2 +68 6 +69 7 +70 6 +71 6 +72 6 +97 2 +98 3 +99 3 +PREHOOK: query: explain +select keysum, count(1) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix) tabA +group by keysum +PREHOOK: type: QUERY +POSTHOOK: query: explain +select keysum, count(1) +from +(select L_ORDERKEY+1 as keysum from 
lineitem_ix) tabA +group by keysum +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_ix + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (l_orderkey + 1) (type: int), 1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select keysum, count(1) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix) tabA +group by keysum +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select keysum, count(1) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix) tabA +group by keysum +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +2 6 +3 1 +4 6 +5 1 +6 3 +7 1 +8 7 +33 6 +34 4 +35 3 +36 6 +37 1 +38 3 +39 1 +40 6 +65 1 +66 3 +67 2 +68 6 +69 7 +70 6 +71 6 +72 6 +97 2 +98 3 +99 3 +PREHOOK: query: explain +select keysum, count(keysum) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix where L_ORDERKEY = 7) tabA +group by keysum +PREHOOK: type: QUERY +POSTHOOK: query: explain +select keysum, count(keysum) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix where L_ORDERKEY = 7) tabA +group by keysum +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_ix + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (l_orderkey = 7) (type: boolean) + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 8 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE 
Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 756 Data size: 3024 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 756 Data size: 3024 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 756 Data size: 3024 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select keysum, count(keysum) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix where L_ORDERKEY = 7) tabA +group by keysum +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select keysum, count(keysum) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix where L_ORDERKEY = 7) tabA +group by keysum +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +8 7 +PREHOOK: query: explain +select ckeysum, count(ckeysum) +from +(select keysum, count(keysum) as ckeysum +from + (select L_ORDERKEY+1 as keysum from lineitem_ix where L_ORDERKEY = 7) tabA +group by keysum) tabB +group by ckeysum +PREHOOK: type: QUERY +POSTHOOK: query: explain +select ckeysum, count(ckeysum) +from +(select keysum, count(keysum) as ckeysum +from + (select L_ORDERKEY+1 as keysum from lineitem_ix where L_ORDERKEY = 7) tabA +group by keysum) tabB +group by ckeysum +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_ix + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (l_orderkey = 7) (type: boolean) + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 8 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 756 Data size: 3024 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 
756 Data size: 3024 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0) + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 756 Data size: 3024 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 756 Data size: 3024 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 378 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 378 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 378 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select ckeysum, count(ckeysum) +from +(select keysum, count(keysum) as ckeysum +from + (select L_ORDERKEY+1 as keysum from lineitem_ix where L_ORDERKEY = 7) tabA +group by keysum) tabB +group by ckeysum +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select ckeysum, count(ckeysum) +from +(select keysum, count(keysum) as ckeysum +from + (select L_ORDERKEY+1 as keysum from lineitem_ix where L_ORDERKEY = 7) tabA +group by keysum) tabB +group by ckeysum +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +7 1
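
Note: the golden outputs above all exercise the same optimization path — the group-by-to-index rewrite (RewriteGBUsingIndex) over an index built with org.apache.hadoop.hive.ql.index.AggregateIndexHandler. When a query groups on the index key and the only aggregate is count() of that key, the plan's TableScan alias switches from the base table to the index table (e.g. default.default__tbl_tbl_key_idx__, default__lineitem_ix_lineitem_ix_l_orderkey_idx__) and count(key) becomes sum(_count_of_key). Below is a minimal HiveQL sketch of that setup; the table name t, index name t_key_idx, and the explicit SET line are assumptions for illustration (the q-files enable the same hive.optimize.index.groupby setting), and the rewritten form in the trailing comment is an approximation of what the plans above print, not output of this script:

    -- Sketch only: assumed names (t, t_key_idx); mirrors the pattern in the test files.
    CREATE TABLE t (key INT, value STRING);

    CREATE INDEX t_key_idx ON TABLE t(key)
    AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler'
    WITH DEFERRED REBUILD
    IDXPROPERTIES ("AGGREGATES"="count(key)");

    ALTER INDEX t_key_idx ON t REBUILD;

    SET hive.optimize.index.groupby=true;

    -- Eligible shape: grouped on the index key, aggregate is count(key).
    -- The optimizer can answer it from the index table, roughly as:
    --   SELECT key, SUM(_count_of_key)
    --   FROM default__t_t_key_idx__
    --   GROUP BY key;
    SELECT key, count(key) FROM t GROUP BY key;

Queries outside that shape — keys combining more than one column (L_ORDERKEY+L_PARTKEY), count(1), or additional aggregates such as sum() — fall back to scanning the base table, which is what the non-rewritten plans above show.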