diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyCtx.java
index 9ffa708..b56b608 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyCtx.java
@@ -21,17 +21,16 @@
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
-import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 import java.util.Stack;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.metastore.api.Index;
 import org.apache.hadoop.hive.ql.exec.FilterOperator;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
 import org.apache.hadoop.hive.ql.lib.Dispatcher;
@@ -63,47 +62,31 @@ public static RewriteCanApplyCtx getInstance(ParseContext parseContext){
   }
 
   // Rewrite Variables
-  private int aggFuncCnt = 0;
+  private boolean selClauseColsFetchException = false;
   private boolean queryHasGroupBy = false;
   private boolean aggFuncIsNotCount = false;
-  private boolean aggFuncColsFetchException = false;
-  private boolean whrClauseColsFetchException = false;
-  private boolean selClauseColsFetchException = false;
-  private boolean gbyKeysFetchException = false;
-  private boolean countOnAllCols = false;
-  private boolean countOfOne = false;
-  private boolean queryHasMultipleTables = false;
-
-  //Data structures that are populated in the RewriteCanApplyProcFactory
-  //methods to check if the index key meets all criteria
-  private Set<String> selectColumnsList = new LinkedHashSet<String>();
-  private Set<String> predicateColumnsList = new LinkedHashSet<String>();
-  private Set<String> gbKeyNameList = new LinkedHashSet<String>();
-  private Set<String> aggFuncColList = new LinkedHashSet<String>();
+  private boolean aggParameterException = false;
+
+  //The most important, indexKey
+  private String indexKey;
 
   private final ParseContext parseContext;
   private String alias;
   private String baseTableName;
   private String indexTableName;
   private String aggFunction;
+
+  private TableScanOperator tableScanOperator;
+  private List<SelectOperator> selectOperators;
+  private List<GroupByOperator> groupByOperators;
 
   void resetCanApplyCtx(){
-    setAggFuncCnt(0);
     setQueryHasGroupBy(false);
     setAggFuncIsNotCount(false);
-    setAggFuncColsFetchException(false);
-    setWhrClauseColsFetchException(false);
     setSelClauseColsFetchException(false);
-    setGbyKeysFetchException(false);
-    setCountOnAllCols(false);
-    setCountOfOne(false);
-    setQueryHasMultipleTables(false);
-    selectColumnsList.clear();
-    predicateColumnsList.clear();
-    gbKeyNameList.clear();
-    aggFuncColList.clear();
     setBaseTableName("");
     setAggFunction("");
+    setIndexKey("");
   }
 
   public boolean isQueryHasGroupBy() {
@@ -134,22 +117,6 @@ public String getAggFunction() {
     return aggFunction;
   }
 
-  public void setAggFuncColsFetchException(boolean aggFuncColsFetchException) {
-    this.aggFuncColsFetchException = aggFuncColsFetchException;
-  }
-
-  public boolean isAggFuncColsFetchException() {
-    return aggFuncColsFetchException;
-  }
-
-  public void setWhrClauseColsFetchException(boolean whrClauseColsFetchException) {
-    this.whrClauseColsFetchException = whrClauseColsFetchException;
-  }
-
-  public boolean isWhrClauseColsFetchException() {
-    return whrClauseColsFetchException;
-  }
-
   public void setSelClauseColsFetchException(boolean selClauseColsFetchException) {
     this.selClauseColsFetchException = selClauseColsFetchException;
   }
@@ -158,78 +125,6 @@ public boolean isSelClauseColsFetchException() {
     return selClauseColsFetchException;
   }
 
-  public void setGbyKeysFetchException(boolean gbyKeysFetchException) {
-    this.gbyKeysFetchException = gbyKeysFetchException;
-  }
-
-  public boolean isGbyKeysFetchException() {
-    return gbyKeysFetchException;
-  }
-
-  public void setCountOnAllCols(boolean countOnAllCols) {
-    this.countOnAllCols = countOnAllCols;
-  }
-
-  public boolean isCountOnAllCols() {
-    return countOnAllCols;
-  }
-
-  public void setCountOfOne(boolean countOfOne) {
-    this.countOfOne = countOfOne;
-  }
-
-  public boolean isCountOfOne() {
-    return countOfOne;
-  }
-
-  public void setQueryHasMultipleTables(boolean queryHasMultipleTables) {
-    this.queryHasMultipleTables = queryHasMultipleTables;
-  }
-
-  public boolean isQueryHasMultipleTables() {
-    return queryHasMultipleTables;
-  }
-
-  public Set<String> getSelectColumnsList() {
-    return selectColumnsList;
-  }
-
-  public void setSelectColumnsList(Set<String> selectColumnsList) {
-    this.selectColumnsList = selectColumnsList;
-  }
-
-  public Set<String> getPredicateColumnsList() {
-    return predicateColumnsList;
-  }
-
-  public void setPredicateColumnsList(Set<String> predicateColumnsList) {
-    this.predicateColumnsList = predicateColumnsList;
-  }
-
-  public Set<String> getGbKeyNameList() {
-    return gbKeyNameList;
-  }
-
-  public void setGbKeyNameList(Set<String> gbKeyNameList) {
-    this.gbKeyNameList = gbKeyNameList;
-  }
-
-  public Set<String> getAggFuncColList() {
-    return aggFuncColList;
-  }
-
-  public void setAggFuncColList(Set<String> aggFuncColList) {
-    this.aggFuncColList = aggFuncColList;
-  }
-
-  public int getAggFuncCnt() {
-    return aggFuncCnt;
-  }
-
-  public void setAggFuncCnt(int aggFuncCnt) {
-    this.aggFuncCnt = aggFuncCnt;
-  }
-
   public String getAlias() {
     return alias;
   }
@@ -258,15 +153,6 @@ public ParseContext getParseContext() {
     return parseContext;
   }
 
-  public Set<String> getAllColumns() {
-    Set<String> allColumns = new LinkedHashSet<String>(selectColumnsList);
-    allColumns.addAll(predicateColumnsList);
-    allColumns.addAll(gbKeyNameList);
-    allColumns.addAll(aggFuncColList);
-    return allColumns;
-  }
-
-
   /**
    * This method walks all the nodes starting from topOp TableScanOperator node
    * and invokes methods from {@link RewriteCanApplyProcFactory} for each of the rules
@@ -282,10 +168,14 @@ public ParseContext getParseContext() {
   void populateRewriteVars(TableScanOperator topOp) throws SemanticException{
     Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
-    opRules.put(new RuleRegExp("R1", FilterOperator.getOperatorName() + "%"),
-        RewriteCanApplyProcFactory.canApplyOnFilterOperator(topOp));
-    opRules.put(new RuleRegExp("R2", GroupByOperator.getOperatorName() + "%"),
-        RewriteCanApplyProcFactory.canApplyOnGroupByOperator(topOp));
+    //^TS%[(SEL%)|(FIL%)]*GBY%[(FIL%)]*RS%[(FIL%)]*GBY%
+    opRules.put(
+        new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%[("
+            + SelectOperator.getOperatorName() + "%)|(" + FilterOperator.getOperatorName() + "%)]*"
+            + GroupByOperator.getOperatorName() + "%[" + FilterOperator.getOperatorName() + "%]*"
+            + ReduceSinkOperator.getOperatorName() + "%[" + FilterOperator.getOperatorName()
+            + "%]*" + GroupByOperator.getOperatorName() + "%"),
+        RewriteCanApplyProcFactory.canApplyOnTableScanOperator(topOp));
 
     // The dispatcher fires the processor corresponding to the closest matching
     // rule and passes the context along
@@ -323,67 +213,53 @@ public Object process(Node nd, Stack stack,
 
   //Map for base table to index table mapping
   //TableScan operator for base table will be modified to read from index table
-  private final Map<String, String> baseToIdxTableMap =
-      new HashMap<String, String>();;
-
+  private final Map<String, String> baseToIdxTableMap = new HashMap<String, String>();
   public void addTable(String baseTableName, String indexTableName) {
-   baseToIdxTableMap.put(baseTableName, indexTableName);
- }
+    baseToIdxTableMap.put(baseTableName, indexTableName);
+  }
 
- public String findBaseTable(String baseTableName) {
-   return baseToIdxTableMap.get(baseTableName);
- }
+  public String findBaseTable(String baseTableName) {
+    return baseToIdxTableMap.get(baseTableName);
+  }
 
+  public String getIndexKey() {
+    return indexKey;
+  }
 
-  boolean isIndexUsableForQueryBranchRewrite(Index index, Set<String> indexKeyNames){
+  public void setIndexKey(String indexKey) {
+    this.indexKey = indexKey;
+  }
 
-    //--------------------------------------------
-    //Check if all columns in select list are part of index key columns
-    if (!indexKeyNames.containsAll(selectColumnsList)) {
-      LOG.info("Select list has non index key column : " +
-          " Cannot use index " + index.getIndexName());
-      return false;
-    }
+  public TableScanOperator getTableScanOperator() {
+    return tableScanOperator;
+  }
 
-    //--------------------------------------------
-    // Check if all columns in where predicate are part of index key columns
-    if (!indexKeyNames.containsAll(predicateColumnsList)) {
-      LOG.info("Predicate column ref list has non index key column : " +
-          " Cannot use index " + index.getIndexName());
-      return false;
-    }
+  public void setTableScanOperator(TableScanOperator tableScanOperator) {
+    this.tableScanOperator = tableScanOperator;
+  }
 
-    //--------------------------------------------
-    // For group by, we need to check if all keys are from index columns
-    // itself. Here GB key order can be different than index columns but that does
-    // not really matter for final result.
-    if (!indexKeyNames.containsAll(gbKeyNameList)) {
-      LOG.info("Group by key has some non-indexed columns, " +
-          " Cannot use index " + index.getIndexName());
-      return false;
-    }
+  public List<SelectOperator> getSelectOperators() {
+    return selectOperators;
+  }
 
-    // If we have agg function (currently only COUNT is supported), check if its inputs are
-    // from index. we currently support only that.
-    if (aggFuncColList.size() > 0) {
-      if (!indexKeyNames.containsAll(aggFuncColList)){
-        LOG.info("Agg Func input is not present in index key columns. Currently " +
-            "only agg func on index columns are supported by rewrite optimization");
-        return false;
-      }
-    }
+  public void setSelectOperators(List<SelectOperator> selectOperators) {
+    this.selectOperators = selectOperators;
+  }
 
-    //Now that we are good to do this optimization, set parameters in context
-    //which would be used by transformation procedure as inputs.
-    if(queryHasGroupBy
-        && aggFuncCnt == 1
-        && !aggFuncIsNotCount){
-      addTable(baseTableName, index.getIndexTableName());
-    }else{
-      LOG.info("No valid criteria met to apply rewrite.");
-      return false;
-    }
-    return true;
+  public List<GroupByOperator> getGroupByOperators() {
+    return groupByOperators;
+  }
+
+  public void setGroupByOperators(List<GroupByOperator> groupByOperators) {
+    this.groupByOperators = groupByOperators;
+  }
+
+  public void setAggParameterException(boolean aggParameterException) {
+    this.aggParameterException = aggParameterException;
+  }
+
+  public boolean isAggParameterException() {
+    return aggParameterException;
   }
 }
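The single R1 rule above replaces the two per-operator rules with one operator-path regular expression, so the whole plan shape is validated in a single match. Against the lineitem_ix table and index that this patch's new .q test defines later, a sketch of a query whose plan has exactly that TS-(SEL|FIL)*-GBY-(FIL)*-RS-(FIL)*-GBY shape:

-- Plan shape matched by the new R1 rule: map-side group-by, reduce sink,
-- then reduce-side group-by over a single referenced column (the index key).
EXPLAIN
SELECT l_shipdate, count(l_shipdate)
FROM lineitem_ix
GROUP BY l_shipdate;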
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyProcFactory.java
index 02216de..94be0a5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyProcFactory.java
@@ -18,8 +18,9 @@
 
 package org.apache.hadoop.hive.ql.optimizer.index;
 
-import org.apache.hadoop.hive.ql.exec.FilterOperator;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
@@ -31,9 +32,10 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.FilterDesc;
 import org.apache.hadoop.hive.ql.plan.GroupByDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Stack;
 
@@ -43,154 +45,107 @@
  *
  */
 public final class RewriteCanApplyProcFactory {
+
+  public static CheckTableScanProc canApplyOnTableScanOperator(TableScanOperator topOp) {
+    return new CheckTableScanProc();
+  }
+
+
+  private static class CheckTableScanProc implements NodeProcessor {
 
-  /**
-   * Check for conditions in FilterOperator that do not meet rewrite criteria.
-   */
-  private static class CheckFilterProc implements NodeProcessor {
-
-    private TableScanOperator topOp;
-
-    public CheckFilterProc(TableScanOperator topOp) {
-      this.topOp = topOp;
+    public CheckTableScanProc() {
     }
 
-    public Object process(Node nd, Stack stack, NodeProcessorCtx ctx,
-        Object... nodeOutputs) throws SemanticException {
-      FilterOperator operator = (FilterOperator)nd;
-      RewriteCanApplyCtx canApplyCtx = (RewriteCanApplyCtx)ctx;
-      FilterDesc conf = operator.getConf();
-      //The filter operator should have a predicate of ExprNodeGenericFuncDesc type.
-      //This represents the comparison operator
-      ExprNodeDesc oldengfd = conf.getPredicate();
-      if(oldengfd == null){
-        canApplyCtx.setWhrClauseColsFetchException(true);
-        return null;
-      }
-      ExprNodeDesc backtrack = ExprNodeDescUtils.backtrack(oldengfd, operator, topOp);
-      if (backtrack == null) {
-        canApplyCtx.setWhrClauseColsFetchException(true);
-        return null;
-      }
-      //Add the predicate columns to RewriteCanApplyCtx's predColRefs list to check later
-      //if index keys contain all filter predicate columns and vice-a-versa
-      for (String col : backtrack.getCols()) {
-        canApplyCtx.getPredicateColumnsList().add(col);
+    public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, Object... nodeOutputs)
+        throws SemanticException {
+      RewriteCanApplyCtx canApplyCtx = (RewriteCanApplyCtx) ctx;
+      for (Node node : stack) {
+        // For table scan operator,
+        // check ReferencedColumns to make sure that only the index column is
+        // selected for the following operators.
+        if (node instanceof TableScanOperator) {
+          TableScanOperator ts = (TableScanOperator) node;
+          canApplyCtx.setTableScanOperator(ts);
+          List<String> selectColumns = ts.getConf().getReferencedColumns();
+          if (selectColumns == null || selectColumns.size() != 1) {
+            canApplyCtx.setSelClauseColsFetchException(true);
+            return null;
+          } else {
+            canApplyCtx.setIndexKey(selectColumns.get(0));
+          }
+        } else if (node instanceof SelectOperator) {
+          // For select operators in the stack, we just add them
+          if (canApplyCtx.getSelectOperators() == null) {
+            canApplyCtx.setSelectOperators(new ArrayList<SelectOperator>());
+          }
+          canApplyCtx.getSelectOperators().add((SelectOperator) node);
+        } else if (node instanceof GroupByOperator) {
+          if (canApplyCtx.getGroupByOperators() == null) {
+            canApplyCtx.setGroupByOperators(new ArrayList<GroupByOperator>());
+          }
+          // According to the pre-order,
+          // the first GroupByOperator is the one before RS
+          // and the second one is the one after RS
+          GroupByOperator operator = (GroupByOperator) node;
+          canApplyCtx.getGroupByOperators().add(operator);
+          if (!canApplyCtx.isQueryHasGroupBy()) {
+            canApplyCtx.setQueryHasGroupBy(true);
+            GroupByDesc conf = operator.getConf();
+            List<AggregationDesc> aggrList = conf.getAggregators();
+            if (aggrList == null || aggrList.size() != 1
+                || !("count".equals(aggrList.get(0).getGenericUDAFName()))) {
+              // In the current implementation, we make sure that only count is
+              // in the function
+              canApplyCtx.setAggFuncIsNotCount(true);
+              return null;
+            } else {
+              List<ExprNodeDesc> para = aggrList.get(0).getParameters();
+              if (para == null || para.size() == 0 || para.size() > 1) {
+                canApplyCtx.setAggParameterException(true);
+                return null;
+              } else {
+                ExprNodeDesc expr = ExprNodeDescUtils.backtrack(para.get(0), operator,
                    (Operator<OperatorDesc>) stack.get(0));
+                if (!checkExpression(expr, canApplyCtx.getIndexKey())) {
+                  canApplyCtx.setAggParameterException(true);
+                  return null;
+                }
+              }
+            }
+          }
+        }
       }
       return null;
     }
+
+    private boolean checkExpression(ExprNodeDesc expr, String indexKey) {
+      if (expr instanceof ExprNodeColumnDesc) {
+        // we do not need to worry if
+        // it is ExprNodeColumnDesc
+        // that is, count(key)
+        return true;
+      } else if (expr instanceof ExprNodeConstantDesc) {
+        // we need to distinguish
+        // count(1)
+        // count(key) where key=1
+        ExprNodeConstantDesc exprNodeConstantDesc = (ExprNodeConstantDesc) expr;
+        if (indexKey.equals(exprNodeConstantDesc.getFoldedFromCol())) {
+          return true;
+        }
+      } else if (expr instanceof ExprNodeGenericFuncDesc) {
+        ExprNodeGenericFuncDesc funcExpr = (ExprNodeGenericFuncDesc) expr;
+        for (ExprNodeDesc childExpr : funcExpr.getChildren()) {
+          if (checkExpression(childExpr, indexKey)) {
+            // this is a function with indexkey
+            // that is, count(function(key))
+            return true;
+          }
+        }
+      } else {
+        // we do not support other cases for now.
+        return false;
+      }
+      return false;
    }
  }
-
-  public static CheckFilterProc canApplyOnFilterOperator(TableScanOperator topOp) {
-    return new CheckFilterProc(topOp);
-  }
-
-  /**
-   * Check for conditions in GroupByOperator that do not meet rewrite criteria.
-   *
-   */
-  private static class CheckGroupByProc implements NodeProcessor {
-
-    private TableScanOperator topOp;
-
-    public CheckGroupByProc(TableScanOperator topOp) {
-      this.topOp = topOp;
-    }
-
-    public Object process(Node nd, Stack stack, NodeProcessorCtx ctx,
-        Object... nodeOutputs) throws SemanticException {
-      GroupByOperator operator = (GroupByOperator)nd;
-      RewriteCanApplyCtx canApplyCtx = (RewriteCanApplyCtx)ctx;
-      //for each group-by clause in query, only one GroupByOperator of the
-      //GBY-RS-GBY sequence is stored in getGroupOpToInputTables
-      //we need to process only this operator
-      //Also, we do not rewrite for cases when same query branch has multiple group-by constructs
-      if(canApplyCtx.getParseContext().getGroupOpToInputTables().containsKey(operator) &&
-          !canApplyCtx.isQueryHasGroupBy()){
-
-        canApplyCtx.setQueryHasGroupBy(true);
-        GroupByDesc conf = operator.getConf();
-        List<AggregationDesc> aggrList = conf.getAggregators();
-        if(aggrList != null && aggrList.size() > 0){
-          for (AggregationDesc aggregationDesc : aggrList) {
-            canApplyCtx.setAggFuncCnt(canApplyCtx.getAggFuncCnt() + 1);
-            //In the current implementation, we do not support more than 1 agg funcs in group-by
-            if(canApplyCtx.getAggFuncCnt() > 1) {
-              return false;
-            }
-            String aggFunc = aggregationDesc.getGenericUDAFName();
-            if(!("count".equals(aggFunc))){
-              canApplyCtx.setAggFuncIsNotCount(true);
-              return false;
-            }
-            List<ExprNodeDesc> para = aggregationDesc.getParameters();
-            //for a valid aggregation, it needs to have non-null parameter list
-            if (para == null) {
-              canApplyCtx.setAggFuncColsFetchException(true);
-            } else if (para.size() == 0) {
-              //count(*) case
-              canApplyCtx.setCountOnAllCols(true);
-              canApplyCtx.setAggFunction("_count_of_all");
-            } else if (para.size() == 1) {
-              ExprNodeDesc expr = ExprNodeDescUtils.backtrack(para.get(0), operator, topOp);
-              if (expr instanceof ExprNodeColumnDesc){
-                //Add the columns to RewriteCanApplyCtx's selectColumnsList list
-                //to check later if index keys contain all select clause columns
-                //and vice-a-versa. We get the select column 'actual' names only here
-                //if we have a agg func along with group-by
-                //SelectOperator has internal names in its colList data structure
-                canApplyCtx.getSelectColumnsList().add(
-                    ((ExprNodeColumnDesc) expr).getColumn());
-                //Add the columns to RewriteCanApplyCtx's aggFuncColList list to check later
-                //if columns contained in agg func are index key columns
-                canApplyCtx.getAggFuncColList().add(
-                    ((ExprNodeColumnDesc) expr).getColumn());
-                canApplyCtx.setAggFunction("_count_of_" +
-                    ((ExprNodeColumnDesc) expr).getColumn() + "");
-              } else if(expr instanceof ExprNodeConstantDesc) {
-                //count(1) case
-                canApplyCtx.setCountOfOne(true);
-                canApplyCtx.setAggFunction("_count_of_1");
-              }
-            } else {
-              throw new SemanticException("Invalid number of arguments for count");
-            }
-          }
-        }
-
-        //we need to have non-null group-by keys for a valid group-by operator
-        List<ExprNodeDesc> keyList = conf.getKeys();
-        if(keyList == null || keyList.size() == 0){
-          canApplyCtx.setGbyKeysFetchException(true);
-        }
-        for (ExprNodeDesc expr : keyList) {
-          checkExpression(canApplyCtx, expr);
-        }
-      }
-      return null;
-    }
-
-    private void checkExpression(RewriteCanApplyCtx canApplyCtx, ExprNodeDesc expr){
-      if(expr instanceof ExprNodeColumnDesc){
-        //Add the group-by keys to RewriteCanApplyCtx's gbKeyNameList list to check later
-        //if all keys are from index columns
-        canApplyCtx.getGbKeyNameList().addAll(expr.getCols());
-      }else if(expr instanceof ExprNodeGenericFuncDesc){
-        ExprNodeGenericFuncDesc funcExpr = (ExprNodeGenericFuncDesc)expr;
-        List<ExprNodeDesc> childExprs = funcExpr.getChildren();
-        for (ExprNodeDesc childExpr : childExprs) {
-          if(childExpr instanceof ExprNodeColumnDesc){
-            canApplyCtx.getGbKeyNameList().addAll(expr.getCols());
-            canApplyCtx.getSelectColumnsList().add(((ExprNodeColumnDesc) childExpr).getColumn());
-          }else if(childExpr instanceof ExprNodeGenericFuncDesc){
-            checkExpression(canApplyCtx, childExpr);
-          }
-        }
-      }
-    }
-  }
-
-  public static CheckGroupByProc canApplyOnGroupByOperator(TableScanOperator topOp) {
-    return new CheckGroupByProc(topOp);
-  }
 }
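The ExprNodeConstantDesc branch of the new checkExpression exists because constant propagation may have folded the count parameter before this check runs: getFoldedFromCol tells apart a literal the user wrote from an index column that a predicate folded to a constant. A sketch of the two cases, using the tbl table and tbl_key_idx index from the patch's test file (behavior as the comments in checkExpression describe it):

-- Parameter is a genuine constant; getFoldedFromCol is not the index key,
-- so this is not rewritten against the aggregate index.
EXPLAIN SELECT count(1) FROM tbl;

-- Parameter folds from the index column via key = 1; getFoldedFromCol
-- returns "key", so the rewrite can still apply.
EXPLAIN SELECT key, count(key) FROM tbl WHERE key = 1 GROUP BY key;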
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteGBUsingIndex.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteGBUsingIndex.java
index 0f06ec9..6020db3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteGBUsingIndex.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteGBUsingIndex.java
@@ -21,10 +21,7 @@
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
 import java.util.LinkedHashMap;
-import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -49,7 +46,6 @@ import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
-import org.apache.hadoop.util.StringUtils;
 
 
 /**
@@ -153,10 +149,6 @@ private String getName() {
    * @throws SemanticException
    */
   boolean shouldApplyOptimization() throws SemanticException {
-    if (ifQueryHasMultipleTables()) {
-      //We do not apply this optimization for this case as of now.
-      return false;
-    }
     Map<Table, List<Index>> tableToIndex = getIndexesForRewrite();
     if (tableToIndex.isEmpty()) {
       LOG.debug("No Valid Index Found to apply Rewrite, " +
@@ -170,19 +162,14 @@ boolean shouldApplyOptimization() throws SemanticException {
      * the tsOpToProcess to apply rewrite later on.
      * */
     Map<TableScanOperator, Table> topToTable = parseContext.getTopToTable();
-    Map<String, Operator<? extends OperatorDesc>> topOps = parseContext.getTopOps();
-
     for (Map.Entry<String, Operator<? extends OperatorDesc>> entry : parseContext.getTopOps().entrySet()) {
-
       String alias = entry.getKey();
       TableScanOperator topOp = (TableScanOperator) entry.getValue();
-
       Table table = topToTable.get(topOp);
       List<Index> indexes = tableToIndex.get(table);
       if (indexes.isEmpty()) {
         continue;
       }
-
       if (table.isPartitioned()) {
         //if base table has partitions, we need to check if index is built for
         //all partitions. If not, then we do not apply the optimization
@@ -196,7 +183,6 @@ boolean shouldApplyOptimization() throws SemanticException {
         //if there are no partitions on base table
         checkIfRewriteCanBeApplied(alias, topOp, table, indexes);
       }
-
     }
     return !tsOpToProcess.isEmpty();
   }
 
@@ -213,26 +199,21 @@ private boolean checkIfRewriteCanBeApplied(String alias, TableScanOperator topOp
       Table baseTable, List<Index> indexes) throws SemanticException{
     //Context for checking if this optimization can be applied to the input query
     RewriteCanApplyCtx canApplyCtx = RewriteCanApplyCtx.getInstance(parseContext);
-
     canApplyCtx.setAlias(alias);
     canApplyCtx.setBaseTableName(baseTable.getTableName());
     canApplyCtx.populateRewriteVars(topOp);
-
-    Map<Index, Set<String>> indexTableMap = getIndexToKeysMap(indexes);
-    for (Map.Entry<Index, Set<String>> entry : indexTableMap.entrySet()) {
+    Map<Index, String> indexTableMap = getIndexToKeysMap(indexes);
+    for (Map.Entry<Index, String> entry : indexTableMap.entrySet()) {
       //we rewrite the original query using the first valid index encountered
       //this can be changed if we have a better mechanism to
       //decide which index will produce a better rewrite
       Index index = entry.getKey();
-      Set<String> indexKeyNames = entry.getValue();
+      String indexKeyName = entry.getValue();
       //break here if any valid index is found to apply rewrite
-      if (canApplyCtx.isIndexUsableForQueryBranchRewrite(index, indexKeyNames) &&
-          checkIfAllRewriteCriteriaIsMet(canApplyCtx)) {
-        //check if aggregation function is set.
-        //If not, set it using the only indexed column
-        if (canApplyCtx.getAggFunction() == null) {
-          canApplyCtx.setAggFunction("_count_of_" + StringUtils.join(",", indexKeyNames) + "");
-        }
+      if (canApplyCtx.getIndexKey() != null && canApplyCtx.getIndexKey().equals(indexKeyName)
+          && checkIfAllRewriteCriteriaIsMet(canApplyCtx)) {
+        canApplyCtx.setAggFunction("_count_of_" + indexKeyName + "");
+        canApplyCtx.addTable(canApplyCtx.getBaseTableName(), index.getIndexTableName());
         canApplyCtx.setIndexTableName(index.getIndexTableName());
         tsOpToProcess.put(alias, canApplyCtx);
         return true;
@@ -242,27 +223,6 @@ private boolean checkIfRewriteCanBeApplied(String alias, TableScanOperator topOp
   }
 
   /**
-   * This block of code iterates over the topToTable map from ParseContext
-   * to determine if the query has a scan over multiple tables.
-   * @return
-   */
-  boolean ifQueryHasMultipleTables(){
-    Map<TableScanOperator, Table> topToTable = parseContext.getTopToTable();
-    Iterator<Table> valuesItr = topToTable.values().iterator();
-    Set<String> tableNameSet = new HashSet<String>();
-    while(valuesItr.hasNext()){
-      Table table = valuesItr.next();
-      tableNameSet.add(table.getTableName());
-    }
-    if(tableNameSet.size() > 1){
-      LOG.debug("Query has more than one table " +
-          "that is not supported with " + getName() + " optimization.");
-      return true;
-    }
-    return false;
-  }
-
-  /**
    * Get a list of indexes which can be used for rewrite.
    * @return
    * @throws SemanticException
@@ -319,19 +279,16 @@ private boolean checkIfIndexBuiltOnAllTablePartitions(TableScanOperator tableSca
    * @return
    * @throws SemanticException
    */
-  Map<Index, Set<String>> getIndexToKeysMap(List<Index> indexTables) throws SemanticException{
+  Map<Index, String> getIndexToKeysMap(List<Index> indexTables) throws SemanticException{
     Hive hiveInstance = hiveDb;
-    Map<Index, Set<String>> indexToKeysMap = new LinkedHashMap<Index, Set<String>>();
+    Map<Index, String> indexToKeysMap = new LinkedHashMap<Index, String>();
     for (int idxCtr = 0; idxCtr < indexTables.size(); idxCtr++) {
-      final Set<String> indexKeyNames = new LinkedHashSet<String>();
       Index index = indexTables.get(idxCtr);
       //Getting index key columns
       StorageDescriptor sd = index.getSd();
       List<FieldSchema> idxColList = sd.getCols();
-      for (FieldSchema fieldSchema : idxColList) {
-        indexKeyNames.add(fieldSchema.getName());
-      }
-      assert indexKeyNames.size()==1;
+      assert idxColList.size()==1;
+      String indexKeyName = idxColList.get(0).getName();
       // Check that the index schema is as expected. This code block should
       // catch problems of this rewrite breaking when the AggregateIndexHandler
       // index is changed.
@@ -355,7 +312,7 @@ private boolean checkIfIndexBuiltOnAllTablePartitions(TableScanOperator tableSca
       // and defer the decision of using a particular index for later
       // this is to allow choosing a index if a better mechanism is
       // designed later to chose a better rewrite
-      indexToKeysMap.put(index, indexKeyNames);
+      indexToKeysMap.put(index, indexKeyName);
     }
     return indexToKeysMap;
   }
@@ -366,20 +323,11 @@ private boolean checkIfIndexBuiltOnAllTablePartitions(TableScanOperator tableSca
    * @throws SemanticException
    *
    */
-  @SuppressWarnings("unchecked")
   private void rewriteOriginalQuery() throws SemanticException {
-    Map<String, Operator<? extends OperatorDesc>> topOpMap = parseContext.getTopOps();
-    Iterator<String> tsOpItr = tsOpToProcess.keySet().iterator();
-
-    for (Map.Entry<String, RewriteCanApplyCtx> entry : tsOpToProcess.entrySet()) {
-      String alias = entry.getKey();
-      RewriteCanApplyCtx canApplyCtx = entry.getValue();
-      TableScanOperator topOp = (TableScanOperator) topOpMap.get(alias);
+    for (RewriteCanApplyCtx canApplyCtx : tsOpToProcess.values()) {
       RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx =
-          RewriteQueryUsingAggregateIndexCtx.getInstance(parseContext, hiveDb,
-              canApplyCtx.getIndexTableName(), canApplyCtx.getAlias(),
-              canApplyCtx.getAllColumns(), canApplyCtx.getAggFunction());
-      rewriteQueryCtx.invokeRewriteQueryProc(topOp);
+          RewriteQueryUsingAggregateIndexCtx.getInstance(parseContext, hiveDb, canApplyCtx);
+      rewriteQueryCtx.invokeRewriteQueryProc();
       parseContext = rewriteQueryCtx.getParseContext();
       parseContext.setOpParseCtx((LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext>)
           rewriteQueryCtx.getOpc());
@@ -392,45 +340,20 @@ private void rewriteOriginalQuery() throws SemanticException {
   * This method logs the reason for which we cannot apply the rewrite optimization.
   * @return
   */
-  boolean checkIfAllRewriteCriteriaIsMet(RewriteCanApplyCtx canApplyCtx){
-    if (canApplyCtx.getAggFuncCnt() > 1){
-      LOG.debug("More than 1 agg funcs: " +
-          "Not supported by " + getName() + " optimization.");
-      return false;
-    }
-    if (canApplyCtx.isAggFuncIsNotCount()){
-      LOG.debug("Agg func other than count is " +
-          "not supported by " + getName() + " optimization.");
-      return false;
-    }
-    if (canApplyCtx.isCountOnAllCols()){
-      LOG.debug("Currently count function needs group by on key columns. This is a count(*) case.,"
-          + "Cannot apply this " + getName() + " optimization.");
-      return false;
-    }
-    if (canApplyCtx.isCountOfOne()){
-      LOG.debug("Currently count function needs group by on key columns. This is a count(1) case.,"
-          + "Cannot apply this " + getName() + " optimization.");
-      return false;
-    }
-    if (canApplyCtx.isAggFuncColsFetchException()){
-      LOG.debug("Got exception while locating child col refs " +
-          "of agg func, skipping " + getName() + " optimization.");
-      return false;
-    }
-    if (canApplyCtx.isWhrClauseColsFetchException()){
-      LOG.debug("Got exception while locating child col refs for where clause, "
-          + "skipping " + getName() + " optimization.");
+  boolean checkIfAllRewriteCriteriaIsMet(RewriteCanApplyCtx canApplyCtx) {
+    if (canApplyCtx.isSelClauseColsFetchException()) {
+      LOG.debug("Got exception while locating child col refs for select list, " + "skipping "
+          + getName() + " optimization.");
       return false;
     }
-    if (canApplyCtx.isSelClauseColsFetchException()){
-      LOG.debug("Got exception while locating child col refs for select list, "
-          + "skipping " + getName() + " optimization.");
+    if (canApplyCtx.isAggFuncIsNotCount()) {
+      LOG.debug("Agg func other than count is " + "not supported by " + getName()
+          + " optimization.");
      return false;
    }
-    if (canApplyCtx.isGbyKeysFetchException()){
-      LOG.debug("Got exception while locating child col refs for GroupBy key, "
-          + "skipping " + getName() + " optimization.");
+    if (canApplyCtx.isAggParameterException()) {
+      LOG.debug("Got exception while locating parameter refs for aggregation, " + "skipping "
+          + getName() + " optimization.");
       return false;
     }
     return true;
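For orientation before the deleted processor file below: the net effect of a successful rewrite is to answer the group-by from the pre-aggregated index table instead of the base table. A rough sketch of the transformation for the test query (the `_count_of_<key>` column name comes from this patch's own "_count_of_" prefix; the index table name shown follows Hive's usual default__<base>_<index>__ convention and is an assumption here):

-- Original query:
SELECT l_shipdate, count(l_shipdate) FROM lineitem_ix GROUP BY l_shipdate;

-- Roughly what the rewritten plan computes instead (illustrative name):
SELECT l_shipdate, sum(`_count_of_l_shipdate`)
FROM default__lineitem_ix_lineitem_ix_lshipdate_idx__
GROUP BY l_shipdate;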
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndex.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndex.java
deleted file mode 100644
index 74614f3..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndex.java
+++ /dev/null
@@ -1,319 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.optimizer.index;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.Stack;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.exec.ColumnInfo;
-import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
-import org.apache.hadoop.hive.ql.exec.GroupByOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.RowSchema;
-import org.apache.hadoop.hive.ql.exec.SelectOperator;
-import org.apache.hadoop.hive.ql.exec.TableScanOperator;
-import org.apache.hadoop.hive.ql.lib.Node;
-import org.apache.hadoop.hive.ql.lib.NodeProcessor;
-import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.metadata.Table;
-import org.apache.hadoop.hive.ql.optimizer.ColumnPrunerProcFactory;
-import org.apache.hadoop.hive.ql.parse.OpParseContext;
-import org.apache.hadoop.hive.ql.parse.ParseContext;
-import org.apache.hadoop.hive.ql.parse.RowResolver;
-import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.ql.plan.AggregationDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.GroupByDesc;
-import org.apache.hadoop.hive.ql.plan.OperatorDesc;
-import org.apache.hadoop.hive.ql.plan.TableScanDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
-import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-
-/**
- * This class defines a procedure factory used to rewrite the operator plan
- * Each method replaces the necessary base table data structures with
- * the index table data structures for each operator.
- */
-public final class RewriteQueryUsingAggregateIndex {
-  private static final Log LOG = LogFactory.getLog(RewriteQueryUsingAggregateIndex.class.getName());
-
-  private RewriteQueryUsingAggregateIndex() {
-    //this prevents the class from getting instantiated
-  }
-
-  private static class NewQuerySelectSchemaProc implements NodeProcessor {
-    public Object process(Node nd, Stack stack, NodeProcessorCtx ctx,
-        Object... nodeOutputs) throws SemanticException {
-      SelectOperator operator = (SelectOperator)nd;
-      RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx = (RewriteQueryUsingAggregateIndexCtx)ctx;
-      List<Operator<? extends OperatorDesc>> childOps = operator.getChildOperators();
-      Operator<? extends OperatorDesc> childOp = childOps.iterator().next();
-
-      //we need to set the colList, outputColumnNames, colExprMap,
-      // rowSchema for only that SelectOperator which precedes the GroupByOperator
-      // count(indexed_key_column) needs to be replaced by sum(`_count_of_indexed_key_column`)
-      if (childOp instanceof GroupByOperator){
-        List<ExprNodeDesc> selColList =
-            operator.getConf().getColList();
-        selColList.add(rewriteQueryCtx.getAggrExprNode());
-
-        List<String> selOutputColNames =
-            operator.getConf().getOutputColumnNames();
-        selOutputColNames.add(rewriteQueryCtx.getAggrExprNode().getColumn());
-
-        RowSchema selRS = operator.getSchema();
-        List<ColumnInfo> selRSSignature =
-            selRS.getSignature();
-        //Need to create a new type for Column[_count_of_indexed_key_column] node
-        PrimitiveTypeInfo pti = TypeInfoFactory.getPrimitiveTypeInfo("bigint");
-        pti.setTypeName("bigint");
-        ColumnInfo newCI = new ColumnInfo(rewriteQueryCtx.getAggregateFunction(), pti, "", false);
-        selRSSignature.add(newCI);
-        selRS.setSignature((ArrayList<ColumnInfo>) selRSSignature);
-        operator.setSchema(selRS);
-      }
-      return null;
-    }
-  }
-
-  public static NewQuerySelectSchemaProc getNewQuerySelectSchemaProc(){
-    return new NewQuerySelectSchemaProc();
-  }
-
-
-  /**
-   * This processor replaces the original TableScanOperator with
-   * the new TableScanOperator and metadata that scans over the
-   * index table rather than scanning over the original table.
-   *
-   */
-  private static class ReplaceTableScanOpProc implements NodeProcessor {
-    public Object process(Node nd, Stack stack, NodeProcessorCtx ctx,
-        Object... nodeOutputs) throws SemanticException {
-      TableScanOperator scanOperator = (TableScanOperator)nd;
-      RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx = (RewriteQueryUsingAggregateIndexCtx)ctx;
-      String alias = rewriteQueryCtx.getAlias();
-
-      //Need to remove the original TableScanOperators from these data structures
-      // and add new ones
-      Map<TableScanOperator, Table> topToTable =
-          rewriteQueryCtx.getParseContext().getTopToTable();
-      Map<String, Operator<? extends OperatorDesc>> topOps =
-          rewriteQueryCtx.getParseContext().getTopOps();
-      Map<Operator<? extends OperatorDesc>, OpParseContext> opParseContext =
-          rewriteQueryCtx.getParseContext().getOpParseCtx();
-
-      //need this to set rowResolver for new scanOperator
-      OpParseContext operatorContext = opParseContext.get(scanOperator);
-
-      //remove original TableScanOperator
-      topOps.remove(alias);
-      topToTable.remove(scanOperator);
-      opParseContext.remove(scanOperator);
-
-      //construct a new descriptor for the index table scan
-      TableScanDesc indexTableScanDesc = new TableScanDesc();
-      indexTableScanDesc.setGatherStats(false);
-
-      String indexTableName = rewriteQueryCtx.getIndexName();
-      Table indexTableHandle = null;
-      try {
-        indexTableHandle = rewriteQueryCtx.getHiveDb().getTable(indexTableName);
-      } catch (HiveException e) {
-        LOG.error("Error while getting the table handle for index table.");
-        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
-        throw new SemanticException(e.getMessage(), e);
-      }
-
-      String k = indexTableName + Path.SEPARATOR;
-      indexTableScanDesc.setStatsAggPrefix(k);
-      scanOperator.setConf(indexTableScanDesc);
-
-      //Construct the new RowResolver for the new TableScanOperator
-      RowResolver rr = new RowResolver();
-      try {
-        StructObjectInspector rowObjectInspector =
-            (StructObjectInspector) indexTableHandle.getDeserializer().getObjectInspector();
-        for (String column : rewriteQueryCtx.getColumns()) {
-          StructField field = rowObjectInspector.getStructFieldRef(column);
-          rr.put(indexTableName, field.getFieldName(), new ColumnInfo(field.getFieldName(),
-              TypeInfoUtils.getTypeInfoFromObjectInspector(field.getFieldObjectInspector()),
-              indexTableName, false));
-        }
-      } catch (SerDeException e) {
-        LOG.error("Error while creating the RowResolver for new TableScanOperator.");
-        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
-        throw new SemanticException(e.getMessage(), e);
-      }
-
-      //Set row resolver for new table
-      operatorContext.setRowResolver(rr);
-
-      String newAlias = indexTableName;
-      int index = alias.lastIndexOf(":");
-      if (index >= 0) {
-        newAlias = alias.substring(0, index) + ":" + indexTableName;
-      }
-
-      //Scan operator now points to other table
-      topToTable.put(scanOperator, indexTableHandle);
-      scanOperator.getConf().setAlias(newAlias);
-      scanOperator.setAlias(indexTableName);
-      topOps.put(newAlias, scanOperator);
-      opParseContext.put(scanOperator, operatorContext);
-      rewriteQueryCtx.getParseContext().setTopToTable(
-          (HashMap<TableScanOperator, Table>) topToTable);
-      rewriteQueryCtx.getParseContext().setTopOps(
-          (HashMap<String, Operator<? extends OperatorDesc>>) topOps);
-      rewriteQueryCtx.getParseContext().setOpParseCtx(
-          (LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext>) opParseContext);
-
-      ColumnPrunerProcFactory.setupNeededColumns(scanOperator, rr,
-          new ArrayList<String>(rewriteQueryCtx.getColumns()));
-
-      return null;
-    }
-  }
-
-  public static ReplaceTableScanOpProc getReplaceTableScanProc(){
-    return new ReplaceTableScanOpProc();
-  }
-
-  /**
-   * We need to replace the count(indexed_column_key) GenericUDAF aggregation function for
-   * group-by construct to "sum" GenericUDAF.
-   * This processor creates a new operator tree for a sample query that creates a GroupByOperator
-   * with sum aggregation function and uses that GroupByOperator information to replace
-   * the original GroupByOperator aggregation information.
-   * It replaces the AggregationDesc (aggregation descriptor) of the old GroupByOperator with the
-   * new Aggregation Desc of the new GroupByOperator.
-   */
-  private static class NewQueryGroupbySchemaProc implements NodeProcessor {
-    public Object process(Node nd, Stack stack, NodeProcessorCtx ctx,
-        Object... nodeOutputs) throws SemanticException {
-      GroupByOperator operator = (GroupByOperator)nd;
-      RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx = (RewriteQueryUsingAggregateIndexCtx)ctx;
-
-      //We need to replace the GroupByOperator which is in
-      //groupOpToInputTables map with the new GroupByOperator
-      if(rewriteQueryCtx.getParseContext().getGroupOpToInputTables().containsKey(operator)){
-        List<ExprNodeDesc> gbyKeyList = operator.getConf().getKeys();
-        String gbyKeys = null;
-        Iterator<ExprNodeDesc> gbyKeyListItr = gbyKeyList.iterator();
-        while(gbyKeyListItr.hasNext()){
-          ExprNodeDesc expr = gbyKeyListItr.next().clone();
-          if(expr instanceof ExprNodeColumnDesc){
-            ExprNodeColumnDesc colExpr = (ExprNodeColumnDesc)expr;
-            gbyKeys = colExpr.getColumn();
-            if(gbyKeyListItr.hasNext()){
-              gbyKeys = gbyKeys + ",";
-            }
-          }
-        }
-
-
-        //the query contains the sum aggregation GenericUDAF
-        String selReplacementCommand = "select sum(`"
-            + rewriteQueryCtx.getAggregateFunction() + "`)"
-            + " from " + rewriteQueryCtx.getIndexName()
-            + " group by " + gbyKeys + " ";
-        //create a new ParseContext for the query to retrieve its operator tree,
-        //and the required GroupByOperator from it
-        ParseContext newDAGContext = RewriteParseContextGenerator.generateOperatorTree(
-            rewriteQueryCtx.getParseContext().getConf(),
-            selReplacementCommand);
-
-        //we get our new GroupByOperator here
-        Map<GroupByOperator, Set<String>> newGbyOpMap = newDAGContext.getGroupOpToInputTables();
-        GroupByOperator newGbyOperator = newGbyOpMap.keySet().iterator().next();
-        GroupByDesc oldConf = operator.getConf();
-
-        //we need this information to set the correct colList, outputColumnNames in SelectOperator
-        ExprNodeColumnDesc aggrExprNode = null;
-
-        //Construct the new AggregationDesc to get rid of the current
-        //internal names and replace them with new internal names
-        //as required by the operator tree
-        GroupByDesc newConf = newGbyOperator.getConf();
-        List<AggregationDesc> newAggrList = newConf.getAggregators();
-        if(newAggrList != null && newAggrList.size() > 0){
-          for (AggregationDesc aggregationDesc : newAggrList) {
-            rewriteQueryCtx.setEval(aggregationDesc.getGenericUDAFEvaluator());
-            aggrExprNode = (ExprNodeColumnDesc)aggregationDesc.getParameters().get(0);
-            rewriteQueryCtx.setAggrExprNode(aggrExprNode);
-          }
-        }
-
-        //Now the GroupByOperator has the new AggregationList; sum(`_count_of_indexed_key`)
-        //instead of count(indexed_key)
-        OpParseContext gbyOPC = rewriteQueryCtx.getOpc().get(operator);
-        RowResolver gbyRR = newDAGContext.getOpParseCtx().get(newGbyOperator).getRowResolver();
-        gbyOPC.setRowResolver(gbyRR);
-        rewriteQueryCtx.getOpc().put(operator, gbyOPC);
-
-        oldConf.setAggregators((ArrayList<AggregationDesc>) newAggrList);
-        operator.setConf(oldConf);
-
-
-      }else{
-        //we just need to reset the GenericUDAFEvaluator and its name for this
-        //GroupByOperator whose parent is the ReduceSinkOperator
-        GroupByDesc childConf = (GroupByDesc) operator.getConf();
-        List<AggregationDesc> childAggrList = childConf.getAggregators();
-        if(childAggrList != null && childAggrList.size() > 0){
-          for (AggregationDesc aggregationDesc : childAggrList) {
-            List<ExprNodeDesc> paraList = aggregationDesc.getParameters();
-            List<ObjectInspector> parametersOIList = new ArrayList<ObjectInspector>();
-            for (ExprNodeDesc expr : paraList) {
-              parametersOIList.add(expr.getWritableObjectInspector());
-            }
-            GenericUDAFEvaluator evaluator = FunctionRegistry.getGenericUDAFEvaluator(
-                "sum", parametersOIList, false, false);
-            aggregationDesc.setGenericUDAFEvaluator(evaluator);
-            aggregationDesc.setGenericUDAFName("sum");
-          }
-        }
-
-      }
-
-      return null;
-    }
-  }
-
-  public static NewQueryGroupbySchemaProc getNewQueryGroupbySchemaProc(){
-    return new NewQueryGroupbySchemaProc();
-  }
-}
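Both the deleted NewQueryGroupbySchemaProc above and its replacement in the context class below synthesize a small helper query purely to compile a GroupByOperator whose AggregationDesc already carries sum over the count column with fresh internal names; it is never executed as the user's query. With the patch's "_count_of_" prefix and the index key as the only group-by column, the generated selReplacementCommand looks like the sketch below (the FROM target is whatever getIndexName() returns for the chosen index; the name shown is illustrative):

-- Helper query handed to RewriteParseContextGenerator.generateOperatorTree
-- only to harvest its GroupByOperator and evaluator:
select sum(`_count_of_l_shipdate`) from lineitem_ix_lshipdate_idx group by l_shipdate ;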
org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; /** * RewriteQueryUsingAggregateIndexCtx class stores the @@ -53,37 +67,37 @@ */ public final class RewriteQueryUsingAggregateIndexCtx implements NodeProcessorCtx { - - private RewriteQueryUsingAggregateIndexCtx(ParseContext parseContext, Hive hiveDb, - String indexTableName, String alias, Set columns, String aggregateFunction) { + private static final Log LOG = LogFactory.getLog(RewriteQueryUsingAggregateIndexCtx.class.getName()); + private RewriteQueryUsingAggregateIndexCtx(ParseContext parseContext, Hive hiveDb, + RewriteCanApplyCtx canApplyCtx) { this.parseContext = parseContext; this.hiveDb = hiveDb; - this.indexTableName = indexTableName; - this.alias = alias; - this.aggregateFunction = aggregateFunction; - this.columns = columns; + this.canApplyCtx = canApplyCtx; + this.indexTableName = canApplyCtx.getIndexTableName(); + this.alias = canApplyCtx.getAlias(); + this.aggregateFunction = canApplyCtx.getAggFunction(); this.opc = parseContext.getOpParseCtx(); + this.indexKey = canApplyCtx.getIndexKey(); } public static RewriteQueryUsingAggregateIndexCtx getInstance(ParseContext parseContext, - Hive hiveDb, String indexTableName, String alias, - Set columns, String aggregateFunction) { + Hive hiveDb, RewriteCanApplyCtx canApplyCtx) { return new RewriteQueryUsingAggregateIndexCtx( - parseContext, hiveDb, indexTableName, alias, columns, aggregateFunction); + parseContext, hiveDb, canApplyCtx); } - private Map, OpParseContext> opc = new LinkedHashMap, OpParseContext>(); private final Hive hiveDb; private final ParseContext parseContext; + private RewriteCanApplyCtx canApplyCtx; //We need the GenericUDAFEvaluator for GenericUDAF function "sum" private GenericUDAFEvaluator eval = null; private final String indexTableName; private final String alias; private final String aggregateFunction; - private final Set columns; private ExprNodeColumnDesc aggrExprNode = null; + private String indexKey; public Map, OpParseContext> getOpc() { return opc; @@ -116,55 +130,7 @@ public void setAggrExprNode(ExprNodeColumnDesc aggrExprNode) { public ExprNodeColumnDesc getAggrExprNode() { return aggrExprNode; } - - /** - * Walk the original operator tree using the {@link DefaultGraphWalker} using the rules. - * Each of the rules invoke respective methods from the {@link RewriteQueryUsingAggregateIndex} - * to rewrite the original query using aggregate index. 
- * - * @param topOp - * @throws SemanticException - */ - public void invokeRewriteQueryProc( - Operator topOp) throws SemanticException{ - Map opRules = new LinkedHashMap(); - - // replace scan operator containing original table with index table - opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%"), - RewriteQueryUsingAggregateIndex.getReplaceTableScanProc()); - //rule that replaces index key selection with - //sum(`_count_of_indexed_column`) function in original query - opRules.put(new RuleRegExp("R2", SelectOperator.getOperatorName() + "%"), - RewriteQueryUsingAggregateIndex.getNewQuerySelectSchemaProc()); - //Manipulates the ExprNodeDesc from GroupByOperator aggregation list - opRules.put(new RuleRegExp("R3", GroupByOperator.getOperatorName() + "%"), - RewriteQueryUsingAggregateIndex.getNewQueryGroupbySchemaProc()); - - // The dispatcher fires the processor corresponding to the closest matching - // rule and passes the context along - Dispatcher disp = new DefaultRuleDispatcher(getDefaultProc(), opRules, this); - GraphWalker ogw = new DefaultGraphWalker(disp); - - // Create a list of topop nodes - List topNodes = new ArrayList(); - topNodes.add(topOp); - ogw.startWalking(topNodes, null); - } - - /** - * Default procedure for {@link DefaultRuleDispatcher}. - * @return - */ - private NodeProcessor getDefaultProc() { - return new NodeProcessor() { - @Override - public Object process(Node nd, Stack stack, - NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { - return null; - } - }; - } - + public String getAlias() { return alias; } @@ -173,7 +139,216 @@ public String getAggregateFunction() { return aggregateFunction; } - public Set getColumns() { - return columns; + public String getIndexKey() { + return indexKey; + } + + public void setIndexKey(String indexKey) { + this.indexKey = indexKey; + } + + public void invokeRewriteQueryProc() throws SemanticException { + this.replaceTableScanProcess(canApplyCtx.getTableScanOperator()); + //We need aggrExprNode. Thus, replaceGroupByOperatorProcess should come before replaceSelectOperatorProcess + for (int index = 0; index < canApplyCtx.getGroupByOperators().size(); index++) { + this.replaceGroupByOperatorProcess(canApplyCtx.getGroupByOperators().get(index), index); + } + for (SelectOperator selectperator : canApplyCtx.getSelectOperators()) { + this.replaceSelectOperatorProcess(selectperator); + } + } + + /** + * This method replaces the original TableScanOperator with the new + * TableScanOperator and metadata that scans over the index table rather than + * scanning over the original table. 
+ * + */ + private void replaceTableScanProcess(TableScanOperator scanOperator) throws SemanticException { + RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx = this; + String alias = rewriteQueryCtx.getAlias(); + + // Need to remove the original TableScanOperators from these data structures + // and add new ones + Map topToTable = rewriteQueryCtx.getParseContext().getTopToTable(); + Map> topOps = rewriteQueryCtx.getParseContext() + .getTopOps(); + Map, OpParseContext> opParseContext = rewriteQueryCtx + .getParseContext().getOpParseCtx(); + + // need this to set rowResolver for new scanOperator + OpParseContext operatorContext = opParseContext.get(scanOperator); + + // remove original TableScanOperator + topOps.remove(alias); + topToTable.remove(scanOperator); + opParseContext.remove(scanOperator); + + // construct a new descriptor for the index table scan + TableScanDesc indexTableScanDesc = new TableScanDesc(); + indexTableScanDesc.setGatherStats(false); + + String indexTableName = rewriteQueryCtx.getIndexName(); + Table indexTableHandle = null; + try { + indexTableHandle = rewriteQueryCtx.getHiveDb().getTable(indexTableName); + } catch (HiveException e) { + LOG.error("Error while getting the table handle for index table."); + LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e)); + throw new SemanticException(e.getMessage(), e); + } + + String k = indexTableName + Path.SEPARATOR; + indexTableScanDesc.setStatsAggPrefix(k); + scanOperator.setConf(indexTableScanDesc); + + // Construct the new RowResolver for the new TableScanOperator + RowResolver rr = new RowResolver(); + try { + StructObjectInspector rowObjectInspector = (StructObjectInspector) indexTableHandle + .getDeserializer().getObjectInspector(); + StructField field = rowObjectInspector.getStructFieldRef(rewriteQueryCtx.getIndexKey()); + rr.put(indexTableName, field.getFieldName(), new ColumnInfo(field.getFieldName(), + TypeInfoUtils.getTypeInfoFromObjectInspector(field.getFieldObjectInspector()), + indexTableName, false)); + } catch (SerDeException e) { + LOG.error("Error while creating the RowResolver for new TableScanOperator."); + LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e)); + throw new SemanticException(e.getMessage(), e); + } + + // Set row resolver for new table + operatorContext.setRowResolver(rr); + + String newAlias = indexTableName; + int index = alias.lastIndexOf(":"); + if (index >= 0) { + newAlias = alias.substring(0, index) + ":" + indexTableName; + } + + // Scan operator now points to other table + topToTable.put(scanOperator, indexTableHandle); + scanOperator.getConf().setAlias(newAlias); + scanOperator.setAlias(indexTableName); + topOps.put(newAlias, scanOperator); + opParseContext.put(scanOperator, operatorContext); + rewriteQueryCtx.getParseContext().setTopToTable((HashMap) topToTable); + rewriteQueryCtx.getParseContext().setTopOps( + (HashMap>) topOps); + rewriteQueryCtx.getParseContext().setOpParseCtx( + (LinkedHashMap, OpParseContext>) opParseContext); + + ColumnPrunerProcFactory.setupNeededColumns(scanOperator, rr, + Arrays.asList(rewriteQueryCtx.getIndexKey())); + } + + /** + * This method replaces the original SelectOperator with the new + * SelectOperator with a new column indexed_key_column. 
+ */ + private void replaceSelectOperatorProcess(SelectOperator operator) throws SemanticException { + RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx = this; + // we need to set the colList, outputColumnNames, colExprMap, + // rowSchema for only that SelectOperator which precedes the GroupByOperator + // count(indexed_key_column) needs to be replaced by + // sum(`_count_of_indexed_key_column`) + List selColList = operator.getConf().getColList(); + selColList.add(rewriteQueryCtx.getAggrExprNode()); + + List selOutputColNames = operator.getConf().getOutputColumnNames(); + selOutputColNames.add(rewriteQueryCtx.getAggrExprNode().getColumn()); + + operator.getColumnExprMap().put(rewriteQueryCtx.getAggrExprNode().getColumn(), + rewriteQueryCtx.getAggrExprNode()); + + RowSchema selRS = operator.getSchema(); + List selRSSignature = selRS.getSignature(); + // Need to create a new type for Column[_count_of_indexed_key_column] node + PrimitiveTypeInfo pti = TypeInfoFactory.getPrimitiveTypeInfo("bigint"); + pti.setTypeName("bigint"); + ColumnInfo newCI = new ColumnInfo(rewriteQueryCtx.getAggregateFunction(), pti, "", false); + selRSSignature.add(newCI); + selRS.setSignature((ArrayList) selRSSignature); + operator.setSchema(selRS); + } + + /** + * We need to replace the count(indexed_column_key) GenericUDAF aggregation + * function for group-by construct to "sum" GenericUDAF. This method creates a + * new operator tree for a sample query that creates a GroupByOperator with + * sum aggregation function and uses that GroupByOperator information to + * replace the original GroupByOperator aggregation information. It replaces + * the AggregationDesc (aggregation descriptor) of the old GroupByOperator + * with the new Aggregation Desc of the new GroupByOperator. 
+
+  /**
+   * We need to replace the count(indexed_column_key) GenericUDAF aggregation
+   * function in the group-by construct with the "sum" GenericUDAF. This method
+   * builds the operator tree for a sample query that contains a GroupByOperator
+   * with the sum aggregation function, and uses that GroupByOperator's
+   * information to replace the aggregation information of the original
+   * GroupByOperator: the AggregationDesc (aggregation descriptor) of the old
+   * GroupByOperator is replaced with the new AggregationDesc of the new
+   * GroupByOperator.
+   */
+  private void replaceGroupByOperatorProcess(GroupByOperator operator, int index)
+      throws SemanticException {
+    RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx = this;
+
+    // We need to replace the GroupByOperator which is before RS
+    if (index == 0) {
+      // the query contains the sum aggregation GenericUDAF
+      String selReplacementCommand = "select sum(`" + rewriteQueryCtx.getAggregateFunction() + "`)"
+          + " from " + rewriteQueryCtx.getIndexName() + " group by "
+          + rewriteQueryCtx.getIndexKey() + " ";
+      // create a new ParseContext for the query to retrieve its operator tree,
+      // and the required GroupByOperator from it
+      ParseContext newDAGContext = RewriteParseContextGenerator.generateOperatorTree(
+          rewriteQueryCtx.getParseContext().getConf(), selReplacementCommand);
+
+      // we get our new GroupByOperator here
+      Map<GroupByOperator, Set<String>> newGbyOpMap = newDAGContext.getGroupOpToInputTables();
+      GroupByOperator newGbyOperator = newGbyOpMap.keySet().iterator().next();
+      GroupByDesc oldConf = operator.getConf();
+
+      // we need this information to set the correct colList and
+      // outputColumnNames in the SelectOperator
+      ExprNodeColumnDesc aggrExprNode = null;
+
+      // construct the new AggregationDesc to get rid of the current
+      // internal names and replace them with the new internal names
+      // required by the operator tree
+      GroupByDesc newConf = newGbyOperator.getConf();
+      List<AggregationDesc> newAggrList = newConf.getAggregators();
+      if (newAggrList != null && newAggrList.size() > 0) {
+        for (AggregationDesc aggregationDesc : newAggrList) {
+          rewriteQueryCtx.setEval(aggregationDesc.getGenericUDAFEvaluator());
+          aggrExprNode = (ExprNodeColumnDesc) aggregationDesc.getParameters().get(0);
+          rewriteQueryCtx.setAggrExprNode(aggrExprNode);
+        }
+      }
+
+      // now the GroupByOperator has the new AggregationList:
+      // sum(`_count_of_indexed_key`) instead of count(indexed_key)
+      OpParseContext gbyOPC = rewriteQueryCtx.getOpc().get(operator);
+      RowResolver gbyRR = newDAGContext.getOpParseCtx().get(newGbyOperator).getRowResolver();
+      gbyOPC.setRowResolver(gbyRR);
+      rewriteQueryCtx.getOpc().put(operator, gbyOPC);
+
+      oldConf.setAggregators((ArrayList<AggregationDesc>) newAggrList);
+      operator.setConf(oldConf);
+
+    } else {
+      // we just need to reset the GenericUDAFEvaluator and its name for this
+      // GroupByOperator whose parent is the ReduceSinkOperator
+      GroupByDesc childConf = (GroupByDesc) operator.getConf();
+      List<AggregationDesc> childAggrList = childConf.getAggregators();
+      if (childAggrList != null && childAggrList.size() > 0) {
+        for (AggregationDesc aggregationDesc : childAggrList) {
+          List<ExprNodeDesc> paraList = aggregationDesc.getParameters();
+          List<ObjectInspector> parametersOIList = new ArrayList<ObjectInspector>();
+          for (ExprNodeDesc expr : paraList) {
+            parametersOIList.add(expr.getWritableObjectInspector());
+          }
+          GenericUDAFEvaluator evaluator = FunctionRegistry.getGenericUDAFEvaluator("sum",
+              parametersOIList, false, false);
+          aggregationDesc.setGenericUDAFEvaluator(evaluator);
+          aggregationDesc.setGenericUDAFName("sum");
+        }
+      }
+    }
+  }
 }
diff --git a/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx_cbo_1.q b/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx_cbo_1.q new file mode 100644 index 0000000..9ce7d85 --- /dev/null +++ b/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx_cbo_1.q @@ -0,0 +1,173 @@ +set hive.stats.dbclass=fs; +set hive.stats.autogather=true; +set hive.cbo.enable=true; + +DROP TABLE IF EXISTS lineitem_ix; +CREATE TABLE lineitem_ix (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE
DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|'; + +LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_ix; + +CREATE INDEX lineitem_ix_lshipdate_idx ON TABLE lineitem_ix(l_shipdate) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(l_shipdate)"); +ALTER INDEX lineitem_ix_lshipdate_idx ON lineitem_ix REBUILD; + +explain select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate; + +select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate +order by l_shipdate; + +set hive.optimize.index.groupby=true; + +explain select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate; + +select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate +order by l_shipdate; + +set hive.optimize.index.groupby=false; + + +explain select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month; + +select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month; + +set hive.optimize.index.groupby=true; + +explain select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month; + +select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month; + +explain select lastyear.month, + thisyear.month, + (thisyear.monthly_shipments - lastyear.monthly_shipments) / +lastyear.monthly_shipments as monthly_shipments_delta + from (select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments + from lineitem_ix + where year(l_shipdate) = 1997 + group by year(l_shipdate), month(l_shipdate) + ) lastyear join + (select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments + from lineitem_ix + where year(l_shipdate) = 1998 + group by year(l_shipdate), month(l_shipdate) + ) thisyear + on lastyear.month = thisyear.month; + +explain select l_shipdate, cnt +from (select l_shipdate, count(l_shipdate) as cnt from lineitem_ix group by l_shipdate +union all +select l_shipdate, l_orderkey as cnt +from lineitem_ix) dummy; + +CREATE TABLE tbl(key int, value int); +CREATE INDEX tbl_key_idx ON TABLE tbl(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)"); +ALTER INDEX tbl_key_idx ON tbl REBUILD; + +EXPLAIN select key, count(key) from tbl where key = 1 group by key; +EXPLAIN select key, count(key) from tbl group by key; + +EXPLAIN select count(1) from tbl; +EXPLAIN select count(key) from tbl; + +EXPLAIN select key FROM tbl GROUP BY key; +EXPLAIN select key FROM tbl GROUP BY value, key; +EXPLAIN select key FROM tbl WHERE key = 3 GROUP BY key; +EXPLAIN select key FROM tbl WHERE value = 2 GROUP BY key; +EXPLAIN select key FROM tbl GROUP BY key, substr(key,2,3); + +EXPLAIN select key, value FROM tbl GROUP BY value, 
key; +EXPLAIN select key, value FROM tbl WHERE value = 1 GROUP BY key, value; + +EXPLAIN select DISTINCT key FROM tbl; +EXPLAIN select DISTINCT key FROM tbl; +EXPLAIN select DISTINCT key FROM tbl; +EXPLAIN select DISTINCT key, value FROM tbl; +EXPLAIN select DISTINCT key, value FROM tbl WHERE value = 2; +EXPLAIN select DISTINCT key, value FROM tbl WHERE value = 2 AND key = 3; +EXPLAIN select DISTINCT key, value FROM tbl WHERE value = key; +EXPLAIN select DISTINCT key, substr(value,2,3) FROM tbl WHERE value = key; +EXPLAIN select DISTINCT key, substr(value,2,3) FROM tbl; + +EXPLAIN select * FROM (select DISTINCT key, value FROM tbl) v1 WHERE v1.value = 2; + +DROP TABLE tbl; + +CREATE TABLE tblpart (key int, value string) PARTITIONED BY (ds string, hr int); +INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-08', hr=11) SELECT key, value FROM srcpart WHERE ds = '2008-04-08' AND hr = 11; +INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-08', hr=12) SELECT key, value FROM srcpart WHERE ds = '2008-04-08' AND hr = 12; +INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-09', hr=11) SELECT key, value FROM srcpart WHERE ds = '2008-04-09' AND hr = 11; +INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-09', hr=12) SELECT key, value FROM srcpart WHERE ds = '2008-04-09' AND hr = 12; + +CREATE INDEX tbl_part_index ON TABLE tblpart(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)"); + +ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-08', hr=11) REBUILD; +EXPLAIN SELECT key, count(key) FROM tblpart WHERE ds='2008-04-09' AND hr=12 AND key < 10 GROUP BY key; + +ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-08', hr=12) REBUILD; +ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-09', hr=11) REBUILD; +ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-09', hr=12) REBUILD; +EXPLAIN SELECT key, count(key) FROM tblpart WHERE ds='2008-04-09' AND hr=12 AND key < 10 GROUP BY key; + +DROP INDEX tbl_part_index on tblpart; +DROP TABLE tblpart; + +CREATE TABLE tbl(key int, value int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'; +LOAD DATA LOCAL INPATH '../../data/files/tbl.txt' OVERWRITE INTO TABLE tbl; + +CREATE INDEX tbl_key_idx ON TABLE tbl(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)"); +ALTER INDEX tbl_key_idx ON tbl REBUILD; + +set hive.optimize.index.groupby=false; +explain select key, count(key) from tbl group by key order by key; +select key, count(key) from tbl group by key order by key; +set hive.optimize.index.groupby=true; +explain select key, count(key) from tbl group by key order by key; +select key, count(key) from tbl group by key order by key; +DROP TABLE tbl; \ No newline at end of file diff --git a/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx_cbo_2.q b/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx_cbo_2.q new file mode 100644 index 0000000..78329d6 --- /dev/null +++ b/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx_cbo_2.q @@ -0,0 +1,376 @@ +set hive.stats.dbclass=fs; +set hive.stats.autogather=true; +set hive.cbo.enable=true; +set hive.optimize.index.groupby=true; + +DROP TABLE IF EXISTS lineitem_ix; +CREATE TABLE lineitem_ix (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE 
STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|'; + +LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_ix; + +CREATE INDEX lineitem_ix_L_ORDERKEY_idx ON TABLE lineitem_ix(L_ORDERKEY) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(L_ORDERKEY)"); +ALTER INDEX lineitem_ix_L_ORDERKEY_idx ON lineitem_ix REBUILD; + +CREATE INDEX lineitem_ix_L_PARTKEY_idx ON TABLE lineitem_ix(L_PARTKEY) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(L_PARTKEY)"); +ALTER INDEX lineitem_ix_L_PARTKEY_idx ON lineitem_ix REBUILD; + +explain +select count(1) +from lineitem_ix; + +select count(1) +from lineitem_ix; + +explain +select count(L_ORDERKEY) +from lineitem_ix; + +select count(L_ORDERKEY) +from lineitem_ix; + +explain select L_ORDERKEY+L_PARTKEY as keysum, +count(L_ORDERKEY), count(L_PARTKEY) +from lineitem_ix +group by L_ORDERKEY, L_PARTKEY; + +select L_ORDERKEY+L_PARTKEY as keysum, +count(L_ORDERKEY), count(L_PARTKEY) +from lineitem_ix +group by L_ORDERKEY, L_PARTKEY; + +explain +select L_ORDERKEY, count(L_ORDERKEY) +from lineitem_ix +where L_ORDERKEY = 7 +group by L_ORDERKEY; + +select L_ORDERKEY, count(L_ORDERKEY) +from lineitem_ix +where L_ORDERKEY = 7 +group by L_ORDERKEY; + +explain +select L_ORDERKEY, count(1) +from lineitem_ix +group by L_ORDERKEY; + +select L_ORDERKEY, count(1) +from lineitem_ix +group by L_ORDERKEY; + +explain +select count(L_ORDERKEY+1) +from lineitem_ix; + +select count(L_ORDERKEY+1) +from lineitem_ix; + +explain +select L_ORDERKEY, count(L_ORDERKEY+1) +from lineitem_ix +group by L_ORDERKEY; + +select L_ORDERKEY, count(L_ORDERKEY+1) +from lineitem_ix +group by L_ORDERKEY; + +explain +select L_ORDERKEY, count(L_ORDERKEY+1+L_ORDERKEY+2) +from lineitem_ix +group by L_ORDERKEY; + +select L_ORDERKEY, count(L_ORDERKEY+1+L_ORDERKEY+2) +from lineitem_ix +group by L_ORDERKEY; + +explain +select L_ORDERKEY, count(1+L_ORDERKEY+2) +from lineitem_ix +group by L_ORDERKEY; + +select L_ORDERKEY, count(1+L_ORDERKEY+2) +from lineitem_ix +group by L_ORDERKEY; + + +explain +select L_ORDERKEY as a, count(1) as b +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY; + +select L_ORDERKEY as a, count(1) as b +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY; + +explain +select L_ORDERKEY, count(keysum), sum(keysum) +from +(select L_ORDERKEY, L_ORDERKEY+L_PARTKEY as keysum from lineitem_ix) tabA +group by L_ORDERKEY; + +select L_ORDERKEY, count(keysum), sum(keysum) +from +(select L_ORDERKEY, L_ORDERKEY+L_PARTKEY as keysum from lineitem_ix) tabA +group by L_ORDERKEY; + + +explain +select L_ORDERKEY, count(L_ORDERKEY), sum(L_ORDERKEY) +from lineitem_ix +group by L_ORDERKEY; + +select L_ORDERKEY, count(L_ORDERKEY), sum(L_ORDERKEY) +from lineitem_ix +group by L_ORDERKEY; + +explain +select colA, count(colA) +from (select L_ORDERKEY as colA from lineitem_ix) tabA +group by colA; + +select colA, count(colA) +from (select L_ORDERKEY as colA from lineitem_ix) tabA +group by colA; + +explain +select keysum, count(keysum) +from +(select L_ORDERKEY+L_PARTKEY as keysum from lineitem_ix) tabA +group by keysum; + +select keysum, count(keysum) +from +(select L_ORDERKEY+L_PARTKEY as keysum from lineitem_ix) tabA +group by keysum; + +explain +select keysum, count(keysum) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix) tabA +group by 
keysum; + +select keysum, count(keysum) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix) tabA +group by keysum; + + +explain +select keysum, count(1) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix) tabA +group by keysum; + +select keysum, count(1) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix) tabA +group by keysum; + + +explain +select keysum, count(keysum) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix where L_ORDERKEY = 7) tabA +group by keysum; + +select keysum, count(keysum) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix where L_ORDERKEY = 7) tabA +group by keysum; + + +explain +select ckeysum, count(ckeysum) +from +(select keysum, count(keysum) as ckeysum +from + (select L_ORDERKEY+1 as keysum from lineitem_ix where L_ORDERKEY = 7) tabA +group by keysum) tabB +group by ckeysum; + +select ckeysum, count(ckeysum) +from +(select keysum, count(keysum) as ckeysum +from + (select L_ORDERKEY+1 as keysum from lineitem_ix where L_ORDERKEY = 7) tabA +group by keysum) tabB +group by ckeysum; + +explain +select keysum, count(keysum) as ckeysum +from +(select L_ORDERKEY, count(L_ORDERKEY) as keysum +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY)tabA +group by keysum; + +select keysum, count(keysum) as ckeysum +from +(select L_ORDERKEY, count(L_ORDERKEY) as keysum +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY)tabA +group by keysum; + + + +CREATE INDEX src_key_idx ON TABLE src(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)"); +ALTER INDEX src_key_idx ON src REBUILD; + +explain +select tabA.a, tabA.b, tabB.a, tabB.b +from +(select L_ORDERKEY as a, count(L_ORDERKEY) as b +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY) tabA +join +(select key as a, count(key) as b +from src +group by key +) tabB +on (tabA.b=tabB.b); + +select tabA.a, tabA.b, tabB.a, tabB.b +from +(select L_ORDERKEY as a, count(L_ORDERKEY) as b +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY) tabA +join +(select key as a, count(key) as b +from src +group by key +) tabB +on (tabA.b=tabB.b); + + +explain +select tabA.a, tabA.b, tabB.a, tabB.b +from +(select L_ORDERKEY as a, count(L_ORDERKEY) as b +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY) tabA +join +(select key as a, count(key) as b +from src +group by key +) tabB +on (tabA.b=tabB.b and tabB.a < '2'); + +select tabA.a, tabA.b, tabB.a, tabB.b +from +(select L_ORDERKEY as a, count(L_ORDERKEY) as b +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY) tabA +join +(select key as a, count(key) as b +from src +group by key +) tabB +on (tabA.b=tabB.b and tabB.a < '2'); + +EXPLAIN +select L_ORDERKEY FROM lineitem_ix GROUP BY L_ORDERKEY, L_ORDERKEY+1; + +select L_ORDERKEY FROM lineitem_ix GROUP BY L_ORDERKEY, L_ORDERKEY+1; + +EXPLAIN +select L_ORDERKEY, L_ORDERKEY+1, count(L_ORDERKEY) FROM lineitem_ix GROUP BY L_ORDERKEY, L_ORDERKEY+1; + +select L_ORDERKEY, L_ORDERKEY+1, count(L_ORDERKEY) FROM lineitem_ix GROUP BY L_ORDERKEY, L_ORDERKEY+1; + +EXPLAIN +select L_ORDERKEY+2, count(L_ORDERKEY) FROM lineitem_ix GROUP BY L_ORDERKEY+2; + +select L_ORDERKEY+2, count(L_ORDERKEY) FROM lineitem_ix GROUP BY L_ORDERKEY+2; + +--with cbo on, the following query can use idx + +explain +select b, count(b) as ckeysum +from +( +select L_ORDERKEY as a, count(L_ORDERKEY) as b +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY +union all +select L_PARTKEY as a, count(L_PARTKEY) as b +from lineitem_ix +where 
L_PARTKEY < 10 +group by L_PARTKEY +) tabA +group by b; + +select b, count(b) as ckeysum +from +( +select L_ORDERKEY as a, count(L_ORDERKEY) as b +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY +union all +select L_PARTKEY as a, count(L_PARTKEY) as b +from lineitem_ix +where L_PARTKEY < 10 +group by L_PARTKEY +) tabA +group by b; + +--with cbo on, the following query can not use idx because AggFunc is empty here + +explain +select a, count(a) as ckeysum +from +( +select L_ORDERKEY as a, count(L_ORDERKEY) as b +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY +union all +select L_PARTKEY as a, count(L_PARTKEY) as b +from lineitem_ix +where L_PARTKEY < 10 +group by L_PARTKEY +) tabA +group by a; + +select a, count(a) as ckeysum +from +( +select L_ORDERKEY as a, count(L_ORDERKEY) as b +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY +union all +select L_PARTKEY as a, count(L_PARTKEY) as b +from lineitem_ix +where L_PARTKEY < 10 +group by L_PARTKEY +) tabA +group by a; + + diff --git a/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out b/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out index fdc1dc6..f115f55 100644 --- a/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out +++ b/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out @@ -1024,17 +1024,17 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: tbl + alias: default.default__tbl_tbl_key_idx__ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: (key = 1) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: 1 (type: int) - outputColumnNames: key + expressions: 1 (type: int), _count_of_key (type: bigint) + outputColumnNames: key, _count_of_key Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: count(key) + aggregations: sum(_count_of_key) keys: key (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -1047,7 +1047,7 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: sum(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 @@ -1189,14 +1189,14 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: tbl + alias: default.default__tbl_tbl_key_idx__ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: key (type: int) - outputColumnNames: key + expressions: key (type: int), _count_of_key (type: bigint) + outputColumnNames: key, _count_of_key Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: count(key) + aggregations: sum(_count_of_key) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -1206,7 +1206,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: sum(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx_cbo_1.q.out b/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx_cbo_1.q.out new file mode 100644 index 0000000..baaf138 --- /dev/null +++ b/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx_cbo_1.q.out @@ -0,0 +1,2637 @@ 
+PREHOOK: query: DROP TABLE IF EXISTS lineitem_ix +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS lineitem_ix +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE lineitem_ix (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@lineitem_ix +POSTHOOK: query: CREATE TABLE lineitem_ix (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@lineitem_ix +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_ix +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@lineitem_ix +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_ix +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@lineitem_ix +PREHOOK: query: CREATE INDEX lineitem_ix_lshipdate_idx ON TABLE lineitem_ix(l_shipdate) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(l_shipdate)") +PREHOOK: type: CREATEINDEX +PREHOOK: Input: default@lineitem_ix +POSTHOOK: query: CREATE INDEX lineitem_ix_lshipdate_idx ON TABLE lineitem_ix(l_shipdate) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(l_shipdate)") +POSTHOOK: type: CREATEINDEX +POSTHOOK: Input: default@lineitem_ix +POSTHOOK: Output: default@default__lineitem_ix_lineitem_ix_lshipdate_idx__ +PREHOOK: query: ALTER INDEX lineitem_ix_lshipdate_idx ON lineitem_ix REBUILD +PREHOOK: type: ALTERINDEX_REBUILD +PREHOOK: Input: default@lineitem_ix +PREHOOK: Output: default@default__lineitem_ix_lineitem_ix_lshipdate_idx__ +POSTHOOK: query: ALTER INDEX lineitem_ix_lshipdate_idx ON lineitem_ix REBUILD +POSTHOOK: type: ALTERINDEX_REBUILD +POSTHOOK: Input: default@lineitem_ix +POSTHOOK: Output: default@default__lineitem_ix_lineitem_ix_lshipdate_idx__ +POSTHOOK: Lineage: default__lineitem_ix_lineitem_ix_lshipdate_idx__._bucketname SIMPLE [(lineitem_ix)lineitem_ix.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__lineitem_ix_lineitem_ix_lshipdate_idx__._count_of_l_shipdate EXPRESSION [(lineitem_ix)lineitem_ix.FieldSchema(name:l_shipdate, type:string, comment:null), ] +POSTHOOK: Lineage: default__lineitem_ix_lineitem_ix_lshipdate_idx__._offsets EXPRESSION [(lineitem_ix)lineitem_ix.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__lineitem_ix_lineitem_ix_lshipdate_idx__.l_shipdate SIMPLE [(lineitem_ix)lineitem_ix.FieldSchema(name:l_shipdate, type:string, comment:null), ] +PREHOOK: query: explain select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate +PREHOOK: type: 
QUERY +POSTHOOK: query: explain select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_ix + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_shipdate (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate +order by l_shipdate +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate +order by l_shipdate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +1992-04-27 1 +1992-07-02 1 +1992-07-10 1 +1992-07-21 1 +1993-04-01 1 +1993-04-13 1 +1993-05-14 1 +1993-10-29 2 +1993-11-09 2 +1993-12-04 1 +1993-12-09 2 +1993-12-14 1 +1994-01-12 1 +1994-01-16 1 +1994-01-26 2 +1994-02-02 1 +1994-02-13 1 +1994-02-19 1 +1994-02-21 1 +1994-03-03 1 +1994-03-17 1 +1994-06-03 1 +1994-06-06 1 +1994-07-02 1 +1994-07-19 1 +1994-07-31 1 +1994-08-08 1 +1994-08-17 1 +1994-08-24 1 +1994-09-30 1 +1994-10-03 1 +1994-10-16 1 +1994-10-31 1 +1994-12-01 1 +1994-12-24 1 +1994-12-30 1 +1995-04-20 1 +1995-07-06 1 +1995-07-17 1 +1995-07-21 1 +1995-08-04 1 +1995-08-07 1 +1995-08-14 1 +1995-08-28 1 +1995-10-23 1 +1995-11-08 1 +1995-11-26 1 +1996-01-10 1 +1996-01-15 1 +1996-01-16 1 +1996-01-19 1 +1996-01-22 1 +1996-01-29 1 +1996-01-30 1 +1996-02-01 2 +1996-02-03 1 +1996-02-10 1 +1996-02-11 1 +1996-02-21 1 +1996-03-13 1 +1996-03-21 1 +1996-03-30 1 +1996-04-12 1 +1996-04-21 1 +1996-05-07 1 +1996-09-26 1 +1996-09-29 1 +1996-10-02 1 +1996-10-17 1 +1996-11-04 1 +1996-11-14 1 +1996-12-08 1 +1997-01-25 1 +1997-01-27 1 +1997-01-28 1 +1997-02-20 1 +1997-03-18 1 +1997-04-17 1 +1997-04-19 1 +1998-01-29 1 +1998-02-23 1 +1998-03-05 1 +1998-04-10 1 +1998-04-12 1 +1998-05-23 1 +1998-06-19 1 +1998-06-24 1 +1998-06-26 1 
+1998-06-27 1 +1998-07-04 1 +1998-08-11 1 +1998-08-13 1 +1998-10-09 1 +1998-10-23 1 +1998-10-30 1 +PREHOOK: query: explain select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate +PREHOOK: type: QUERY +POSTHOOK: query: explain select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: $hdt$_0:$hdt$_0:default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + Statistics: Num rows: 95 Data size: 13615 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint) + outputColumnNames: _col0, _count_of_l_shipdate + Statistics: Num rows: 95 Data size: 13615 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_l_shipdate) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 95 Data size: 13615 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 95 Data size: 13615 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 6735 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 6735 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 47 Data size: 6735 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate +order by l_shipdate +PREHOOK: type: QUERY +PREHOOK: Input: default@default__lineitem_ix_lineitem_ix_lshipdate_idx__ +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select l_shipdate, count(l_shipdate) +from lineitem_ix +group by l_shipdate +order by l_shipdate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__lineitem_ix_lineitem_ix_lshipdate_idx__ +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +1992-04-27 1 +1992-07-02 1 +1992-07-10 1 +1992-07-21 1 +1993-04-01 1 +1993-04-13 1 +1993-05-14 1 +1993-10-29 2 +1993-11-09 2 +1993-12-04 1 +1993-12-09 2 +1993-12-14 1 +1994-01-12 1 +1994-01-16 1 +1994-01-26 2 +1994-02-02 1 +1994-02-13 1 +1994-02-19 1 +1994-02-21 1 +1994-03-03 1 +1994-03-17 1 +1994-06-03 1 +1994-06-06 1 +1994-07-02 1 +1994-07-19 1 +1994-07-31 1 +1994-08-08 1 +1994-08-17 1 +1994-08-24 1 +1994-09-30 1 +1994-10-03 1 +1994-10-16 1 +1994-10-31 1 +1994-12-01 1 +1994-12-24 1 +1994-12-30 1 +1995-04-20 1 +1995-07-06 1 +1995-07-17 1 +1995-07-21 1 +1995-08-04 1 +1995-08-07 1 +1995-08-14 1 +1995-08-28 1 +1995-10-23 1 +1995-11-08 1 +1995-11-26 1 +1996-01-10 1 +1996-01-15 1 +1996-01-16 1 +1996-01-19 1 +1996-01-22 1 +1996-01-29 1 
+1996-01-30 1 +1996-02-01 2 +1996-02-03 1 +1996-02-10 1 +1996-02-11 1 +1996-02-21 1 +1996-03-13 1 +1996-03-21 1 +1996-03-30 1 +1996-04-12 1 +1996-04-21 1 +1996-05-07 1 +1996-09-26 1 +1996-09-29 1 +1996-10-02 1 +1996-10-17 1 +1996-11-04 1 +1996-11-14 1 +1996-12-08 1 +1997-01-25 1 +1997-01-27 1 +1997-01-28 1 +1997-02-20 1 +1997-03-18 1 +1997-04-17 1 +1997-04-19 1 +1998-01-29 1 +1998-02-23 1 +1998-03-05 1 +1998-04-10 1 +1998-04-12 1 +1998-05-23 1 +1998-06-19 1 +1998-06-24 1 +1998-06-26 1 +1998-06-27 1 +1998-07-04 1 +1998-08-11 1 +1998-08-13 1 +1998-10-09 1 +1998-10-23 1 +1998-10-30 1 +PREHOOK: query: explain select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month +PREHOOK: type: QUERY +POSTHOOK: query: explain select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_ix + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: year(l_shipdate) (type: int), month(l_shipdate) (type: int), l_shipdate (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col2) + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +1992 4 1 +1992 7 3 +1993 4 2 +1993 5 1 +1993 10 2 +1993 11 2 +1993 12 4 +1994 1 4 +1994 2 4 +1994 3 2 +1994 6 2 +1994 7 3 +1994 8 3 +1994 9 1 +1994 10 3 +1994 12 3 +1995 4 1 +1995 7 3 +1995 8 4 +1995 10 1 +1995 11 2 +1996 1 7 +1996 2 6 +1996 3 3 +1996 4 2 +1996 5 1 +1996 9 2 +1996 10 2 +1996 11 2 +1996 12 1 +1997 1 3 +1997 2 1 +1997 3 1 +1997 4 2 +1998 1 1 +1998 2 1 +1998 3 1 +1998 4 2 +1998 5 1 +1998 6 4 +1998 7 1 +1998 8 2 +1998 10 3 +PREHOOK: query: explain select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month +PREHOOK: type: QUERY +POSTHOOK: query: explain select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: $hdt$_0:$hdt$_0:default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + Statistics: Num rows: 95 Data size: 13615 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: year(l_shipdate) (type: int), month(l_shipdate) (type: int), l_shipdate (type: string), _count_of_l_shipdate (type: bigint) + outputColumnNames: _col0, _col1, _col2, _count_of_l_shipdate + Statistics: Num rows: 95 Data size: 13615 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_l_shipdate) + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 95 Data size: 13615 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 95 Data size: 13615 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 95 Data size: 13615 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + 
sort order: ++ + Statistics: Num rows: 95 Data size: 13615 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 95 Data size: 13615 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 95 Data size: 13615 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month +PREHOOK: type: QUERY +PREHOOK: Input: default@default__lineitem_ix_lineitem_ix_lshipdate_idx__ +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments +from lineitem_ix +group by year(l_shipdate), month(l_shipdate) +order by year, month +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__lineitem_ix_lineitem_ix_lshipdate_idx__ +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +1992 4 1 +1992 7 3 +1993 4 2 +1993 5 1 +1993 10 2 +1993 11 2 +1993 12 4 +1994 1 4 +1994 2 4 +1994 3 2 +1994 6 2 +1994 7 3 +1994 8 3 +1994 9 1 +1994 10 3 +1994 12 3 +1995 4 1 +1995 7 3 +1995 8 4 +1995 10 1 +1995 11 2 +1996 1 7 +1996 2 6 +1996 3 3 +1996 4 2 +1996 5 1 +1996 9 2 +1996 10 2 +1996 11 2 +1996 12 1 +1997 1 3 +1997 2 1 +1997 3 1 +1997 4 2 +1998 1 1 +1998 2 1 +1998 3 1 +1998 4 2 +1998 5 1 +1998 6 4 +1998 7 1 +1998 8 2 +1998 10 3 +PREHOOK: query: explain select lastyear.month, + thisyear.month, + (thisyear.monthly_shipments - lastyear.monthly_shipments) / +lastyear.monthly_shipments as monthly_shipments_delta + from (select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments + from lineitem_ix + where year(l_shipdate) = 1997 + group by year(l_shipdate), month(l_shipdate) + ) lastyear join + (select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments + from lineitem_ix + where year(l_shipdate) = 1998 + group by year(l_shipdate), month(l_shipdate) + ) thisyear + on lastyear.month = thisyear.month +PREHOOK: type: QUERY +POSTHOOK: query: explain select lastyear.month, + thisyear.month, + (thisyear.monthly_shipments - lastyear.monthly_shipments) / +lastyear.monthly_shipments as monthly_shipments_delta + from (select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments + from lineitem_ix + where year(l_shipdate) = 1997 + group by year(l_shipdate), month(l_shipdate) + ) lastyear join + (select year(l_shipdate) as year, + month(l_shipdate) as month, + count(l_shipdate) as monthly_shipments + from lineitem_ix + where year(l_shipdate) = 1998 + group by year(l_shipdate), month(l_shipdate) + ) thisyear + on lastyear.month = thisyear.month +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: 
Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lastyear:default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + Statistics: Num rows: 95 Data size: 13615 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (year(l_shipdate) = 1997) (type: boolean) + Statistics: Num rows: 47 Data size: 6735 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint) + outputColumnNames: l_shipdate, _count_of_l_shipdate + Statistics: Num rows: 47 Data size: 6735 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_l_shipdate) + keys: year(l_shipdate) (type: int), month(l_shipdate) (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 23 Data size: 3295 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 12 Data size: 1719 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 12 Data size: 1719 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 859 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col2 (type: bigint) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 6 Data size: 859 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6 Data size: 859 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6 Data size: 859 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col1} + 1 {KEY.reducesinkkey0} {VALUE._col1} + outputColumnNames: _col1, _col2, _col4, _col5 + Statistics: Num rows: 6 Data size: 944 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col4 (type: int), ((_col5 - _col2) / _col2) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce 
+ Map Operator Tree: + TableScan + alias: thisyear:default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + Statistics: Num rows: 95 Data size: 13615 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (year(l_shipdate) = 1998) (type: boolean) + Statistics: Num rows: 47 Data size: 6735 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint) + outputColumnNames: l_shipdate, _count_of_l_shipdate + Statistics: Num rows: 47 Data size: 6735 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_l_shipdate) + keys: year(l_shipdate) (type: int), month(l_shipdate) (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 23 Data size: 3295 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 12 Data size: 1719 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 12 Data size: 1719 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 859 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col2 (type: bigint) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 6 Data size: 859 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select l_shipdate, cnt +from (select l_shipdate, count(l_shipdate) as cnt from lineitem_ix group by l_shipdate +union all +select l_shipdate, l_orderkey as cnt +from lineitem_ix) dummy +PREHOOK: type: QUERY +POSTHOOK: query: explain select l_shipdate, cnt +from (select l_shipdate, count(l_shipdate) as cnt from lineitem_ix group by l_shipdate +union all +select l_shipdate, l_orderkey as cnt +from lineitem_ix) dummy +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: null-subquery1:$hdt$_0-subquery1:$hdt$_0:$hdt$_0:default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + Statistics: Num rows: 95 Data size: 13615 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint) + outputColumnNames: _col0, _count_of_l_shipdate + Statistics: Num rows: 95 Data size: 13615 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_l_shipdate) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 95 Data size: 13615 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + 
Statistics: Num rows: 95 Data size: 13615 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 6735 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_ix + Statistics: Num rows: 116 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_shipdate (type: string), UDFToLong(l_orderkey) (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 116 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 163 Data size: 18834 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 163 Data size: 18834 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 163 Data size: 18834 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + Union + Statistics: Num rows: 163 Data size: 18834 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 163 Data size: 18834 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 163 Data size: 18834 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: CREATE TABLE tbl(key int, value int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl +POSTHOOK: query: CREATE TABLE tbl(key int, value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl +PREHOOK: query: CREATE INDEX tbl_key_idx ON TABLE tbl(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)") +PREHOOK: type: CREATEINDEX +PREHOOK: Input: default@tbl +POSTHOOK: query: CREATE INDEX tbl_key_idx ON TABLE tbl(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)") +POSTHOOK: type: CREATEINDEX +POSTHOOK: Input: default@tbl +POSTHOOK: Output: default@default__tbl_tbl_key_idx__ +PREHOOK: query: ALTER INDEX tbl_key_idx ON tbl REBUILD +PREHOOK: type: ALTERINDEX_REBUILD +PREHOOK: Input: default@tbl +PREHOOK: Output: default@default__tbl_tbl_key_idx__ +POSTHOOK: query: ALTER INDEX tbl_key_idx ON tbl REBUILD +POSTHOOK: type: ALTERINDEX_REBUILD +POSTHOOK: Input: default@tbl +POSTHOOK: Output: default@default__tbl_tbl_key_idx__ +POSTHOOK: 
Lineage: default__tbl_tbl_key_idx__._bucketname SIMPLE [(tbl)tbl.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__tbl_tbl_key_idx__._count_of_key EXPRESSION [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: default__tbl_tbl_key_idx__._offsets EXPRESSION [(tbl)tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__tbl_tbl_key_idx__.key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: EXPLAIN select key, count(key) from tbl where key = 1 group by key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select key, count(key) from tbl where key = 1 group by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: $hdt$_0:$hdt$_0:default.default__tbl_tbl_key_idx__ + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (key = 1) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: 1 (type: int), _count_of_key (type: bigint) + outputColumnNames: _col0, _count_of_key + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_key) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select key, count(key) from tbl group by key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select key, count(key) from tbl group by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: $hdt$_0:$hdt$_0:default.default__tbl_tbl_key_idx__ + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int), _count_of_key (type: bigint) + outputColumnNames: _col0, _count_of_key + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_key) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key 
expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select count(1) from tbl +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select count(1) from tbl +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Group By Operator + aggregations: count(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select count(key) from tbl +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select count(key) from tbl +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: $hdt$_0:$hdt$_0:default.default__tbl_tbl_key_idx__ + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int), _count_of_key (type: bigint) + outputColumnNames: _col0, _count_of_key + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_key) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output 
Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select key FROM tbl GROUP BY key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select key FROM tbl GROUP BY key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select key FROM tbl GROUP BY value, key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select key FROM tbl GROUP BY value, key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: value (type: int), key (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: 
int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select key FROM tbl WHERE key = 3 GROUP BY key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select key FROM tbl WHERE key = 3 GROUP BY key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (key = 3) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: 3 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select key FROM tbl WHERE value = 2 GROUP BY key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select key FROM tbl WHERE value = 2 GROUP BY key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (value = 2) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 0 
Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select key FROM tbl GROUP BY key, substr(key,2,3) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select key FROM tbl GROUP BY key, substr(key,2,3) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int), substr(key, 2, 3) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select key, value FROM tbl GROUP BY value, key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select key, value FROM tbl GROUP BY value, key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: value (type: int), key (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: 
int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select key, value FROM tbl WHERE value = 1 GROUP BY key, value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select key, value FROM tbl WHERE value = 1 GROUP BY key, value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (value = 1) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int), 1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select DISTINCT key FROM tbl +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select DISTINCT key FROM tbl +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 
0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select DISTINCT key FROM tbl +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select DISTINCT key FROM tbl +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select DISTINCT key FROM tbl +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select DISTINCT key FROM tbl +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: 
NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select DISTINCT key, value FROM tbl +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select DISTINCT key, value FROM tbl +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select DISTINCT key, value FROM tbl WHERE value = 2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select DISTINCT key, value FROM tbl WHERE value = 2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (value = 2) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + 
Select Operator + expressions: key (type: int), 2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select DISTINCT key, value FROM tbl WHERE value = 2 AND key = 3 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select DISTINCT key, value FROM tbl WHERE value = 2 AND key = 3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: ((value = 2) and (key = 3)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: 3 (type: int), 2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select DISTINCT key, value FROM tbl WHERE value = key +PREHOOK: type: QUERY 
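A note on the pattern visible in this block, offered as an illustrative aside rather than part of the golden output: every DISTINCT variant here (DISTINCT key; DISTINCT key, value; with constant predicates or the value = key predicate) plans against the base table tbl, not the index table. The aggregate index materializes only count(key), and a plain DISTINCT carries no aggregate call for the rewrite to replace, so there is nothing to map onto _count_of_key. A minimal sketch of the boundary, assuming the tbl_key_idx index created earlier in this file:

-- Rewritable: the aggregate is count over exactly the indexed key.
SELECT key, count(key) FROM tbl GROUP BY key;
-- Not rewritable: no aggregate to map onto _count_of_key.
SELECT DISTINCT key FROM tbl;
-- Not rewritable: value is not covered by the single-column index on key.
SELECT DISTINCT key, value FROM tbl WHERE value = key;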
+POSTHOOK: query: EXPLAIN select DISTINCT key, value FROM tbl WHERE value = key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (value = key) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select DISTINCT key, substr(value,2,3) FROM tbl WHERE value = key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select DISTINCT key, substr(value,2,3) FROM tbl WHERE value = key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (value = key) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int), substr(value, 2, 3) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: 
NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select DISTINCT key, substr(value,2,3) FROM tbl +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select DISTINCT key, substr(value,2,3) FROM tbl +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int), substr(value, 2, 3) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select * FROM (select DISTINCT key, value FROM tbl) v1 WHERE v1.value = 2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select * FROM (select DISTINCT key, value FROM tbl) v1 WHERE v1.value = 2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (value = 2) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int), 2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + 
Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: DROP TABLE tbl +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tbl +PREHOOK: Output: default@tbl +POSTHOOK: query: DROP TABLE tbl +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tbl +POSTHOOK: Output: default@tbl +PREHOOK: query: CREATE TABLE tblpart (key int, value string) PARTITIONED BY (ds string, hr int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tblpart +POSTHOOK: query: CREATE TABLE tblpart (key int, value string) PARTITIONED BY (ds string, hr int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tblpart +PREHOOK: query: INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-08', hr=11) SELECT key, value FROM srcpart WHERE ds = '2008-04-08' AND hr = 11 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Output: default@tblpart@ds=2008-04-08/hr=11 +POSTHOOK: query: INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-08', hr=11) SELECT key, value FROM srcpart WHERE ds = '2008-04-08' AND hr = 11 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@tblpart@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-08,hr=11).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-08', hr=12) SELECT key, value FROM srcpart WHERE ds = '2008-04-08' AND hr = 12 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@tblpart@ds=2008-04-08/hr=12 +POSTHOOK: query: INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-08', hr=12) SELECT key, value FROM srcpart WHERE ds = '2008-04-08' AND hr = 12 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@tblpart@ds=2008-04-08/hr=12 +POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-08,hr=12).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-09', hr=11) SELECT key, value FROM srcpart WHERE ds = '2008-04-09' AND hr = 11 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: 
Output: default@tblpart@ds=2008-04-09/hr=11 +POSTHOOK: query: INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-09', hr=11) SELECT key, value FROM srcpart WHERE ds = '2008-04-09' AND hr = 11 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@tblpart@ds=2008-04-09/hr=11 +POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-09,hr=11).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-09', hr=12) SELECT key, value FROM srcpart WHERE ds = '2008-04-09' AND hr = 12 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@tblpart@ds=2008-04-09/hr=12 +POSTHOOK: query: INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-09', hr=12) SELECT key, value FROM srcpart WHERE ds = '2008-04-09' AND hr = 12 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@tblpart@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-09,hr=12).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: CREATE INDEX tbl_part_index ON TABLE tblpart(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)") +PREHOOK: type: CREATEINDEX +PREHOOK: Input: default@tblpart +POSTHOOK: query: CREATE INDEX tbl_part_index ON TABLE tblpart(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)") +POSTHOOK: type: CREATEINDEX +POSTHOOK: Input: default@tblpart +POSTHOOK: Output: default@default__tblpart_tbl_part_index__ +PREHOOK: query: ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-08', hr=11) REBUILD +PREHOOK: type: ALTERINDEX_REBUILD +PREHOOK: Input: default@tblpart +PREHOOK: Input: default@tblpart@ds=2008-04-08/hr=11 +PREHOOK: Output: default@default__tblpart_tbl_part_index__@ds=2008-04-08/hr=11 +POSTHOOK: query: ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-08', hr=11) REBUILD +POSTHOOK: type: ALTERINDEX_REBUILD +POSTHOOK: Input: default@tblpart +POSTHOOK: Input: default@tblpart@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@default__tblpart_tbl_part_index__@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-08,hr=11)._bucketname SIMPLE [(tblpart)tblpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-08,hr=11)._count_of_key EXPRESSION [(tblpart)tblpart.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-08,hr=11)._offsets EXPRESSION [(tblpart)tblpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(tblpart)tblpart.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: EXPLAIN SELECT key, count(key) FROM tblpart WHERE ds='2008-04-09' AND 
hr=12 AND key < 10 GROUP BY key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT key, count(key) FROM tblpart WHERE ds='2008-04-09' AND hr=12 AND key < 10 GROUP BY key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tblpart + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-08', hr=12) REBUILD +PREHOOK: type: ALTERINDEX_REBUILD +PREHOOK: Input: default@tblpart +PREHOOK: Input: default@tblpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@default__tblpart_tbl_part_index__@ds=2008-04-08/hr=12 +POSTHOOK: query: ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-08', hr=12) REBUILD +POSTHOOK: type: ALTERINDEX_REBUILD +POSTHOOK: Input: default@tblpart +POSTHOOK: Input: default@tblpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@default__tblpart_tbl_part_index__@ds=2008-04-08/hr=12 +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-08,hr=12)._bucketname SIMPLE [(tblpart)tblpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-08,hr=12)._count_of_key EXPRESSION [(tblpart)tblpart.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-08,hr=12)._offsets EXPRESSION [(tblpart)tblpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(tblpart)tblpart.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-09', hr=11) REBUILD +PREHOOK: type: ALTERINDEX_REBUILD +PREHOOK: Input: 
default@tblpart +PREHOOK: Input: default@tblpart@ds=2008-04-09/hr=11 +PREHOOK: Output: default@default__tblpart_tbl_part_index__@ds=2008-04-09/hr=11 +POSTHOOK: query: ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-09', hr=11) REBUILD +POSTHOOK: type: ALTERINDEX_REBUILD +POSTHOOK: Input: default@tblpart +POSTHOOK: Input: default@tblpart@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@default__tblpart_tbl_part_index__@ds=2008-04-09/hr=11 +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-09,hr=11)._bucketname SIMPLE [(tblpart)tblpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-09,hr=11)._count_of_key EXPRESSION [(tblpart)tblpart.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-09,hr=11)._offsets EXPRESSION [(tblpart)tblpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(tblpart)tblpart.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-09', hr=12) REBUILD +PREHOOK: type: ALTERINDEX_REBUILD +PREHOOK: Input: default@tblpart +PREHOOK: Input: default@tblpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@default__tblpart_tbl_part_index__@ds=2008-04-09/hr=12 +POSTHOOK: query: ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-09', hr=12) REBUILD +POSTHOOK: type: ALTERINDEX_REBUILD +POSTHOOK: Input: default@tblpart +POSTHOOK: Input: default@tblpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@default__tblpart_tbl_part_index__@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(tblpart)tblpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-09,hr=12)._count_of_key EXPRESSION [(tblpart)tblpart.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(tblpart)tblpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__tblpart_tbl_part_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(tblpart)tblpart.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: EXPLAIN SELECT key, count(key) FROM tblpart WHERE ds='2008-04-09' AND hr=12 AND key < 10 GROUP BY key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT key, count(key) FROM tblpart WHERE ds='2008-04-09' AND hr=12 AND key < 10 GROUP BY key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: $hdt$_0:$hdt$_0:default.default__tblpart_tbl_part_index__ + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), _count_of_key (type: bigint) + outputColumnNames: _col0, _count_of_key + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_key) + keys: _col0 (type: int) + mode: hash + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: DROP INDEX tbl_part_index on tblpart +PREHOOK: type: DROPINDEX +PREHOOK: Input: default@tblpart +POSTHOOK: query: DROP INDEX tbl_part_index on tblpart +POSTHOOK: type: DROPINDEX +POSTHOOK: Input: default@tblpart +PREHOOK: query: DROP TABLE tblpart +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tblpart +PREHOOK: Output: default@tblpart +POSTHOOK: query: DROP TABLE tblpart +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tblpart +POSTHOOK: Output: default@tblpart +PREHOOK: query: CREATE TABLE tbl(key int, value int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl +POSTHOOK: query: CREATE TABLE tbl(key int, value int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tbl.txt' OVERWRITE INTO TABLE tbl +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tbl +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tbl.txt' OVERWRITE INTO TABLE tbl +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tbl +PREHOOK: query: CREATE INDEX tbl_key_idx ON TABLE tbl(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)") +PREHOOK: type: CREATEINDEX +PREHOOK: Input: default@tbl +POSTHOOK: query: CREATE INDEX tbl_key_idx ON TABLE tbl(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)") +POSTHOOK: type: CREATEINDEX +POSTHOOK: Input: default@tbl +POSTHOOK: Output: default@default__tbl_tbl_key_idx__ +PREHOOK: query: ALTER INDEX tbl_key_idx ON tbl REBUILD +PREHOOK: type: ALTERINDEX_REBUILD +PREHOOK: Input: default@tbl +PREHOOK: Output: default@default__tbl_tbl_key_idx__ +POSTHOOK: query: ALTER INDEX tbl_key_idx ON tbl REBUILD +POSTHOOK: type: ALTERINDEX_REBUILD +POSTHOOK: Input: default@tbl +POSTHOOK: Output: default@default__tbl_tbl_key_idx__ +POSTHOOK: Lineage: default__tbl_tbl_key_idx__._bucketname SIMPLE [(tbl)tbl.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__tbl_tbl_key_idx__._count_of_key EXPRESSION 
[(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: default__tbl_tbl_key_idx__._offsets EXPRESSION [(tbl)tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__tbl_tbl_key_idx__.key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: explain select key, count(key) from tbl group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, count(key) from tbl group by key order by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from tbl group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from tbl group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl +#### A masked pattern was here #### +1 1 +2 3 +3 2 +4 2 +6 1 +7 1 +PREHOOK: query: explain select key, count(key) from tbl group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, count(key) from tbl group by key order by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + 
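The two plans around this point run the same query before and after the group-by-to-index rewrite takes effect: the first scans the base table tbl and computes count(_col0), while the one below scans default__tbl_tbl_key_idx__ and computes sum(_count_of_key). A minimal sketch of the transformation, assuming the driving .q script enables the rewrite between the two runs (for example via hive.optimize.index.groupby; the flag itself is an assumption and does not appear in this output):

-- Hypothetical illustration; table and column names follow the plans shown here.
-- User query:
SELECT key, count(key) FROM tbl GROUP BY key ORDER BY key;
-- Equivalent form the optimizer substitutes, reading the aggregate index:
SELECT key, sum(_count_of_key)
FROM default__tbl_tbl_key_idx__
GROUP BY key ORDER BY key;

Both runs return the identical result set (1 1, 2 3, 3 2, 4 2, 6 1, 7 1), which is the point of the paired select statements that follow each explain.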
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: $hdt$_0:$hdt$_0:default.default__tbl_tbl_key_idx__
+            Statistics: Num rows: 6 Data size: 724 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: int), _count_of_key (type: bigint)
+              outputColumnNames: _col0, _count_of_key
+              Statistics: Num rows: 6 Data size: 724 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: sum(_count_of_key)
+                keys: _col0 (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 6 Data size: 724 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 6 Data size: 724 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          keys: KEY._col0 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 6 Data size: 724 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: int)
+              sort order: +
+              Statistics: Num rows: 6 Data size: 724 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: bigint)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 6 Data size: 724 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 6 Data size: 724 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, count(key) from tbl group by key order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@default__tbl_tbl_key_idx__
+PREHOOK: Input: default@tbl
+#### A masked pattern was here ####
+POSTHOOK: query: select key, count(key) from tbl group by key order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@default__tbl_tbl_key_idx__
+POSTHOOK: Input: default@tbl
+#### A masked pattern was here ####
+1 1
+2 3
+3 2
+4 2
+6 1
+7 1
+PREHOOK: query: DROP TABLE tbl
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tbl
+PREHOOK: Output: default@tbl
+POSTHOOK: query: DROP TABLE tbl
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tbl
+POSTHOOK: Output: default@tbl
diff --git a/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx_cbo_2.q.out b/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx_cbo_2.q.out
new file mode 100644
index 0000000..4598f30
--- /dev/null
+++ b/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx_cbo_2.q.out
@@ -0,0 +1,3888 @@
+PREHOOK: query: DROP TABLE IF EXISTS lineitem_ix
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS lineitem_ix
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE lineitem_ix (L_ORDERKEY INT,
+ L_PARTKEY INT,
+ L_SUPPKEY INT,
+ L_LINENUMBER INT,
+ L_QUANTITY DOUBLE,
+ L_EXTENDEDPRICE DOUBLE,
+ L_DISCOUNT DOUBLE,
+ L_TAX DOUBLE,
+ L_RETURNFLAG STRING,
+ L_LINESTATUS STRING,
+ l_shipdate STRING,
+ L_COMMITDATE STRING,
+ L_RECEIPTDATE STRING,
+ L_SHIPINSTRUCT STRING,
+ L_SHIPMODE STRING,
+ L_COMMENT STRING)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@lineitem_ix
+POSTHOOK: query: CREATE TABLE lineitem_ix (L_ORDERKEY INT,
+ L_PARTKEY INT,
+ L_SUPPKEY INT,
+ L_LINENUMBER INT,
+ L_QUANTITY DOUBLE,
+ L_EXTENDEDPRICE DOUBLE,
+ L_DISCOUNT DOUBLE,
+ L_TAX DOUBLE,
+ L_RETURNFLAG STRING,
+ L_LINESTATUS STRING,
+ l_shipdate STRING,
+ L_COMMITDATE STRING,
+ L_RECEIPTDATE STRING,
+ L_SHIPINSTRUCT STRING,
+ L_SHIPMODE STRING,
+ L_COMMENT STRING)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@lineitem_ix
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_ix
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@lineitem_ix
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_ix
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@lineitem_ix
+PREHOOK: query: CREATE INDEX lineitem_ix_L_ORDERKEY_idx ON TABLE lineitem_ix(L_ORDERKEY) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(L_ORDERKEY)")
+PREHOOK: type: CREATEINDEX
+PREHOOK: Input: default@lineitem_ix
+POSTHOOK: query: CREATE INDEX lineitem_ix_L_ORDERKEY_idx ON TABLE lineitem_ix(L_ORDERKEY) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(L_ORDERKEY)")
+POSTHOOK: type: CREATEINDEX
+POSTHOOK: Input: default@lineitem_ix
+POSTHOOK: Output: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__
+PREHOOK: query: ALTER INDEX lineitem_ix_L_ORDERKEY_idx ON lineitem_ix REBUILD
+PREHOOK: type: ALTERINDEX_REBUILD
+PREHOOK: Input: default@lineitem_ix
+PREHOOK: Output: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__
+POSTHOOK: query: ALTER INDEX lineitem_ix_L_ORDERKEY_idx ON lineitem_ix REBUILD
+POSTHOOK: type: ALTERINDEX_REBUILD
+POSTHOOK: Input: default@lineitem_ix
+POSTHOOK: Output: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__
+POSTHOOK: Lineage: default__lineitem_ix_lineitem_ix_l_orderkey_idx__._bucketname SIMPLE [(lineitem_ix)lineitem_ix.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__lineitem_ix_lineitem_ix_l_orderkey_idx__._count_of_l_orderkey EXPRESSION [(lineitem_ix)lineitem_ix.FieldSchema(name:l_orderkey, type:int, comment:null), ]
+POSTHOOK: Lineage: default__lineitem_ix_lineitem_ix_l_orderkey_idx__._offsets EXPRESSION [(lineitem_ix)lineitem_ix.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__lineitem_ix_lineitem_ix_l_orderkey_idx__.l_orderkey SIMPLE [(lineitem_ix)lineitem_ix.FieldSchema(name:l_orderkey, type:int, comment:null), ]
+PREHOOK: query: CREATE INDEX lineitem_ix_L_PARTKEY_idx ON TABLE lineitem_ix(L_PARTKEY) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(L_PARTKEY)")
+PREHOOK: type: CREATEINDEX
+PREHOOK: Input: default@lineitem_ix
+POSTHOOK: query: CREATE INDEX lineitem_ix_L_PARTKEY_idx ON TABLE lineitem_ix(L_PARTKEY) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(L_PARTKEY)")
+POSTHOOK: type: CREATEINDEX
+POSTHOOK: Input: default@lineitem_ix
+POSTHOOK: Output: default@default__lineitem_ix_lineitem_ix_l_partkey_idx__
+PREHOOK: query: ALTER INDEX lineitem_ix_L_PARTKEY_idx ON lineitem_ix REBUILD
+PREHOOK: type: ALTERINDEX_REBUILD
+PREHOOK: Input: default@lineitem_ix
+PREHOOK: Output: default@default__lineitem_ix_lineitem_ix_l_partkey_idx__
+POSTHOOK: query: ALTER INDEX lineitem_ix_L_PARTKEY_idx ON lineitem_ix REBUILD
+POSTHOOK: type: ALTERINDEX_REBUILD
+POSTHOOK: Input: default@lineitem_ix
+POSTHOOK: Output: default@default__lineitem_ix_lineitem_ix_l_partkey_idx__
+POSTHOOK: Lineage: default__lineitem_ix_lineitem_ix_l_partkey_idx__._bucketname SIMPLE [(lineitem_ix)lineitem_ix.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__lineitem_ix_lineitem_ix_l_partkey_idx__._count_of_l_partkey EXPRESSION [(lineitem_ix)lineitem_ix.FieldSchema(name:l_partkey, type:int, comment:null), ]
+POSTHOOK: Lineage: default__lineitem_ix_lineitem_ix_l_partkey_idx__._offsets EXPRESSION [(lineitem_ix)lineitem_ix.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__lineitem_ix_lineitem_ix_l_partkey_idx__.l_partkey SIMPLE [(lineitem_ix)lineitem_ix.FieldSchema(name:l_partkey, type:int, comment:null), ]
+PREHOOK: query: explain
+select count(1)
+from lineitem_ix
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(1)
+from lineitem_ix
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: lineitem_ix
+            Statistics: Num rows: 0 Data size: 12099 Basic stats: PARTIAL Column stats: COMPLETE
+            Select Operator
+              expressions: 1 (type: int)
+              outputColumnNames: _col0
+              Statistics: Num rows: 0 Data size: 12099 Basic stats: PARTIAL Column stats: COMPLETE
+              Group By Operator
+                aggregations: count(_col0)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col0 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+          Select Operator
+            expressions: _col0 (type: bigint)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(1)
+from lineitem_ix
+PREHOOK: type: QUERY
+PREHOOK: Input: default@lineitem_ix
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1)
+from lineitem_ix
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@lineitem_ix
+#### A masked pattern was here ####
+100
+PREHOOK: query: explain
+select count(L_ORDERKEY)
+from lineitem_ix
+PREHOOK: type: QUERY
+POSTHOOK: query: explain +select count(L_ORDERKEY) +from lineitem_ix +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: $hdt$_0:$hdt$_0:default.default__lineitem_ix_lineitem_ix_l_orderkey_idx__ + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int), _count_of_l_orderkey (type: bigint) + outputColumnNames: _col0, _count_of_l_orderkey + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_l_orderkey) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(L_ORDERKEY) +from lineitem_ix +PREHOOK: type: QUERY +PREHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select count(L_ORDERKEY) +from lineitem_ix +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +100 +PREHOOK: query: explain select L_ORDERKEY+L_PARTKEY as keysum, +count(L_ORDERKEY), count(L_PARTKEY) +from lineitem_ix +group by L_ORDERKEY, L_PARTKEY +PREHOOK: type: QUERY +POSTHOOK: query: explain select L_ORDERKEY+L_PARTKEY as keysum, +count(L_ORDERKEY), count(L_PARTKEY) +from lineitem_ix +group by L_ORDERKEY, L_PARTKEY +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_ix + Statistics: Num rows: 1512 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int), l_partkey (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1512 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0), count(_col1) + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1512 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1512 Data size: 12099 Basic stats: COMPLETE Column 
stats: NONE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 756 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 + _col1) (type: int), _col2 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 756 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 756 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select L_ORDERKEY+L_PARTKEY as keysum, +count(L_ORDERKEY), count(L_PARTKEY) +from lineitem_ix +group by L_ORDERKEY, L_PARTKEY +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select L_ORDERKEY+L_PARTKEY as keysum, +count(L_ORDERKEY), count(L_PARTKEY) +from lineitem_ix +group by L_ORDERKEY, L_PARTKEY +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +2133 1 1 +15636 1 1 +24028 1 1 +63701 1 1 +67311 1 1 +155191 1 1 +106172 1 1 +4300 1 1 +19039 1 1 +29383 1 1 +62146 1 1 +128452 1 1 +183098 1 1 +88039 1 1 +37536 1 1 +108575 1 1 +123932 1 1 +139642 1 1 +79258 1 1 +94787 1 1 +145250 1 1 +151901 1 1 +157245 1 1 +163080 1 1 +182059 1 1 +2775 1 1 +11647 1 1 +44193 1 1 +82736 1 1 +85843 1 1 +197953 1 1 +33951 1 1 +60552 1 1 +61369 1 1 +137502 1 1 +88396 1 1 +89448 1 1 +169578 1 1 +485 1 1 +30797 1 1 +85210 1 1 +119952 1 1 +120931 1 1 +161975 1 1 +119803 1 1 +12940 1 1 +22667 1 1 +126819 1 1 +175877 1 1 +2359 1 1 +20629 1 1 +54558 1 1 +67870 1 1 +94407 1 1 +186621 1 1 +86015 1 1 +1453 1 1 +59759 1 1 +73880 1 1 +115184 1 1 +173555 1 1 +20260 1 1 +21703 1 1 +40680 1 1 +87581 1 1 +173667 1 1 +178373 1 1 +7136 1 1 +35048 1 1 +82826 1 1 +94796 1 1 +102629 1 1 +139315 1 1 +175248 1 1 +18573 1 1 +37571 1 1 +92139 1 1 +104249 1 1 +115278 1 1 +137336 1 1 +37201 1 1 +45804 1 1 +55725 1 1 +64198 1 1 +179879 1 1 +196226 1 1 +34503 1 1 +62002 1 1 +65987 1 1 +96716 1 1 +103326 1 1 +195706 1 1 +123172 1 1 +135486 1 1 +49665 1 1 +77796 1 1 +119574 1 1 +40314 1 1 +44804 1 1 +109841 1 1 +PREHOOK: query: explain +select L_ORDERKEY, count(L_ORDERKEY) +from lineitem_ix +where L_ORDERKEY = 7 +group by L_ORDERKEY +PREHOOK: type: QUERY +POSTHOOK: query: explain +select L_ORDERKEY, count(L_ORDERKEY) +from lineitem_ix +where L_ORDERKEY = 7 +group by L_ORDERKEY +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: $hdt$_0:$hdt$_0:default.default__lineitem_ix_lineitem_ix_l_orderkey_idx__ + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (l_orderkey = 7) (type: boolean) + Statistics: Num rows: 13 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 7 (type: int), _count_of_l_orderkey (type: bigint) + outputColumnNames: _col0, _count_of_l_orderkey + Statistics: 
Num rows: 13 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_l_orderkey) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 900 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 900 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 900 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select L_ORDERKEY, count(L_ORDERKEY) +from lineitem_ix +where L_ORDERKEY = 7 +group by L_ORDERKEY +PREHOOK: type: QUERY +PREHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select L_ORDERKEY, count(L_ORDERKEY) +from lineitem_ix +where L_ORDERKEY = 7 +group by L_ORDERKEY +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +7 7 +PREHOOK: query: explain +select L_ORDERKEY, count(1) +from lineitem_ix +group by L_ORDERKEY +PREHOOK: type: QUERY +POSTHOOK: query: explain +select L_ORDERKEY, count(1) +from lineitem_ix +group by L_ORDERKEY +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_ix + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int), 1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column 
stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select L_ORDERKEY, count(1) +from lineitem_ix +group by L_ORDERKEY +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select L_ORDERKEY, count(1) +from lineitem_ix +group by L_ORDERKEY +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +1 6 +2 1 +3 6 +4 1 +5 3 +6 1 +7 7 +32 6 +33 4 +34 3 +35 6 +36 1 +37 3 +38 1 +39 6 +64 1 +65 3 +66 2 +67 6 +68 7 +69 6 +70 6 +71 6 +96 2 +97 3 +98 3 +PREHOOK: query: explain +select count(L_ORDERKEY+1) +from lineitem_ix +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(L_ORDERKEY+1) +from lineitem_ix +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: $hdt$_0:$hdt$_0:default.default__lineitem_ix_lineitem_ix_l_orderkey_idx__ + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (l_orderkey + 1) (type: int), _count_of_l_orderkey (type: bigint) + outputColumnNames: _col0, _count_of_l_orderkey + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_l_orderkey) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(L_ORDERKEY+1) +from lineitem_ix +PREHOOK: type: QUERY +PREHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select count(L_ORDERKEY+1) +from lineitem_ix +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +100 +PREHOOK: query: explain +select L_ORDERKEY, count(L_ORDERKEY+1) +from lineitem_ix +group by L_ORDERKEY +PREHOOK: type: QUERY +POSTHOOK: query: explain +select L_ORDERKEY, count(L_ORDERKEY+1) +from lineitem_ix +group by L_ORDERKEY +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + 
Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: $hdt$_0:$hdt$_0:default.default__lineitem_ix_lineitem_ix_l_orderkey_idx__ + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int), (l_orderkey + 1) (type: int), _count_of_l_orderkey (type: bigint) + outputColumnNames: _col0, _col1, _count_of_l_orderkey + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_l_orderkey) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select L_ORDERKEY, count(L_ORDERKEY+1) +from lineitem_ix +group by L_ORDERKEY +PREHOOK: type: QUERY +PREHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select L_ORDERKEY, count(L_ORDERKEY+1) +from lineitem_ix +group by L_ORDERKEY +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +1 6 +2 1 +3 6 +4 1 +5 3 +6 1 +7 7 +32 6 +33 4 +34 3 +35 6 +36 1 +37 3 +38 1 +39 6 +64 1 +65 3 +66 2 +67 6 +68 7 +69 6 +70 6 +71 6 +96 2 +97 3 +98 3 +PREHOOK: query: explain +select L_ORDERKEY, count(L_ORDERKEY+1+L_ORDERKEY+2) +from lineitem_ix +group by L_ORDERKEY +PREHOOK: type: QUERY +POSTHOOK: query: explain +select L_ORDERKEY, count(L_ORDERKEY+1+L_ORDERKEY+2) +from lineitem_ix +group by L_ORDERKEY +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: $hdt$_0:$hdt$_0:default.default__lineitem_ix_lineitem_ix_l_orderkey_idx__ + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int), (((l_orderkey + 1) + l_orderkey) + 2) (type: int), _count_of_l_orderkey (type: bigint) + outputColumnNames: _col0, _col1, _count_of_l_orderkey + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_l_orderkey) + keys: _col0 (type: int) 
+ mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select L_ORDERKEY, count(L_ORDERKEY+1+L_ORDERKEY+2) +from lineitem_ix +group by L_ORDERKEY +PREHOOK: type: QUERY +PREHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select L_ORDERKEY, count(L_ORDERKEY+1+L_ORDERKEY+2) +from lineitem_ix +group by L_ORDERKEY +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +1 6 +2 1 +3 6 +4 1 +5 3 +6 1 +7 7 +32 6 +33 4 +34 3 +35 6 +36 1 +37 3 +38 1 +39 6 +64 1 +65 3 +66 2 +67 6 +68 7 +69 6 +70 6 +71 6 +96 2 +97 3 +98 3 +PREHOOK: query: explain +select L_ORDERKEY, count(1+L_ORDERKEY+2) +from lineitem_ix +group by L_ORDERKEY +PREHOOK: type: QUERY +POSTHOOK: query: explain +select L_ORDERKEY, count(1+L_ORDERKEY+2) +from lineitem_ix +group by L_ORDERKEY +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: $hdt$_0:$hdt$_0:default.default__lineitem_ix_lineitem_ix_l_orderkey_idx__ + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int), ((1 + l_orderkey) + 2) (type: int), _count_of_l_orderkey (type: bigint) + outputColumnNames: _col0, _col1, _count_of_l_orderkey + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_l_orderkey) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 
_col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select L_ORDERKEY, count(1+L_ORDERKEY+2) +from lineitem_ix +group by L_ORDERKEY +PREHOOK: type: QUERY +PREHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select L_ORDERKEY, count(1+L_ORDERKEY+2) +from lineitem_ix +group by L_ORDERKEY +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +1 6 +2 1 +3 6 +4 1 +5 3 +6 1 +7 7 +32 6 +33 4 +34 3 +35 6 +36 1 +37 3 +38 1 +39 6 +64 1 +65 3 +66 2 +67 6 +68 7 +69 6 +70 6 +71 6 +96 2 +97 3 +98 3 +PREHOOK: query: explain +select L_ORDERKEY as a, count(1) as b +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY +PREHOOK: type: QUERY +POSTHOOK: query: explain +select L_ORDERKEY as a, count(1) as b +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_ix + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (l_orderkey < 7) (type: boolean) + Statistics: Num rows: 1008 Data size: 4033 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int), 1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1008 Data size: 4033 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1008 Data size: 4033 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1008 Data size: 4033 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 504 Data size: 2016 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 504 Data size: 2016 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 504 Data size: 2016 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select L_ORDERKEY as a, count(1) as b +from lineitem_ix 
+where L_ORDERKEY < 7 +group by L_ORDERKEY +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select L_ORDERKEY as a, count(1) as b +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +1 6 +2 1 +3 6 +4 1 +5 3 +6 1 +PREHOOK: query: explain +select L_ORDERKEY, count(keysum), sum(keysum) +from +(select L_ORDERKEY, L_ORDERKEY+L_PARTKEY as keysum from lineitem_ix) tabA +group by L_ORDERKEY +PREHOOK: type: QUERY +POSTHOOK: query: explain +select L_ORDERKEY, count(keysum), sum(keysum) +from +(select L_ORDERKEY, L_ORDERKEY+L_PARTKEY as keysum from lineitem_ix) tabA +group by L_ORDERKEY +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_ix + Statistics: Num rows: 1512 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int), (l_orderkey + l_partkey) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1512 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1), sum(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1512 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1512 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 756 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 756 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 756 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select L_ORDERKEY, count(keysum), sum(keysum) +from +(select L_ORDERKEY, L_ORDERKEY+L_PARTKEY as keysum from lineitem_ix) tabA +group by L_ORDERKEY +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select L_ORDERKEY, count(keysum), sum(keysum) +from +(select L_ORDERKEY, L_ORDERKEY+L_PARTKEY as keysum from lineitem_ix) tabA +group by L_ORDERKEY +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +1 6 328000 +2 1 106172 +3 6 426418 +4 1 88039 +5 3 270043 +6 1 139642 +7 7 973580 +32 6 425147 +33 4 293374 +34 3 347422 +35 6 519350 +36 1 119803 +37 3 162426 +38 1 175877 +39 6 426444 +64 1 86015 +65 3 135092 +66 2 288739 +67 6 522264 +68 7 636998 +69 6 505146 +70 6 579033 +71 6 558240 +96 2 258658 +97 3 247035 +98 3 194959 +PREHOOK: query: explain 
+select L_ORDERKEY, count(L_ORDERKEY), sum(L_ORDERKEY) +from lineitem_ix +group by L_ORDERKEY +PREHOOK: type: QUERY +POSTHOOK: query: explain +select L_ORDERKEY, count(L_ORDERKEY), sum(L_ORDERKEY) +from lineitem_ix +group by L_ORDERKEY +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_ix + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0), sum(_col0) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select L_ORDERKEY, count(L_ORDERKEY), sum(L_ORDERKEY) +from lineitem_ix +group by L_ORDERKEY +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select L_ORDERKEY, count(L_ORDERKEY), sum(L_ORDERKEY) +from lineitem_ix +group by L_ORDERKEY +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +1 6 6 +2 1 2 +3 6 18 +4 1 4 +5 3 15 +6 1 6 +7 7 49 +32 6 192 +33 4 132 +34 3 102 +35 6 210 +36 1 36 +37 3 111 +38 1 38 +39 6 300 +64 1 64 +65 3 195 +66 2 132 +67 6 402 +68 7 476 +69 6 414 +70 6 420 +71 6 426 +96 2 192 +97 3 291 +98 3 294 +PREHOOK: query: explain +select colA, count(colA) +from (select L_ORDERKEY as colA from lineitem_ix) tabA +group by colA +PREHOOK: type: QUERY +POSTHOOK: query: explain +select colA, count(colA) +from (select L_ORDERKEY as colA from lineitem_ix) tabA +group by colA +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: $hdt$_0:$hdt$_0:$hdt$_0:default.default__lineitem_ix_lineitem_ix_l_orderkey_idx__ + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int), _count_of_l_orderkey (type: bigint) + outputColumnNames: _col0, _count_of_l_orderkey + Statistics: 
Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_l_orderkey) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select colA, count(colA) +from (select L_ORDERKEY as colA from lineitem_ix) tabA +group by colA +PREHOOK: type: QUERY +PREHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select colA, count(colA) +from (select L_ORDERKEY as colA from lineitem_ix) tabA +group by colA +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +1 6 +2 1 +3 6 +4 1 +5 3 +6 1 +7 7 +32 6 +33 4 +34 3 +35 6 +36 1 +37 3 +38 1 +39 6 +64 1 +65 3 +66 2 +67 6 +68 7 +69 6 +70 6 +71 6 +96 2 +97 3 +98 3 +PREHOOK: query: explain +select keysum, count(keysum) +from +(select L_ORDERKEY+L_PARTKEY as keysum from lineitem_ix) tabA +group by keysum +PREHOOK: type: QUERY +POSTHOOK: query: explain +select keysum, count(keysum) +from +(select L_ORDERKEY+L_PARTKEY as keysum from lineitem_ix) tabA +group by keysum +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_ix + Statistics: Num rows: 1512 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (l_orderkey + l_partkey) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1512 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1512 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1512 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 756 Data 
size: 6049 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 756 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 756 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select keysum, count(keysum) +from +(select L_ORDERKEY+L_PARTKEY as keysum from lineitem_ix) tabA +group by keysum +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select keysum, count(keysum) +from +(select L_ORDERKEY+L_PARTKEY as keysum from lineitem_ix) tabA +group by keysum +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +485 1 +1453 1 +2133 1 +2359 1 +2775 1 +4300 1 +7136 1 +11647 1 +12940 1 +15636 1 +18573 1 +19039 1 +20260 1 +20629 1 +21703 1 +22667 1 +24028 1 +29383 1 +30797 1 +33951 1 +34503 1 +35048 1 +37201 1 +37536 1 +37571 1 +40314 1 +40680 1 +44193 1 +44804 1 +45804 1 +49665 1 +54558 1 +55725 1 +59759 1 +60552 1 +61369 1 +62002 1 +62146 1 +63701 1 +64198 1 +65987 1 +67311 1 +67870 1 +73880 1 +77796 1 +79258 1 +82736 1 +82826 1 +85210 1 +85843 1 +86015 1 +87581 1 +88039 1 +88396 1 +89448 1 +92139 1 +94407 1 +94787 1 +94796 1 +96716 1 +102629 1 +103326 1 +104249 1 +106172 1 +108575 1 +109841 1 +115184 1 +115278 1 +119574 1 +119803 1 +119952 1 +120931 1 +123172 1 +123932 1 +126819 1 +128452 1 +135486 1 +137336 1 +137502 1 +139315 1 +139642 1 +145250 1 +151901 1 +155191 1 +157245 1 +161975 1 +163080 1 +169578 1 +173555 1 +173667 1 +175248 1 +175877 1 +178373 1 +179879 1 +182059 1 +183098 1 +186621 1 +195706 1 +196226 1 +197953 1 +PREHOOK: query: explain +select keysum, count(keysum) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix) tabA +group by keysum +PREHOOK: type: QUERY +POSTHOOK: query: explain +select keysum, count(keysum) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix) tabA +group by keysum +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: $hdt$_0:$hdt$_0:$hdt$_0:default.default__lineitem_ix_lineitem_ix_l_orderkey_idx__ + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (l_orderkey + 1) (type: int), _count_of_l_orderkey (type: bigint) + outputColumnNames: _col0, _count_of_l_orderkey + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_l_orderkey) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, 
_col1 + Statistics: Num rows: 13 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select keysum, count(keysum) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix) tabA +group by keysum +PREHOOK: type: QUERY +PREHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select keysum, count(keysum) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix) tabA +group by keysum +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +2 6 +3 1 +4 6 +5 1 +6 3 +7 1 +8 7 +33 6 +34 4 +35 3 +36 6 +37 1 +38 3 +39 1 +40 6 +65 1 +66 3 +67 2 +68 6 +69 7 +70 6 +71 6 +72 6 +97 2 +98 3 +99 3 +PREHOOK: query: explain +select keysum, count(1) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix) tabA +group by keysum +PREHOOK: type: QUERY +POSTHOOK: query: explain +select keysum, count(1) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix) tabA +group by keysum +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_ix + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (l_orderkey + 1) (type: int), 1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select keysum, 
count(1) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix) tabA +group by keysum +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select keysum, count(1) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix) tabA +group by keysum +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +2 6 +3 1 +4 6 +5 1 +6 3 +7 1 +8 7 +33 6 +34 4 +35 3 +36 6 +37 1 +38 3 +39 1 +40 6 +65 1 +66 3 +67 2 +68 6 +69 7 +70 6 +71 6 +72 6 +97 2 +98 3 +99 3 +PREHOOK: query: explain +select keysum, count(keysum) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix where L_ORDERKEY = 7) tabA +group by keysum +PREHOOK: type: QUERY +POSTHOOK: query: explain +select keysum, count(keysum) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix where L_ORDERKEY = 7) tabA +group by keysum +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_ix + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (l_orderkey = 7) (type: boolean) + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 8 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 756 Data size: 3024 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 756 Data size: 3024 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 756 Data size: 3024 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select keysum, count(keysum) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix where L_ORDERKEY = 7) tabA +group by keysum +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select keysum, count(keysum) +from +(select L_ORDERKEY+1 as keysum from lineitem_ix where L_ORDERKEY = 7) tabA +group by keysum +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +8 7 +PREHOOK: query: explain +select ckeysum, count(ckeysum) +from +(select keysum, count(keysum) as ckeysum +from + (select L_ORDERKEY+1 as keysum from lineitem_ix where L_ORDERKEY = 7) tabA +group by keysum) tabB +group by ckeysum +PREHOOK: type: QUERY +POSTHOOK: 
query: explain +select ckeysum, count(ckeysum) +from +(select keysum, count(keysum) as ckeysum +from + (select L_ORDERKEY+1 as keysum from lineitem_ix where L_ORDERKEY = 7) tabA +group by keysum) tabB +group by ckeysum +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_ix + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (l_orderkey = 7) (type: boolean) + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 8 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 756 Data size: 3024 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 756 Data size: 3024 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0) + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 756 Data size: 3024 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 756 Data size: 3024 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 378 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 378 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 378 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select ckeysum, count(ckeysum) +from +(select keysum, count(keysum) as ckeysum +from + (select L_ORDERKEY+1 as keysum from lineitem_ix where 
L_ORDERKEY = 7) tabA +group by keysum) tabB +group by ckeysum +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select ckeysum, count(ckeysum) +from +(select keysum, count(keysum) as ckeysum +from + (select L_ORDERKEY+1 as keysum from lineitem_ix where L_ORDERKEY = 7) tabA +group by keysum) tabB +group by ckeysum +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +7 1 +PREHOOK: query: explain +select keysum, count(keysum) as ckeysum +from +(select L_ORDERKEY, count(L_ORDERKEY) as keysum +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY)tabA +group by keysum +PREHOOK: type: QUERY +POSTHOOK: query: explain +select keysum, count(keysum) as ckeysum +from +(select L_ORDERKEY, count(L_ORDERKEY) as keysum +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY)tabA +group by keysum +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_0:default.default__lineitem_ix_lineitem_ix_l_orderkey_idx__ + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (l_orderkey < 7) (type: boolean) + Statistics: Num rows: 8 Data size: 1201 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int), _count_of_l_orderkey (type: bigint) + outputColumnNames: _col0, _count_of_l_orderkey + Statistics: Num rows: 8 Data size: 1201 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_l_orderkey) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 1201 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 8 Data size: 1201 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 600 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 600 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0) + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 4 Data size: 600 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data 
size: 300 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 300 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 300 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select keysum, count(keysum) as ckeysum +from +(select L_ORDERKEY, count(L_ORDERKEY) as keysum +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY)tabA +group by keysum +PREHOOK: type: QUERY +PREHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select keysum, count(keysum) as ckeysum +from +(select L_ORDERKEY, count(L_ORDERKEY) as keysum +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY)tabA +group by keysum +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +1 3 +3 1 +6 2 +PREHOOK: query: CREATE INDEX src_key_idx ON TABLE src(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)") +PREHOOK: type: CREATEINDEX +PREHOOK: Input: default@src +POSTHOOK: query: CREATE INDEX src_key_idx ON TABLE src(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)") +POSTHOOK: type: CREATEINDEX +POSTHOOK: Input: default@src +POSTHOOK: Output: default@default__src_src_key_idx__ +PREHOOK: query: ALTER INDEX src_key_idx ON src REBUILD +PREHOOK: type: ALTERINDEX_REBUILD +PREHOOK: Input: default@src +PREHOOK: Output: default@default__src_src_key_idx__ +POSTHOOK: query: ALTER INDEX src_key_idx ON src REBUILD +POSTHOOK: type: ALTERINDEX_REBUILD +POSTHOOK: Input: default@src +POSTHOOK: Output: default@default__src_src_key_idx__ +POSTHOOK: Lineage: default__src_src_key_idx__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_key_idx__._count_of_key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: default__src_src_key_idx__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_key_idx__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: explain +select tabA.a, tabA.b, tabB.a, tabB.b +from +(select L_ORDERKEY as a, count(L_ORDERKEY) as b +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY) tabA +join +(select key as a, count(key) as b +from src +group by key +) tabB +on (tabA.b=tabB.b) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select tabA.a, tabA.b, tabB.a, tabB.b +from +(select L_ORDERKEY as a, count(L_ORDERKEY) as b +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY) tabA +join +(select key as a, count(key) as b +from src +group by key +) tabB +on (tabA.b=tabB.b) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage 
+ Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: taba:default.default__lineitem_ix_lineitem_ix_l_orderkey_idx__ + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (l_orderkey < 7) (type: boolean) + Statistics: Num rows: 8 Data size: 1201 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int), _count_of_l_orderkey (type: bigint) + outputColumnNames: l_orderkey, _count_of_l_orderkey + Statistics: Num rows: 8 Data size: 1201 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_l_orderkey) + keys: l_orderkey (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 1201 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 8 Data size: 1201 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 600 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Map-reduce partition columns: _col1 (type: bigint) + Statistics: Num rows: 4 Data size: 600 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + TableScan + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Map-reduce partition columns: _col1 (type: bigint) + Statistics: Num rows: 154 Data size: 19393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} + 1 {VALUE._col0} {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 169 Data size: 21442 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 169 Data size: 21442 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 169 Data size: 21442 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: tabb:default.default__src_src_key_idx__ + Statistics: Num rows: 309 Data size: 39113 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), _count_of_key (type: bigint) + 
outputColumnNames: key, _count_of_key + Statistics: Num rows: 309 Data size: 39113 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_key) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 309 Data size: 39113 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 309 Data size: 39113 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 154 Data size: 19393 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 154 Data size: 19393 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tabA.a, tabA.b, tabB.a, tabB.b +from +(select L_ORDERKEY as a, count(L_ORDERKEY) as b +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY) tabA +join +(select key as a, count(key) as b +from src +group by key +) tabB +on (tabA.b=tabB.b) +PREHOOK: type: QUERY +PREHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +PREHOOK: Input: default@default__src_src_key_idx__ +PREHOOK: Input: default@lineitem_ix +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select tabA.a, tabA.b, tabB.a, tabB.b +from +(select L_ORDERKEY as a, count(L_ORDERKEY) as b +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY) tabA +join +(select key as a, count(key) as b +from src +group by key +) tabB +on (tabA.b=tabB.b) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +POSTHOOK: Input: default@default__src_src_key_idx__ +POSTHOOK: Input: default@lineitem_ix +POSTHOOK: Input: default@src +#### A masked pattern was here #### +6 1 490 1 +6 1 287 1 +6 1 286 1 +6 1 285 1 +6 1 284 1 +6 1 283 1 +6 1 114 1 +6 1 487 1 +6 1 485 1 +6 1 28 1 +6 1 484 1 +6 1 181 1 +6 1 275 1 +6 1 274 1 +6 1 183 1 +6 1 483 1 +6 1 27 1 +6 1 266 1 +6 1 482 1 +6 1 263 1 +6 1 262 1 +6 1 260 1 +6 1 481 1 +6 1 258 1 +6 1 257 1 +6 1 116 1 +6 1 479 1 +6 1 252 1 +6 1 249 1 +6 1 248 1 +6 1 247 1 +6 1 244 1 +6 1 92 1 +6 1 241 1 +6 1 477 1 +6 1 475 1 +6 1 472 1 +6 1 470 1 +6 1 235 1 +6 1 47 1 +6 1 186 1 +6 1 126 1 +6 1 228 1 +6 1 226 1 +6 1 131 1 +6 1 467 1 +6 1 222 1 +6 1 133 1 +6 1 82 1 +6 1 218 1 +6 1 80 1 +6 1 460 1 +6 1 214 1 +6 1 8 1 +6 1 78 1 +6 1 189 1 +6 1 457 1 +6 1 455 1 +6 1 136 1 +6 1 202 1 +6 1 201 1 +6 1 453 1 +6 1 20 1 +6 1 2 1 +6 1 19 1 +6 1 452 1 +6 1 196 1 +6 1 449 1 +6 1 194 1 +6 1 190 1 +6 1 192 1 +6 1 448 1 +6 1 446 1 +6 1 444 1 +6 1 443 1 +6 1 44 1 +6 1 77 1 +6 1 143 1 +6 1 437 1 +6 1 436 1 +6 1 435 1 +6 1 432 1 +6 1 145 1 +6 1 150 1 +6 1 43 1 +6 1 10 1 +6 1 427 1 +6 1 74 1 +6 1 421 1 +6 1 9 1 +6 1 419 1 +6 1 418 1 +6 1 153 1 +6 1 105 1 +6 1 69 1 +6 1 411 1 +6 1 41 1 +6 1 155 1 +6 1 407 1 +6 1 156 1 +6 1 87 1 +6 1 157 1 +6 1 402 1 +6 1 158 1 +6 1 400 1 +6 1 4 1 +6 1 66 1 +6 1 65 1 +6 
1 160 1 +6 1 64 1 +6 1 394 1 +6 1 393 1 +6 1 392 1 +6 1 389 1 +6 1 386 1 +6 1 162 1 +6 1 86 1 +6 1 379 1 +6 1 378 1 +6 1 377 1 +6 1 375 1 +6 1 374 1 +6 1 373 1 +6 1 57 1 +6 1 163 1 +6 1 368 1 +6 1 54 1 +6 1 366 1 +6 1 365 1 +6 1 364 1 +6 1 362 1 +6 1 360 1 +6 1 356 1 +6 1 53 1 +6 1 351 1 +6 1 166 1 +6 1 168 1 +6 1 345 1 +6 1 85 1 +6 1 11 1 +6 1 341 1 +6 1 34 1 +6 1 339 1 +6 1 338 1 +6 1 336 1 +6 1 335 1 +6 1 111 1 +6 1 332 1 +6 1 497 1 +6 1 33 1 +6 1 17 1 +6 1 496 1 +6 1 323 1 +6 1 495 1 +6 1 494 1 +6 1 170 1 +6 1 493 1 +6 1 177 1 +6 1 315 1 +6 1 178 1 +6 1 310 1 +6 1 96 1 +6 1 308 1 +6 1 491 1 +6 1 306 1 +6 1 305 1 +6 1 302 1 +6 1 30 1 +6 1 180 1 +6 1 296 1 +6 1 292 1 +6 1 291 1 +6 1 289 1 +4 1 490 1 +4 1 287 1 +4 1 286 1 +4 1 285 1 +4 1 284 1 +4 1 283 1 +4 1 114 1 +4 1 487 1 +4 1 485 1 +4 1 28 1 +4 1 484 1 +4 1 181 1 +4 1 275 1 +4 1 274 1 +4 1 183 1 +4 1 483 1 +4 1 27 1 +4 1 266 1 +4 1 482 1 +4 1 263 1 +4 1 262 1 +4 1 260 1 +4 1 481 1 +4 1 258 1 +4 1 257 1 +4 1 116 1 +4 1 479 1 +4 1 252 1 +4 1 249 1 +4 1 248 1 +4 1 247 1 +4 1 244 1 +4 1 92 1 +4 1 241 1 +4 1 477 1 +4 1 475 1 +4 1 472 1 +4 1 470 1 +4 1 235 1 +4 1 47 1 +4 1 186 1 +4 1 126 1 +4 1 228 1 +4 1 226 1 +4 1 131 1 +4 1 467 1 +4 1 222 1 +4 1 133 1 +4 1 82 1 +4 1 218 1 +4 1 80 1 +4 1 460 1 +4 1 214 1 +4 1 8 1 +4 1 78 1 +4 1 189 1 +4 1 457 1 +4 1 455 1 +4 1 136 1 +4 1 202 1 +4 1 201 1 +4 1 453 1 +4 1 20 1 +4 1 2 1 +4 1 19 1 +4 1 452 1 +4 1 196 1 +4 1 449 1 +4 1 194 1 +4 1 190 1 +4 1 192 1 +4 1 448 1 +4 1 446 1 +4 1 444 1 +4 1 443 1 +4 1 44 1 +4 1 77 1 +4 1 143 1 +4 1 437 1 +4 1 436 1 +4 1 435 1 +4 1 432 1 +4 1 145 1 +4 1 150 1 +4 1 43 1 +4 1 10 1 +4 1 427 1 +4 1 74 1 +4 1 421 1 +4 1 9 1 +4 1 419 1 +4 1 418 1 +4 1 153 1 +4 1 105 1 +4 1 69 1 +4 1 411 1 +4 1 41 1 +4 1 155 1 +4 1 407 1 +4 1 156 1 +4 1 87 1 +4 1 157 1 +4 1 402 1 +4 1 158 1 +4 1 400 1 +4 1 4 1 +4 1 66 1 +4 1 65 1 +4 1 160 1 +4 1 64 1 +4 1 394 1 +4 1 393 1 +4 1 392 1 +4 1 389 1 +4 1 386 1 +4 1 162 1 +4 1 86 1 +4 1 379 1 +4 1 378 1 +4 1 377 1 +4 1 375 1 +4 1 374 1 +4 1 373 1 +4 1 57 1 +4 1 163 1 +4 1 368 1 +4 1 54 1 +4 1 366 1 +4 1 365 1 +4 1 364 1 +4 1 362 1 +4 1 360 1 +4 1 356 1 +4 1 53 1 +4 1 351 1 +4 1 166 1 +4 1 168 1 +4 1 345 1 +4 1 85 1 +4 1 11 1 +4 1 341 1 +4 1 34 1 +4 1 339 1 +4 1 338 1 +4 1 336 1 +4 1 335 1 +4 1 111 1 +4 1 332 1 +4 1 497 1 +4 1 33 1 +4 1 17 1 +4 1 496 1 +4 1 323 1 +4 1 495 1 +4 1 494 1 +4 1 170 1 +4 1 493 1 +4 1 177 1 +4 1 315 1 +4 1 178 1 +4 1 310 1 +4 1 96 1 +4 1 308 1 +4 1 491 1 +4 1 306 1 +4 1 305 1 +4 1 302 1 +4 1 30 1 +4 1 180 1 +4 1 296 1 +4 1 292 1 +4 1 291 1 +4 1 289 1 +2 1 490 1 +2 1 287 1 +2 1 286 1 +2 1 285 1 +2 1 284 1 +2 1 283 1 +2 1 114 1 +2 1 487 1 +2 1 485 1 +2 1 28 1 +2 1 484 1 +2 1 181 1 +2 1 275 1 +2 1 274 1 +2 1 183 1 +2 1 483 1 +2 1 27 1 +2 1 266 1 +2 1 482 1 +2 1 263 1 +2 1 262 1 +2 1 260 1 +2 1 481 1 +2 1 258 1 +2 1 257 1 +2 1 116 1 +2 1 479 1 +2 1 252 1 +2 1 249 1 +2 1 248 1 +2 1 247 1 +2 1 244 1 +2 1 92 1 +2 1 241 1 +2 1 477 1 +2 1 475 1 +2 1 472 1 +2 1 470 1 +2 1 235 1 +2 1 47 1 +2 1 186 1 +2 1 126 1 +2 1 228 1 +2 1 226 1 +2 1 131 1 +2 1 467 1 +2 1 222 1 +2 1 133 1 +2 1 82 1 +2 1 218 1 +2 1 80 1 +2 1 460 1 +2 1 214 1 +2 1 8 1 +2 1 78 1 +2 1 189 1 +2 1 457 1 +2 1 455 1 +2 1 136 1 +2 1 202 1 +2 1 201 1 +2 1 453 1 +2 1 20 1 +2 1 2 1 +2 1 19 1 +2 1 452 1 +2 1 196 1 +2 1 449 1 +2 1 194 1 +2 1 190 1 +2 1 192 1 +2 1 448 1 +2 1 446 1 +2 1 444 1 +2 1 443 1 +2 1 44 1 +2 1 77 1 +2 1 143 1 +2 1 437 1 +2 1 436 1 +2 1 435 1 +2 1 432 1 +2 1 145 1 +2 1 150 1 +2 1 43 1 +2 1 10 1 +2 1 427 1 +2 1 74 1 +2 1 421 1 +2 1 9 1 +2 1 419 1 +2 1 418 
1 +2 1 153 1 +2 1 105 1 +2 1 69 1 +2 1 411 1 +2 1 41 1 +2 1 155 1 +2 1 407 1 +2 1 156 1 +2 1 87 1 +2 1 157 1 +2 1 402 1 +2 1 158 1 +2 1 400 1 +2 1 4 1 +2 1 66 1 +2 1 65 1 +2 1 160 1 +2 1 64 1 +2 1 394 1 +2 1 393 1 +2 1 392 1 +2 1 389 1 +2 1 386 1 +2 1 162 1 +2 1 86 1 +2 1 379 1 +2 1 378 1 +2 1 377 1 +2 1 375 1 +2 1 374 1 +2 1 373 1 +2 1 57 1 +2 1 163 1 +2 1 368 1 +2 1 54 1 +2 1 366 1 +2 1 365 1 +2 1 364 1 +2 1 362 1 +2 1 360 1 +2 1 356 1 +2 1 53 1 +2 1 351 1 +2 1 166 1 +2 1 168 1 +2 1 345 1 +2 1 85 1 +2 1 11 1 +2 1 341 1 +2 1 34 1 +2 1 339 1 +2 1 338 1 +2 1 336 1 +2 1 335 1 +2 1 111 1 +2 1 332 1 +2 1 497 1 +2 1 33 1 +2 1 17 1 +2 1 496 1 +2 1 323 1 +2 1 495 1 +2 1 494 1 +2 1 170 1 +2 1 493 1 +2 1 177 1 +2 1 315 1 +2 1 178 1 +2 1 310 1 +2 1 96 1 +2 1 308 1 +2 1 491 1 +2 1 306 1 +2 1 305 1 +2 1 302 1 +2 1 30 1 +2 1 180 1 +2 1 296 1 +2 1 292 1 +2 1 291 1 +2 1 289 1 +5 3 498 3 +5 3 369 3 +5 3 384 3 +5 3 396 3 +5 3 403 3 +5 3 409 3 +5 3 417 3 +5 3 5 3 +5 3 430 3 +5 3 70 3 +5 3 119 3 +5 3 0 3 +5 3 431 3 +5 3 438 3 +5 3 480 3 +5 3 193 3 +5 3 199 3 +5 3 208 3 +5 3 187 3 +5 3 273 3 +5 3 298 3 +5 3 454 3 +5 3 311 3 +5 3 316 3 +5 3 466 3 +5 3 90 3 +5 3 128 3 +5 3 318 3 +5 3 327 3 +5 3 167 3 +5 3 35 3 +PREHOOK: query: explain +select tabA.a, tabA.b, tabB.a, tabB.b +from +(select L_ORDERKEY as a, count(L_ORDERKEY) as b +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY) tabA +join +(select key as a, count(key) as b +from src +group by key +) tabB +on (tabA.b=tabB.b and tabB.a < '2') +PREHOOK: type: QUERY +POSTHOOK: query: explain +select tabA.a, tabA.b, tabB.a, tabB.b +from +(select L_ORDERKEY as a, count(L_ORDERKEY) as b +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY) tabA +join +(select key as a, count(key) as b +from src +group by key +) tabB +on (tabA.b=tabB.b and tabB.a < '2') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: taba:default.default__lineitem_ix_lineitem_ix_l_orderkey_idx__ + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (l_orderkey < 7) (type: boolean) + Statistics: Num rows: 8 Data size: 1201 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int), _count_of_l_orderkey (type: bigint) + outputColumnNames: l_orderkey, _count_of_l_orderkey + Statistics: Num rows: 8 Data size: 1201 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_l_orderkey) + keys: l_orderkey (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 1201 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 8 Data size: 1201 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 600 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Map-reduce partition columns: _col1 (type: bigint) + Statistics: Num rows: 4 Data size: 600 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + TableScan + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Map-reduce partition columns: _col1 (type: bigint) + Statistics: Num rows: 51 Data size: 6455 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} + 1 {VALUE._col0} {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 56 Data size: 7100 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 56 Data size: 7100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 56 Data size: 7100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: tabb:default.default__src_src_key_idx__ + Statistics: Num rows: 309 Data size: 39113 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < '2') (type: boolean) + Statistics: Num rows: 103 Data size: 13037 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), _count_of_key (type: bigint) + outputColumnNames: key, _count_of_key + Statistics: Num rows: 103 Data size: 13037 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_key) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 103 Data size: 13037 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 103 Data size: 13037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 51 Data size: 6455 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 51 Data size: 6455 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tabA.a, tabA.b, tabB.a, tabB.b +from +(select L_ORDERKEY as a, count(L_ORDERKEY) as b +from 
lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY) tabA +join +(select key as a, count(key) as b +from src +group by key +) tabB +on (tabA.b=tabB.b and tabB.a < '2') +PREHOOK: type: QUERY +PREHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +PREHOOK: Input: default@default__src_src_key_idx__ +PREHOOK: Input: default@lineitem_ix +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select tabA.a, tabA.b, tabB.a, tabB.b +from +(select L_ORDERKEY as a, count(L_ORDERKEY) as b +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY) tabA +join +(select key as a, count(key) as b +from src +group by key +) tabB +on (tabA.b=tabB.b and tabB.a < '2') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +POSTHOOK: Input: default@default__src_src_key_idx__ +POSTHOOK: Input: default@lineitem_ix +POSTHOOK: Input: default@src +#### A masked pattern was here #### +6 1 156 1 +6 1 155 1 +6 1 153 1 +6 1 186 1 +6 1 150 1 +6 1 183 1 +6 1 181 1 +6 1 180 1 +6 1 145 1 +6 1 143 1 +6 1 111 1 +6 1 11 1 +6 1 136 1 +6 1 178 1 +6 1 133 1 +6 1 131 1 +6 1 177 1 +6 1 194 1 +6 1 126 1 +6 1 192 1 +6 1 105 1 +6 1 190 1 +6 1 19 1 +6 1 170 1 +6 1 116 1 +6 1 17 1 +6 1 10 1 +6 1 168 1 +6 1 189 1 +6 1 166 1 +6 1 196 1 +6 1 114 1 +6 1 163 1 +6 1 162 1 +6 1 160 1 +6 1 158 1 +6 1 157 1 +4 1 156 1 +4 1 155 1 +4 1 153 1 +4 1 186 1 +4 1 150 1 +4 1 183 1 +4 1 181 1 +4 1 180 1 +4 1 145 1 +4 1 143 1 +4 1 111 1 +4 1 11 1 +4 1 136 1 +4 1 178 1 +4 1 133 1 +4 1 131 1 +4 1 177 1 +4 1 194 1 +4 1 126 1 +4 1 192 1 +4 1 105 1 +4 1 190 1 +4 1 19 1 +4 1 170 1 +4 1 116 1 +4 1 17 1 +4 1 10 1 +4 1 168 1 +4 1 189 1 +4 1 166 1 +4 1 196 1 +4 1 114 1 +4 1 163 1 +4 1 162 1 +4 1 160 1 +4 1 158 1 +4 1 157 1 +2 1 156 1 +2 1 155 1 +2 1 153 1 +2 1 186 1 +2 1 150 1 +2 1 183 1 +2 1 181 1 +2 1 180 1 +2 1 145 1 +2 1 143 1 +2 1 111 1 +2 1 11 1 +2 1 136 1 +2 1 178 1 +2 1 133 1 +2 1 131 1 +2 1 177 1 +2 1 194 1 +2 1 126 1 +2 1 192 1 +2 1 105 1 +2 1 190 1 +2 1 19 1 +2 1 170 1 +2 1 116 1 +2 1 17 1 +2 1 10 1 +2 1 168 1 +2 1 189 1 +2 1 166 1 +2 1 196 1 +2 1 114 1 +2 1 163 1 +2 1 162 1 +2 1 160 1 +2 1 158 1 +2 1 157 1 +5 3 199 3 +5 3 193 3 +5 3 187 3 +5 3 167 3 +5 3 128 3 +5 3 119 3 +5 3 0 3 +PREHOOK: query: EXPLAIN +select L_ORDERKEY FROM lineitem_ix GROUP BY L_ORDERKEY, L_ORDERKEY+1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +select L_ORDERKEY FROM lineitem_ix GROUP BY L_ORDERKEY, L_ORDERKEY+1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_ix + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int), (l_orderkey + 1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select L_ORDERKEY FROM lineitem_ix GROUP BY L_ORDERKEY, L_ORDERKEY+1 +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select L_ORDERKEY FROM lineitem_ix GROUP BY L_ORDERKEY, L_ORDERKEY+1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +1 +2 +3 +4 +5 +6 +7 +32 +33 +34 +35 +36 +37 +38 +39 +64 +65 +66 +67 +68 +69 +70 +71 +96 +97 +98 +PREHOOK: query: EXPLAIN +select L_ORDERKEY, L_ORDERKEY+1, count(L_ORDERKEY) FROM lineitem_ix GROUP BY L_ORDERKEY, L_ORDERKEY+1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +select L_ORDERKEY, L_ORDERKEY+1, count(L_ORDERKEY) FROM lineitem_ix GROUP BY L_ORDERKEY, L_ORDERKEY+1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: $hdt$_0:$hdt$_0:default.default__lineitem_ix_lineitem_ix_l_orderkey_idx__ + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int), (l_orderkey + 1) (type: int), _count_of_l_orderkey (type: bigint) + outputColumnNames: _col0, _col1, _count_of_l_orderkey + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_l_orderkey) + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select L_ORDERKEY, L_ORDERKEY+1, 
count(L_ORDERKEY) FROM lineitem_ix GROUP BY L_ORDERKEY, L_ORDERKEY+1 +PREHOOK: type: QUERY +PREHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select L_ORDERKEY, L_ORDERKEY+1, count(L_ORDERKEY) FROM lineitem_ix GROUP BY L_ORDERKEY, L_ORDERKEY+1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +1 2 6 +2 3 1 +3 4 6 +4 5 1 +5 6 3 +6 7 1 +7 8 7 +32 33 6 +33 34 4 +34 35 3 +35 36 6 +36 37 1 +37 38 3 +38 39 1 +39 40 6 +64 65 1 +65 66 3 +66 67 2 +67 68 6 +68 69 7 +69 70 6 +70 71 6 +71 72 6 +96 97 2 +97 98 3 +98 99 3 +PREHOOK: query: EXPLAIN +select L_ORDERKEY+2, count(L_ORDERKEY) FROM lineitem_ix GROUP BY L_ORDERKEY+2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +select L_ORDERKEY+2, count(L_ORDERKEY) FROM lineitem_ix GROUP BY L_ORDERKEY+2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: $hdt$_0:$hdt$_0:default.default__lineitem_ix_lineitem_ix_l_orderkey_idx__ + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (l_orderkey + 2) (type: int), l_orderkey (type: int), _count_of_l_orderkey (type: bigint) + outputColumnNames: _col0, _col1, _count_of_l_orderkey + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_l_orderkey) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select L_ORDERKEY+2, count(L_ORDERKEY) FROM lineitem_ix GROUP BY L_ORDERKEY+2 +PREHOOK: type: QUERY +PREHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select L_ORDERKEY+2, count(L_ORDERKEY) FROM lineitem_ix GROUP BY L_ORDERKEY+2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +3 6 +4 1 +5 6 +6 1 +7 3 +8 1 +9 7 +34 6 +35 4 +36 3 +37 6 +38 1 +39 3 +40 1 +41 6 +66 1 +67 3 +68 
2 +69 6 +70 7 +71 6 +72 6 +73 6 +98 2 +99 3 +100 3 +PREHOOK: query: --with cbo on, the following query can use idx + +explain +select b, count(b) as ckeysum +from +( +select L_ORDERKEY as a, count(L_ORDERKEY) as b +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY +union all +select L_PARTKEY as a, count(L_PARTKEY) as b +from lineitem_ix +where L_PARTKEY < 10 +group by L_PARTKEY +) tabA +group by b +PREHOOK: type: QUERY +POSTHOOK: query: --with cbo on, the following query can use idx + +explain +select b, count(b) as ckeysum +from +( +select L_ORDERKEY as a, count(L_ORDERKEY) as b +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY +union all +select L_PARTKEY as a, count(L_PARTKEY) as b +from lineitem_ix +where L_PARTKEY < 10 +group by L_PARTKEY +) tabA +group by b +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: $hdt$_0-subquery1:$hdt$_0-subquery1:$hdt$_0:$hdt$_0:default.default__lineitem_ix_lineitem_ix_l_orderkey_idx__ + Statistics: Num rows: 26 Data size: 3904 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (l_orderkey < 7) (type: boolean) + Statistics: Num rows: 8 Data size: 1201 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int), _count_of_l_orderkey (type: bigint) + outputColumnNames: _col0, _count_of_l_orderkey + Statistics: Num rows: 8 Data size: 1201 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_l_orderkey) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 1201 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 8 Data size: 1201 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 600 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 20 Data size: 2829 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0) + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 2829 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 20 Data size: 2829 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + TableScan + Union + Statistics: Num rows: 20 Data size: 2829 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0) + keys: 
_col0 (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 2829 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 20 Data size: 2829 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 1414 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 1414 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 1414 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: $hdt$_0-subquery2:$hdt$_0-subquery2:$hdt$_0:$hdt$_0:default.default__lineitem_ix_lineitem_ix_l_partkey_idx__ + Statistics: Num rows: 100 Data size: 13937 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (l_partkey < 10) (type: boolean) + Statistics: Num rows: 33 Data size: 4599 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_partkey (type: int), _count_of_l_partkey (type: bigint) + outputColumnNames: _col0, _count_of_l_partkey + Statistics: Num rows: 33 Data size: 4599 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_count_of_l_partkey) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 33 Data size: 4599 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 33 Data size: 4599 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 2229 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 16 Data size: 2229 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select b, count(b) as ckeysum +from +( +select L_ORDERKEY as a, count(L_ORDERKEY) as b +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY +union all +select L_PARTKEY as a, count(L_PARTKEY) as b +from lineitem_ix +where L_PARTKEY < 10 +group by L_PARTKEY +) tabA +group by b +PREHOOK: type: QUERY +PREHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +PREHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_partkey_idx__ +PREHOOK: Input: 
default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select b, count(b) as ckeysum +from +( +select L_ORDERKEY as a, count(L_ORDERKEY) as b +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY +union all +select L_PARTKEY as a, count(L_PARTKEY) as b +from lineitem_ix +where L_PARTKEY < 10 +group by L_PARTKEY +) tabA +group by b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_orderkey_idx__ +POSTHOOK: Input: default@default__lineitem_ix_lineitem_ix_l_partkey_idx__ +POSTHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +1 3 +3 1 +6 2 +PREHOOK: query: --with cbo on, the following query can not use idx because AggFunc is empty here + +explain +select a, count(a) as ckeysum +from +( +select L_ORDERKEY as a, count(L_ORDERKEY) as b +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY +union all +select L_PARTKEY as a, count(L_PARTKEY) as b +from lineitem_ix +where L_PARTKEY < 10 +group by L_PARTKEY +) tabA +group by a +PREHOOK: type: QUERY +POSTHOOK: query: --with cbo on, the following query can not use idx because AggFunc is empty here + +explain +select a, count(a) as ckeysum +from +( +select L_ORDERKEY as a, count(L_ORDERKEY) as b +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY +union all +select L_PARTKEY as a, count(L_PARTKEY) as b +from lineitem_ix +where L_PARTKEY < 10 +group by L_PARTKEY +) tabA +group by a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_ix + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (l_orderkey < 7) (type: boolean) + Statistics: Num rows: 1008 Data size: 4033 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1008 Data size: 4033 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1008 Data size: 4033 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1008 Data size: 4033 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 504 Data size: 2016 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 1008 Data size: 4032 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1008 Data size: 4032 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1008 Data size: 4032 Basic stats: COMPLETE Column stats: NONE 
+ value expressions: _col1 (type: bigint) + TableScan + Union + Statistics: Num rows: 1008 Data size: 4032 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1008 Data size: 4032 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1008 Data size: 4032 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 504 Data size: 2016 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 504 Data size: 2016 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 504 Data size: 2016 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_ix + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (l_partkey < 10) (type: boolean) + Statistics: Num rows: 1008 Data size: 4033 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_partkey (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1008 Data size: 4033 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1008 Data size: 4033 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1008 Data size: 4033 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 504 Data size: 2016 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a, count(a) as ckeysum +from +( +select L_ORDERKEY as a, count(L_ORDERKEY) as b +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY +union all +select L_PARTKEY as a, count(L_PARTKEY) as b +from lineitem_ix +where L_PARTKEY < 10 +group by L_PARTKEY +) tabA +group by a +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_ix +#### A masked pattern was here #### +POSTHOOK: query: select a, count(a) as ckeysum +from +( +select L_ORDERKEY as a, count(L_ORDERKEY) as b +from lineitem_ix +where L_ORDERKEY < 7 +group by L_ORDERKEY +union all +select L_PARTKEY as a, count(L_PARTKEY) as b +from lineitem_ix +where L_PARTKEY < 10 +group by L_PARTKEY +) tabA +group by a +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@lineitem_ix
+#### A masked pattern was here ####
+1 1
+2 1
+3 1
+4 1
+5 1
+6 1
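
The plans above capture the intended effect of the aggregate index rewrite: a per-key count over the base table is answered from the index table built by AggregateIndexHandler, by summing the precomputed partial counts. A minimal HiveQL sketch of that equivalence, using the index table and _count_of_key column named in the lineage output above (illustrative only, not part of the generated .q.out):

-- original query over the base table
select key, count(key)
from src
group by key;

-- what the rewritten plan effectively evaluates instead (see the Stage-3
-- operator tree above): scan the aggregate index table and sum the stored
-- partial counts for each key
select key, sum(_count_of_key)
from default__src_src_key_idx__
group by key;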
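
The EXPLAIN outputs for GROUP BY L_ORDERKEY, L_ORDERKEY+1 show that the rewrite only fires when the query actually aggregates the indexed key: the plain projection scans lineitem_ix itself, while adding count(L_ORDERKEY) switches the TableScan to the l_orderkey index table, and the L_ORDERKEY+2 variant shows the same holds when the only grouping key is an expression over the indexed column. A sketch of the contrasting pair, both taken from the queries above:

-- no aggregate function: not rewritten, the plan scans lineitem_ix
select L_ORDERKEY from lineitem_ix group by L_ORDERKEY, L_ORDERKEY+1;

-- count over the indexed key: rewritten, the plan scans
-- default__lineitem_ix_lineitem_ix_l_orderkey_idx__ and sums
-- _count_of_l_orderkey, even though the GROUP BY also carries the
-- derived key L_ORDERKEY+1
select L_ORDERKEY, L_ORDERKEY+1, count(L_ORDERKEY)
from lineitem_ix
group by L_ORDERKEY, L_ORDERKEY+1;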
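
The last two union queries isolate the CBO-dependent case called out in the inline test comments: grouping the union by b (the inner counts) still lets both branches read their respective aggregate index tables, whereas grouping by a leaves the outer query with no aggregate over an indexed key ("AggFunc is empty"), so both branches fall back to full scans of lineitem_ix. The rewritten form, reproduced from the test above:

-- rewritten: Stage-1 and Stage-3 above scan the l_orderkey and l_partkey
-- index tables respectively
select b, count(b) as ckeysum
from (
  select L_ORDERKEY as a, count(L_ORDERKEY) as b
  from lineitem_ix where L_ORDERKEY < 7 group by L_ORDERKEY
  union all
  select L_PARTKEY as a, count(L_PARTKEY) as b
  from lineitem_ix where L_PARTKEY < 10 group by L_PARTKEY
) tabA
group by b;
-- the companion query that groups by a (shown in full above) is not
-- rewritten: with no count over an indexed key, both union branches
-- scan lineitem_ix directly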