diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
index 82a141d..7be2440 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
@@ -507,6 +507,13 @@ public final class ColumnPrunerProcFactory {
       String outputCol = keys.get(i);
       String[] nm = parResover.reverseLookup(outputCol);
       ColumnInfo colInfo = oldRR.get(nm[0], nm[1]);
+      if (colInfo == null) {
+        outputCol = Utilities.ReduceField.KEY.toString() + "." + outputCol;
+        nm = oldRR.reverseLookup(outputCol);
+        if (nm != null) {
+          colInfo = oldRR.get(nm[0], nm[1]);
+        }
+      }
       if (colInfo != null) {
         String internalName=colInfo.getInternalName();
         newMap.put(internalName, oldMap.get(internalName));
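Note on the ColumnPrunerProcFactory hunk: after deduplication, a group-by key column may be registered in the old row resolver only under its reduce-side name, i.e. prefixed with Utilities.ReduceField.KEY ("KEY.<col>"), so the plain reverseLookup can come back null; the added block retries the lookup with the prefixed name. Below is a minimal, self-contained model of that two-step lookup; the map and names are illustrative stand-ins for Hive's RowResolver, not Hive API:

    import java.util.HashMap;
    import java.util.Map;

    public class KeyPrefixLookup {
      // Stand-in for RowResolver.reverseLookup(): internal name -> {table alias, column alias}.
      private static final Map<String, String[]> RR = new HashMap<String, String[]>();
      static {
        // The reduce-side resolver knows the sort key only under its "KEY." prefix.
        RR.put("KEY._col0", new String[] {"q1", "int1"});
      }

      static String[] reverseLookup(String outputCol) {
        String[] nm = RR.get(outputCol);       // first try the name as-is
        if (nm == null) {
          nm = RR.get("KEY." + outputCol);     // fall back to the reduce-field-prefixed name
        }
        return nm;
      }

      public static void main(String[] args) {
        String[] nm = reverseLookup("_col0");
        System.out.println(nm[0] + "." + nm[1]); // prints q1.int1
      }
    }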
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkDeDuplication.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkDeDuplication.java
index e91b4d5..4621d7f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkDeDuplication.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkDeDuplication.java
@@ -28,16 +28,17 @@ import java.util.Map;
 import java.util.Stack;
 
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.ExtractOperator;
 import org.apache.hadoop.hive.ql.exec.FilterOperator;
 import org.apache.hadoop.hive.ql.exec.ForwardOperator;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.OperatorFactory;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.RowSchema;
 import org.apache.hadoop.hive.ql.exec.ScriptOperator;
 import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
 import org.apache.hadoop.hive.ql.lib.Dispatcher;
@@ -53,9 +54,14 @@ import org.apache.hadoop.hive.ql.parse.RowResolver;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
 import org.apache.hadoop.hive.ql.plan.SelectDesc;
 
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVEGROUPBYSKEW;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVESCRIPTOPERATORTRUST;
+
 /**
  * If two reducer sink operators share the same partition/sort columns, we
  * should merge them. This should happen after map join optimization because map
@@ -73,6 +79,8 @@ public class ReduceSinkDeDuplication implements Transform{
     ReduceSinkDeduplicateProcCtx cppCtx = new ReduceSinkDeduplicateProcCtx(pGraphContext);
 
     Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+    opRules.put(new RuleRegExp("R2", "RS%.*RS%GBY%"), ReduceSinkDeduplicateProcFactory
+        .getReducerGbyReducerGbyProc());
     opRules.put(new RuleRegExp("R1", "RS%.*RS%"), ReduceSinkDeduplicateProcFactory
         .getReducerReducerProc());
 
@@ -119,7 +127,11 @@ public class ReduceSinkDeDuplication implements Transform{
 
   static class ReduceSinkDeduplicateProcFactory {
 
+    public static NodeProcessor getReducerGbyReducerGbyProc() {
+      return new ReducerGbyReducerGbyProc();
+    }
+
     public static NodeProcessor getReducerReducerProc() {
       return new ReducerReducerProc();
     }
@@ -449,5 +461,209 @@ public class ReduceSinkDeDuplication implements Transform{
       }
     }
 
+    static class ReducerGbyReducerGbyProc implements NodeProcessor {
+
+      @Override
+      public Object process(Node nd, Stack<Node> stack,
+          NodeProcessorCtx procCtx, Object... nodeOutputs)
+          throws SemanticException {
+        ReduceSinkDeduplicateProcCtx ctx = (ReduceSinkDeduplicateProcCtx) procCtx;
+        ParseContext pGraphContext = ctx.getPctx();
+
+        if (pGraphContext.getConf().getBoolVar(HIVEGROUPBYSKEW)) {
+          return false;
+        }
+
+        boolean trustScript = pGraphContext.getConf().getBoolVar(HIVESCRIPTOPERATORTRUST);
+
+        GroupByOperator childGroupBy = (GroupByOperator) nd;
+        ReduceSinkOperator childRS = (ReduceSinkOperator) getSingleParent(childGroupBy);
+        if (childRS == null) {
+          return false;
+        }
+
+        ReduceSinkOperator parentRS = findPossibleReduceSink(getSingleParent(childRS), trustScript);
+        if (parentRS == null) {
+          return false;
+        }
+        List<ExprNodeDesc> ckeys = childRS.getConf().getKeyCols();
+        List<ExprNodeDesc> pkeys = parentRS.getConf().getKeyCols();
+        if (pkeys != null && !pkeys.isEmpty() && !sameKeys(ckeys, pkeys, childRS, parentRS)) {
+          return false;
+        }
+        List<ExprNodeDesc> cpars = childRS.getConf().getPartitionCols();
+        List<ExprNodeDesc> ppars = parentRS.getConf().getPartitionCols();
+        if (ppars != null && !ppars.isEmpty() && !sameKeys(cpars, ppars, childRS, parentRS)) {
+          return false;
+        }
+        String corder = childRS.getConf().getOrder();
+        String porder = parentRS.getConf().getOrder();
+        if (corder != null && !corder.trim().equals("")) {
+          if (porder == null || !corder.trim().equals(porder.trim())) {
+            return false;
+          }
+        } else if (porder == null || porder.trim().equals("")) {
+          parentRS.getConf().setOrder(corder);
+        }
+        int creduce = childRS.getConf().getNumReducers();
+        int preduce = parentRS.getConf().getNumReducers();
+        if (creduce != preduce) {
+          if (creduce >= 0 && preduce >= 0) {
+            return false;
+          }
+          if (preduce == -1) {
+            parentRS.getConf().setNumReducers(creduce);
+          }
+        }
+
+        replaceReduceSinkWithSelectOperator(parentRS, pGraphContext);
+        return true;
+      }
+
+      private boolean sameKeys(List<ExprNodeDesc> einits, List<ExprNodeDesc> eterms,
+          Operator<? extends Serializable> init, Operator<? extends Serializable> terminal) {
+        if (eterms == null || eterms.size() != einits.size()) {
+          return false;
+        }
+        for (int i = 0; i < einits.size(); i++) {
+          if (!backtrack(einits.get(i), init, terminal).isSame(eterms.get(i))) {
+            return false;
+          }
+        }
+        return true;
+      }
+
+      private ArrayList<ExprNodeDesc> backtrack(List<ExprNodeDesc> sources,
+          Operator<? extends Serializable> current, Operator<? extends Serializable> terminal) {
+        ArrayList<ExprNodeDesc> result = new ArrayList<ExprNodeDesc>();
+        for (ExprNodeDesc expr : sources) {
+          result.add(backtrack(expr, current, terminal));
+        }
+        return result;
+      }
+
+      private ExprNodeDesc backtrack(ExprNodeDesc source,
+          Operator<? extends Serializable> current, Operator<? extends Serializable> terminal) {
+        if (current == terminal) {
+          return source;
+        }
+        if (source instanceof ExprNodeGenericFuncDesc) {
+          ExprNodeGenericFuncDesc function = (ExprNodeGenericFuncDesc) source.clone();
+          ArrayList<ExprNodeDesc> params = new ArrayList<ExprNodeDesc>();
+          for (ExprNodeDesc param : function.getChildren()) {
+            params.add(backtrack(param, current, terminal));
+          }
+          function.setChildExprs(params);
+          return function;
+        }
+        if (source instanceof ExprNodeColumnDesc) {
+          ExprNodeColumnDesc column = (ExprNodeColumnDesc) source;
+          return backtrack(column, current, terminal);
+        }
+        if (source instanceof ExprNodeFieldDesc) {
+          ExprNodeFieldDesc field = (ExprNodeFieldDesc) source;
+          return new ExprNodeFieldDesc(field.getTypeInfo(),
+              backtrack(field.getDesc(), current, terminal),
+              field.getFieldName(), field.getIsList());
+        }
+        return source;
+      }
+
+      private ExprNodeDesc backtrack(ExprNodeColumnDesc column,
+          Operator<? extends Serializable> current, Operator<? extends Serializable> terminal) {
+        if (current == null || current == terminal) {
+          return column;
+        }
+        Map<String, ExprNodeDesc> mapping = current.getColumnExprMap();
+        if (mapping == null || !mapping.containsKey(column.getColumn())) {
+          return backtrack(column, getSingleParent(current), terminal);
+        }
+        return backtrack(mapping.get(column.getColumn()), getSingleParent(current), terminal);
+      }
+
+      private Operator<? extends Serializable> getSingleParent(
+          Operator<? extends Serializable> operator) {
+        if (operator.getParentOperators() != null && operator.getParentOperators().size() == 1) {
+          return operator.getParentOperators().get(0);
+        }
+        return null;
+      }
+
+      private ReduceSinkOperator findPossibleReduceSink(
+          Operator<? extends Serializable> start, boolean trustScript) {
+        Operator<? extends Serializable> cursor = getSingleParent(start);
+        for (; cursor != null; cursor = getSingleParent(cursor)) {
+          if (ReduceSinkOperator.class.isAssignableFrom(cursor.getClass())) {
+            return (ReduceSinkOperator) cursor;
+          }
+          if (!trustScript && cursor instanceof ScriptOperator) {
+            return null;
+          }
+          if (!(cursor instanceof SelectOperator
+              || cursor instanceof FilterOperator
+              || cursor instanceof ExtractOperator
+              || cursor instanceof ForwardOperator
+              || cursor instanceof ScriptOperator
+              || cursor instanceof ReduceSinkOperator)) {
+            return null;
+          }
+        }
+        return null;
+      }
+
+      // copied from ReducerReducerProc
+      private void replaceReduceSinkWithSelectOperator(
+          ReduceSinkOperator childReduceSink, ParseContext pGraphContext) throws SemanticException {
+        List<Operator<? extends Serializable>> parentOp = childReduceSink.getParentOperators();
+        List<Operator<? extends Serializable>> childOp = childReduceSink.getChildOperators();
+
+        Operator<? extends Serializable> oldParent = childReduceSink;
+
+        if (childOp != null && childOp.size() == 1
+            && ((childOp.get(0)) instanceof ExtractOperator)) {
+          oldParent = childOp.get(0);
+          childOp = childOp.get(0).getChildOperators();
+        }
+
+        Operator<? extends Serializable> input = parentOp.get(0);
+        input.getChildOperators().clear();
+
+        RowResolver inputRR = pGraphContext.getOpParseCtx().get(input).getRowResolver();
+
+        ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
+        ArrayList<String> outputs = new ArrayList<String>();
+        List<String> outputCols = childReduceSink.getConf().getOutputValueColumnNames();
+        RowResolver outputRS = new RowResolver();
+
+        Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
+
+        for (int i = 0; i < outputCols.size(); i++) {
+          String internalName = outputCols.get(i);
+          String[] nm = inputRR.reverseLookup(internalName);
+          ColumnInfo valueInfo = inputRR.get(nm[0], nm[1]);
+          ExprNodeDesc colDesc = childReduceSink.getConf().getValueCols().get(i);
+          exprs.add(colDesc);
+          outputs.add(internalName);
+          outputRS.put(nm[0], nm[1], new ColumnInfo(internalName, valueInfo
+              .getType(), nm[0], valueInfo.getIsVirtualCol(), valueInfo.isHiddenVirtualCol()));
+          colExprMap.put(internalName, colDesc);
+        }
+
+        SelectDesc select = new SelectDesc(exprs, outputs, false);
+
+        SelectOperator sel = (SelectOperator) putOpInsertMap(
+            OperatorFactory.getAndMakeChild(select, new RowSchema(inputRR
+                .getColumnInfos()), input), inputRR, pGraphContext);
+
+        sel.setColumnExprMap(colExprMap);
+
+        // Insert the select operator in between.
+        if (childOp != null) {
+          sel.setChildOperators(childOp);
+          for (Operator<? extends Serializable> ch : childOp) {
+            ch.replaceParent(oldParent, sel);
+          }
+        }
+      }
+
+      private Operator<? extends Serializable> putOpInsertMap(
+          Operator<? extends Serializable> op, RowResolver rr, ParseContext pGraphContext) {
+        OpParseContext ctx = new OpParseContext(rr);
+        pGraphContext.getOpParseCtx().put(op, ctx);
+        return op;
+      }
+    }
   }
 }
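The heart of the new processor is sameKeys plus the backtrack overloads: each key and partition expression of the child reduce sink is rewritten in terms of the operators above it by following each operator's column-expression map up to the parent reduce sink, and the result is compared with the parent's expression via isSame. The sketch below models that walk outside Hive, with an operator reduced to a parent pointer and a rename map; all names here are hypothetical stand-ins, not Hive classes:

    import java.util.Collections;
    import java.util.Map;

    public class BacktrackModel {
      // Stand-in for Operator: parent link + columnExprMap (output column -> source expression).
      static final class Op {
        final Op parent;
        final Map<String, String> columnExprMap;
        Op(Op parent, Map<String, String> columnExprMap) {
          this.parent = parent;
          this.columnExprMap = columnExprMap;
        }
      }

      // Resolve a column through successive rename maps until the terminal operator,
      // mirroring backtrack(ExprNodeColumnDesc, current, terminal).
      static String backtrack(String column, Op current, Op terminal) {
        if (current == null || current == terminal) {
          return column;
        }
        String mapped = current.columnExprMap.get(column);
        return backtrack(mapped != null ? mapped : column, current.parent, terminal);
      }

      public static void main(String[] args) {
        Op parentRS = new Op(null, Collections.<String, String>emptyMap());
        Op extract = new Op(parentRS, Collections.singletonMap("_col0", "KEY.reducesinkkey0"));
        Op select = new Op(extract, Collections.<String, String>emptyMap());
        // A child key "_col0" resolves to the parent sink's key column, so the sinks can merge.
        System.out.println(backtrack("_col0", select, parentRS)); // prints KEY.reducesinkkey0
      }
    }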
diff --git ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q
new file mode 100644
index 0000000..a781635
--- /dev/null
+++ ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q
@@ -0,0 +1,21 @@
+create table t1 (int1 int, int2 int, str1 string, str2 string);
+create table t2 (int1 int, int2 int, str1 string, str2 string);
+
+set hive.map.aggr=true;
+
+explain select Q1.int1, sum(Q1.int1) from (select * from t1 order by int1) Q1 group by Q1.int1;
+explain select (Q1.int1 + 1), sum(Q1.int1 + 1) from (select * from t1 order by (int1 + 1)) Q1 group by (Q1.int1 + 1);
+
+select Q1.int1, sum(Q1.int1) from (select * from t1 order by int1) Q1 group by Q1.int1;
+select (Q1.int1 + 1), sum(Q1.int1 + 1) from (select * from t1 order by (int1 + 1)) Q1 group by (Q1.int1 + 1);
+
+set hive.map.aggr=false;
+
+explain select Q1.int1, sum(Q1.int1) from (select * from t1 order by int1) Q1 group by Q1.int1;
+explain select (Q1.int1 + 1), sum(Q1.int1 + 1) from (select * from t1 order by (int1 + 1)) Q1 group by (Q1.int1 + 1);
+
+select Q1.int1, sum(Q1.int1) from (select * from t1 order by int1) Q1 group by Q1.int1;
+select (Q1.int1 + 1), sum(Q1.int1 + 1) from (select * from t1 order by (int1 + 1)) Q1 group by (Q1.int1 + 1);
+
+drop table t1;
+drop table t2;
\ No newline at end of file
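The golden output below shows the optimization taking effect: every query compiles to a single MapReduce stage. With hive.map.aggr=true the plan keeps the map-side hash Group By feeding one Reduce Output Operator (mode: mergepartial on the reduce side); with hive.map.aggr=false the ORDER BY reduce sink itself carries the group-by key and the reducer runs the Group By in mode: complete. On the ant-based build of this vintage, a command along the lines of "ant test -Dtestcase=TestCliDriver -Dqfile=reduce_deduplicate_extended.q" should replay this file (the exact invocation depends on the checkout and is given here only as a reminder).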
diff --git ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out
new file mode 100644
index 0000000..53db9f0
--- /dev/null
+++ ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out
@@ -0,0 +1,414 @@
+PREHOOK: query: create table t1 (int1 int, int2 int, str1 string, str2 string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table t1 (int1 int, int2 int, str1 string, str2 string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@t1
+PREHOOK: query: create table t2 (int1 int, int2 int, str1 string, str2 string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table t2 (int1 int, int2 int, str1 string, str2 string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@t2
+PREHOOK: query: explain select Q1.int1, sum(Q1.int1) from (select * from t1 order by int1) Q1 group by Q1.int1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select Q1.int1, sum(Q1.int1) from (select * from t1 order by int1) Q1 group by Q1.int1
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL int1))))) Q1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL Q1) int1)) (TOK_SELEXPR (TOK_FUNCTION sum (. (TOK_TABLE_OR_COL Q1) int1)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL Q1) int1))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        q1:t1 
+          TableScan
+            alias: t1
+            Select Operator
+              expressions:
+                    expr: int1
+                    type: int
+                    expr: int2
+                    type: int
+                    expr: str1
+                    type: string
+                    expr: str2
+                    type: string
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: int
+                      expr: _col1
+                      type: int
+                      expr: _col2
+                      type: string
+                      expr: _col3
+                      type: string
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: int
+                  outputColumnNames: _col0
+                  Group By Operator
+                    aggregations:
+                          expr: sum(_col0)
+                    bucketGroup: false
+                    keys:
+                          expr: _col0
+                          type: int
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Reduce Output Operator
+                      key expressions:
+                            expr: _col0
+                            type: int
+                      sort order: +
+                      Map-reduce partition columns:
+                            expr: _col0
+                            type: int
+                      tag: -1
+                      value expressions:
+                            expr: _col1
+                            type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: sum(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: int
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: int
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: explain select (Q1.int1 + 1), sum(Q1.int1 + 1) from (select * from t1 order by (int1 + 1)) Q1 group by (Q1.int1 + 1)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select (Q1.int1 + 1), sum(Q1.int1 + 1) from (select * from t1 order by (int1 + 1)) Q1 group by (Q1.int1 + 1)
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (+ (TOK_TABLE_OR_COL int1) 1))))) Q1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (. (TOK_TABLE_OR_COL Q1) int1) 1)) (TOK_SELEXPR (TOK_FUNCTION sum (+ (. (TOK_TABLE_OR_COL Q1) int1) 1)))) (TOK_GROUPBY (+ (. (TOK_TABLE_OR_COL Q1) int1) 1))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        q1:t1 
+          TableScan
+            alias: t1
+            Select Operator
+              expressions:
+                    expr: int1
+                    type: int
+                    expr: int2
+                    type: int
+                    expr: str1
+                    type: string
+                    expr: str2
+                    type: string
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: int
+                      expr: _col1
+                      type: int
+                      expr: _col2
+                      type: string
+                      expr: _col3
+                      type: string
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: int
+                  outputColumnNames: _col0
+                  Group By Operator
+                    aggregations:
+                          expr: sum((_col0 + 1))
+                    bucketGroup: false
+                    keys:
+                          expr: (_col0 + 1)
+                          type: int
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Reduce Output Operator
+                      key expressions:
+                            expr: _col0
+                            type: int
+                      sort order: +
+                      Map-reduce partition columns:
+                            expr: _col0
+                            type: int
+                      tag: -1
+                      value expressions:
+                            expr: _col1
+                            type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: sum(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: int
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: int
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select Q1.int1, sum(Q1.int1) from (select * from t1 order by int1) Q1 group by Q1.int1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: file:/tmp/navis/hive_2011-12-02_00-22-49_530_2448925010641097290/-mr-10000
+POSTHOOK: query: select Q1.int1, sum(Q1.int1) from (select * from t1 order by int1) Q1 group by Q1.int1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: file:/tmp/navis/hive_2011-12-02_00-22-49_530_2448925010641097290/-mr-10000
+PREHOOK: query: select (Q1.int1 + 1), sum(Q1.int1 + 1) from (select * from t1 order by (int1 + 1)) Q1 group by (Q1.int1 + 1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: file:/tmp/navis/hive_2011-12-02_00-22-52_428_4793720479008258255/-mr-10000
+POSTHOOK: query: select (Q1.int1 + 1), sum(Q1.int1 + 1) from (select * from t1 order by (int1 + 1)) Q1 group by (Q1.int1 + 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: file:/tmp/navis/hive_2011-12-02_00-22-52_428_4793720479008258255/-mr-10000
+PREHOOK: query: explain select Q1.int1, sum(Q1.int1) from (select * from t1 order by int1) Q1 group by Q1.int1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select Q1.int1, sum(Q1.int1) from (select * from t1 order by int1) Q1 group by Q1.int1
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL int1))))) Q1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL Q1) int1)) (TOK_SELEXPR (TOK_FUNCTION sum (. (TOK_TABLE_OR_COL Q1) int1)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL Q1) int1))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        q1:t1 
+          TableScan
+            alias: t1
+            Select Operator
+              expressions:
+                    expr: int1
+                    type: int
+                    expr: int2
+                    type: int
+                    expr: str1
+                    type: string
+                    expr: str2
+                    type: string
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: int
+                      expr: _col1
+                      type: int
+                      expr: _col2
+                      type: string
+                      expr: _col3
+                      type: string
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: int
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    key expressions:
+                          expr: _col0
+                          type: int
+                    sort order: +
+                    Map-reduce partition columns:
+                          expr: _col0
+                          type: int
+                    tag: -1
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: sum(KEY._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: int
+          mode: complete
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: int
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: explain select (Q1.int1 + 1), sum(Q1.int1 + 1) from (select * from t1 order by (int1 + 1)) Q1 group by (Q1.int1 + 1)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select (Q1.int1 + 1), sum(Q1.int1 + 1) from (select * from t1 order by (int1 + 1)) Q1 group by (Q1.int1 + 1)
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (+ (TOK_TABLE_OR_COL int1) 1))))) Q1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (. (TOK_TABLE_OR_COL Q1) int1) 1)) (TOK_SELEXPR (TOK_FUNCTION sum (+ (. (TOK_TABLE_OR_COL Q1) int1) 1)))) (TOK_GROUPBY (+ (. (TOK_TABLE_OR_COL Q1) int1) 1))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        q1:t1 
+          TableScan
+            alias: t1
+            Select Operator
+              expressions:
+                    expr: int1
+                    type: int
+                    expr: int2
+                    type: int
+                    expr: str1
+                    type: string
+                    expr: str2
+                    type: string
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: int
+                      expr: _col1
+                      type: int
+                      expr: _col2
+                      type: string
+                      expr: _col3
+                      type: string
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: int
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    key expressions:
+                          expr: (_col0 + 1)
+                          type: int
+                    sort order: +
+                    Map-reduce partition columns:
+                          expr: (_col0 + 1)
+                          type: int
+                    tag: -1
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: sum(KEY._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: int
+          mode: complete
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: int
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select Q1.int1, sum(Q1.int1) from (select * from t1 order by int1) Q1 group by Q1.int1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: file:/tmp/navis/hive_2011-12-02_00-22-55_342_5605861073858128309/-mr-10000
+POSTHOOK: query: select Q1.int1, sum(Q1.int1) from (select * from t1 order by int1) Q1 group by Q1.int1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: file:/tmp/navis/hive_2011-12-02_00-22-55_342_5605861073858128309/-mr-10000
+PREHOOK: query: select (Q1.int1 + 1), sum(Q1.int1 + 1) from (select * from t1 order by (int1 + 1)) Q1 group by (Q1.int1 + 1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: file:/tmp/navis/hive_2011-12-02_00-22-57_954_5732523406632917271/-mr-10000
+POSTHOOK: query: select (Q1.int1 + 1), sum(Q1.int1 + 1) from (select * from t1 order by (int1 + 1)) Q1 group by (Q1.int1 + 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: file:/tmp/navis/hive_2011-12-02_00-22-57_954_5732523406632917271/-mr-10000
+PREHOOK: query: drop table t1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@t1
+POSTHOOK: query: drop table t1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@t1
+PREHOOK: query: drop table t2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t2
+PREHOOK: Output: default@t2
+POSTHOOK: query: drop table t2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t2
+POSTHOOK: Output: default@t2