diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java index c380a2d..79a3281 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java @@ -18,9 +18,6 @@ package org.apache.hadoop.hive.ql.optimizer.correlation; -import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVECONVERTJOIN; -import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASK; - import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; @@ -54,12 +51,14 @@ * join optimization will remove reduce sink operators. * * This optimizer removes/replaces child-RS (not parent) which is safer way for DefaultGraphWalker. + * + * Please note that ReduceSinkDeDuplication does not handle patterns involving + * JoinOperators. Those patterns will be handled by CorrelationOptimizer. */ public class ReduceSinkDeDuplication implements Transform { private static final String RS = ReduceSinkOperator.getOperatorName(); private static final String GBY = GroupByOperator.getOperatorName(); - private static final String JOIN = JoinOperator.getOperatorName(); protected ParseContext pGraphContext; @@ -70,10 +69,6 @@ public ParseContext transform(ParseContext pctx) throws SemanticException { // generate pruned column list for all relevant operators ReduceSinkDeduplicateProcCtx cppCtx = new ReduceSinkDeduplicateProcCtx(pGraphContext); - // for auto convert map-joins, it not safe to dedup in here (todo) - boolean mergeJoins = !pctx.getConf().getBoolVar(HIVECONVERTJOIN) && - !pctx.getConf().getBoolVar(HIVECONVERTJOINNOCONDITIONALTASK); - // If multiple rules can be matched with same cost, last rule will be choosen as a processor // see DefaultRuleDispatcher#dispatch() Map opRules = new LinkedHashMap(); @@ -81,11 +76,6 @@ public ParseContext transform(ParseContext pctx) throws SemanticException { ReduceSinkDeduplicateProcFactory.getReducerReducerProc()); opRules.put(new RuleRegExp("R2", RS + "%" + GBY + "%.*%" + RS + "%"), ReduceSinkDeduplicateProcFactory.getGroupbyReducerProc()); - if (mergeJoins) { - opRules.put(new RuleRegExp("R3", JOIN + "%.*%" + RS + "%"), - ReduceSinkDeduplicateProcFactory.getJoinReducerProc()); - } - // TODO RS+JOIN // The dispatcher fires the processor corresponding to the closest matching // rule and passes the context along @@ -117,10 +107,6 @@ public static NodeProcessor getGroupbyReducerProc() { return new GroupbyReducerProc(); } - public static NodeProcessor getJoinReducerProc() { - return new JoinReducerProc(); - } - public static NodeProcessor getDefaultProc() { return new DefaultProc(); } @@ -169,66 +155,6 @@ protected abstract Object process(ReduceSinkOperator cRS, ReduceSinkDeduplicateP protected abstract Object process(ReduceSinkOperator cRS, GroupByOperator cGBY, ReduceSinkDeduplicateProcCtx dedupCtx) throws SemanticException; - // for JOIN-RS case, it's not possible generally to merge if child has - // more key/partition columns than parents - protected boolean merge(ReduceSinkOperator cRS, JoinOperator pJoin, int minReducer) - throws SemanticException { - List> parents = pJoin.getParentOperators(); - ReduceSinkOperator[] pRSs = parents.toArray(new ReduceSinkOperator[parents.size()]); - ReduceSinkDesc cRSc = cRS.getConf(); - ReduceSinkDesc pRS0c = pRSs[0].getConf(); - if (cRSc.getKeyCols().size() > pRS0c.getKeyCols().size()) { - return false; - } - if (cRSc.getPartitionCols().size() > pRS0c.getPartitionCols().size()) { - return false; - } - Integer moveReducerNumTo = checkNumReducer(cRSc.getNumReducers(), pRS0c.getNumReducers()); - if (moveReducerNumTo == null || - moveReducerNumTo > 0 && cRSc.getNumReducers() < minReducer) { - return false; - } - - Integer moveRSOrderTo = checkOrder(cRSc.getOrder(), pRS0c.getOrder()); - if (moveRSOrderTo == null) { - return false; - } - - boolean[] sorted = CorrelationUtilities.getSortedTags(pJoin); - - int cKeySize = cRSc.getKeyCols().size(); - for (int i = 0; i < cKeySize; i++) { - ExprNodeDesc cexpr = cRSc.getKeyCols().get(i); - ExprNodeDesc[] pexprs = new ExprNodeDesc[pRSs.length]; - for (int tag = 0; tag < pRSs.length; tag++) { - pexprs[tag] = pRSs[tag].getConf().getKeyCols().get(i); - } - int found = CorrelationUtilities.indexOf(cexpr, pexprs, cRS, pRSs, sorted); - if (found < 0) { - return false; - } - } - int cPartSize = cRSc.getPartitionCols().size(); - for (int i = 0; i < cPartSize; i++) { - ExprNodeDesc cexpr = cRSc.getPartitionCols().get(i); - ExprNodeDesc[] pexprs = new ExprNodeDesc[pRSs.length]; - for (int tag = 0; tag < pRSs.length; tag++) { - pexprs[tag] = pRSs[tag].getConf().getPartitionCols().get(i); - } - int found = CorrelationUtilities.indexOf(cexpr, pexprs, cRS, pRSs, sorted); - if (found < 0) { - return false; - } - } - - if (moveReducerNumTo > 0) { - for (ReduceSinkOperator pRS : pRSs) { - pRS.getConf().setNumReducers(cRS.getConf().getNumReducers()); - } - } - return true; - } - /** * Current RSDedup remove/replace child RS. So always copies * more specific part of configurations of child RS to that of parent RS. @@ -433,42 +359,6 @@ public Object process(ReduceSinkOperator cRS, GroupByOperator cGBY, } } - static class JoinReducerProc extends AbsctractReducerReducerProc { - - // pRS-pJOIN-cRS - @Override - public Object process(ReduceSinkOperator cRS, ReduceSinkDeduplicateProcCtx dedupCtx) - throws SemanticException { - JoinOperator pJoin = - CorrelationUtilities.findPossibleParent(cRS, JoinOperator.class, dedupCtx.trustScript()); - if (pJoin != null && merge(cRS, pJoin, dedupCtx.minReducer())) { - pJoin.getConf().setFixedAsSorted(true); - CorrelationUtilities.replaceReduceSinkWithSelectOperator( - cRS, dedupCtx.getPctx(), dedupCtx); - return true; - } - return false; - } - - // pRS-pJOIN-cRS-cGBY - @Override - public Object process(ReduceSinkOperator cRS, GroupByOperator cGBY, - ReduceSinkDeduplicateProcCtx dedupCtx) - throws SemanticException { - Operator start = CorrelationUtilities.getStartForGroupBy(cRS); - JoinOperator pJoin = - CorrelationUtilities.findPossibleParent( - start, JoinOperator.class, dedupCtx.trustScript()); - if (pJoin != null && merge(cRS, pJoin, dedupCtx.minReducer())) { - pJoin.getConf().setFixedAsSorted(true); - CorrelationUtilities.removeReduceSinkForGroupBy( - cRS, cGBY, dedupCtx.getPctx(), dedupCtx); - return true; - } - return false; - } - } - static class ReducerReducerProc extends AbsctractReducerReducerProc { // pRS-cRS diff --git ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q index a5e9cdf..12ddf76 100644 --- ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q +++ ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q @@ -12,10 +12,6 @@ explain select key, sum(key), lower(value) from (select * from src order by key) explain select key, sum(key), (X + 1) from (select key, (value + 1) as X from src order by key) Q1 group by key, (X + 1); -- mGBY-RS-rGBY-RS explain select key, sum(key) as value from src group by key order by key, value; --- RS-JOIN-mGBY-RS-rGBY -explain select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value; --- RS-JOIN-RS -explain select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key, src.value; -- mGBY-RS-rGBY-mGBY-RS-rGBY explain from (select key, value from src group by key, value) s select s.key group by s.key; @@ -23,8 +19,6 @@ select key, sum(key) from (select * from src distribute by key sort by key, valu select key, sum(key), lower(value) from (select * from src order by key) Q1 group by key, lower(value); select key, sum(key), (X + 1) from (select key, (value + 1) as X from src order by key) Q1 group by key, (X + 1); select key, sum(key) as value from src group by key order by key, value; -select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value; -select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key, src.value; from (select key, value from src group by key, value) s select s.key group by s.key; set hive.map.aggr=false; @@ -35,10 +29,6 @@ explain select key, sum(key), lower(value) from (select * from src order by key) explain select key, sum(key), (X + 1) from (select key, (value + 1) as X from src order by key) Q1 group by key, (X + 1); -- RS-GBY-RS explain select key, sum(key) as value from src group by key order by key, value; --- RS-JOIN-RS-GBY -explain select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value; --- RS-JOIN-RS -explain select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key, src.value; -- RS-GBY-RS-GBY explain from (select key, value from src group by key, value) s select s.key group by s.key; @@ -46,6 +36,4 @@ select key, sum(key) from (select * from src distribute by key sort by key, valu select key, sum(key), lower(value) from (select * from src order by key) Q1 group by key, lower(value); select key, sum(key), (X + 1) from (select key, (value + 1) as X from src order by key) Q1 group by key, (X + 1); select key, sum(key) as value from src group by key order by key, value; -select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value; -select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key, src.value; from (select key, value from src group by key, value) s select s.key group by s.key; diff --git ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out index 3297ebb..fcef055 100644 --- ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out +++ ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out @@ -329,239 +329,6 @@ STAGE PLANS: limit: -1 -PREHOOK: query: -- RS-JOIN-mGBY-RS-rGBY -explain select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value -PREHOOK: type: QUERY -POSTHOOK: query: -- RS-JOIN-mGBY-RS-rGBY -explain select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src)) (TOK_TABREF (TOK_TABNAME src1)) (= (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (TOK_FUNCTION sum (. (TOK_TABLE_OR_COL src) key)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src) value)))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - src - TableScan - alias: src - Reduce Output Operator - key expressions: - expr: key - type: string - sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - src1 - TableScan - alias: src1 - Reduce Output Operator - key expressions: - expr: key - type: string - sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 - handleSkewJoin: false - outputColumnNames: _col0, _col1 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - outputColumnNames: _col0, _col1 - Group By Operator - aggregations: - expr: sum(_col0) - bucketGroup: false - keys: - expr: _col0 - type: string - expr: _col1 - type: string - mode: hash - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: double - Reduce Operator Tree: - Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col2 - type: double - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: -- RS-JOIN-RS -explain select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key, src.value -PREHOOK: type: QUERY -POSTHOOK: query: -- RS-JOIN-RS -explain select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key, src.value -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src)) (TOK_TABREF (TOK_TABNAME src1)) (= (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src) value))))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - src - TableScan - alias: src - Reduce Output Operator - key expressions: - expr: key - type: string - sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - src1 - TableScan - alias: src1 - Reduce Output Operator - key expressions: - expr: key - type: string - sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 - handleSkewJoin: false - outputColumnNames: _col0, _col1 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - Reduce Operator Tree: - Extract - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - PREHOOK: query: -- mGBY-RS-rGBY-mGBY-RS-rGBY explain from (select key, value from src group by key, value) s select s.key group by s.key PREHOOK: type: QUERY @@ -1918,78 +1685,6 @@ POSTHOOK: Input: default@src 96 96.0 97 194.0 98 196.0 -PREHOOK: query: select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -128 384.0 -146 292.0 -150 150.0 -213 426.0 -224 448.0 -238 476.0 -255 510.0 -273 819.0 -278 556.0 -311 933.0 -369 1107.0 -401 2005.0 -406 1624.0 -66 66.0 -98 196.0 -PREHOOK: query: select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key, src.value -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key, src.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -128 val_128 -128 val_128 -128 val_128 -146 val_146 -146 val_146 -150 val_150 -213 val_213 -213 val_213 -224 val_224 -224 val_224 -238 val_238 -238 val_238 -255 val_255 -255 val_255 -273 val_273 -273 val_273 -273 val_273 -278 val_278 -278 val_278 -311 val_311 -311 val_311 -311 val_311 -369 val_369 -369 val_369 -369 val_369 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -66 val_66 -98 val_98 -98 val_98 PREHOOK: query: from (select key, value from src group by key, value) s select s.key group by s.key PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -2618,225 +2313,6 @@ STAGE PLANS: limit: -1 -PREHOOK: query: -- RS-JOIN-RS-GBY -explain select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value -PREHOOK: type: QUERY -POSTHOOK: query: -- RS-JOIN-RS-GBY -explain select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src)) (TOK_TABREF (TOK_TABNAME src1)) (= (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (TOK_FUNCTION sum (. (TOK_TABLE_OR_COL src) key)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src) value)))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - src - TableScan - alias: src - Reduce Output Operator - key expressions: - expr: key - type: string - sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - src1 - TableScan - alias: src1 - Reduce Output Operator - key expressions: - expr: key - type: string - sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 - handleSkewJoin: false - outputColumnNames: _col0, _col1 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - Reduce Operator Tree: - Group By Operator - aggregations: - expr: sum(KEY._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - mode: complete - outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col2 - type: double - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: -- RS-JOIN-RS -explain select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key, src.value -PREHOOK: type: QUERY -POSTHOOK: query: -- RS-JOIN-RS -explain select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key, src.value -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src)) (TOK_TABREF (TOK_TABNAME src1)) (= (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src) value))))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - src - TableScan - alias: src - Reduce Output Operator - key expressions: - expr: key - type: string - sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - src1 - TableScan - alias: src1 - Reduce Output Operator - key expressions: - expr: key - type: string - sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 - handleSkewJoin: false - outputColumnNames: _col0, _col1 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - Reduce Operator Tree: - Extract - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - PREHOOK: query: -- RS-GBY-RS-GBY explain from (select key, value from src group by key, value) s select s.key group by s.key PREHOOK: type: QUERY @@ -4184,78 +3660,6 @@ POSTHOOK: Input: default@src 96 96.0 97 194.0 98 196.0 -PREHOOK: query: select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -128 384.0 -146 292.0 -150 150.0 -213 426.0 -224 448.0 -238 476.0 -255 510.0 -273 819.0 -278 556.0 -311 933.0 -369 1107.0 -401 2005.0 -406 1624.0 -66 66.0 -98 196.0 -PREHOOK: query: select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key, src.value -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key, src.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -128 val_128 -128 val_128 -128 val_128 -146 val_146 -146 val_146 -150 val_150 -213 val_213 -213 val_213 -224 val_224 -224 val_224 -238 val_238 -238 val_238 -255 val_255 -255 val_255 -273 val_273 -273 val_273 -273 val_273 -278 val_278 -278 val_278 -311 val_311 -311 val_311 -311 val_311 -369 val_369 -369 val_369 -369 val_369 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -66 val_66 -98 val_98 -98 val_98 PREHOOK: query: from (select key, value from src group by key, value) s select s.key group by s.key PREHOOK: type: QUERY PREHOOK: Input: default@src