diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchAggregation.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchAggregation.java index 084f9f8..0bd153f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchAggregation.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchAggregation.java @@ -100,7 +100,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, for (AggregationDesc aggregation : cGBY.getConf().getAggregators()) { List parameters = aggregation.getParameters(); - aggregation.setParameters(ExprNodeDescUtils.backtrack(parameters, cGBY, pGBY)); + aggregation.setParameters(ExprNodeDescUtils.backtrack(parameters, cGBY, RS)); } pctx.setFetchTabledesc(tsDesc); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index d5acb2e..68da88a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -24,6 +24,7 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -2710,6 +2711,7 @@ private Operator genScriptPlan(ASTNode trfm, QB qb, Operator input) fetchFilesNotInLocalFilesystem(stripQuotes(trfm.getChild(execPos).getText())), inInfo, inRecordWriter, outInfo, outRecordReader, errRecordReader, errInfo), new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch); + output.setColumnExprMap(Collections.emptyMap()); // disable backtracking return output; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java index e50026b..8636531 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java @@ -225,11 +225,11 @@ public static ExprNodeDesc backtrack(ExprNodeDesc source, Operator current, private static ExprNodeDesc backtrack(ExprNodeColumnDesc column, Operator current, Operator terminal) throws SemanticException { Map mapping = current.getColumnExprMap(); - if (mapping == null || !mapping.containsKey(column.getColumn())) { + if (mapping == null) { return backtrack((ExprNodeDesc)column, current, terminal); } ExprNodeDesc mapped = mapping.get(column.getColumn()); - return backtrack(mapped, current, terminal); + return mapped == null ? null : backtrack(mapped, current, terminal); } public static Operator getSingleParent(Operator current, Operator terminal) diff --git ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q index 48af2c2..3d7df11 100644 --- ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q +++ ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q @@ -11,7 +11,7 @@ explain select key, sum(key) from (select * from src distribute by key sort by k explain select key, sum(key), lower(value) from (select * from src order by key) Q1 group by key, lower(value); explain select key, sum(key), (X + 1) from (select key, (value + 1) as X from src order by key) Q1 group by key, (X + 1); -- mGBY-RS-rGBY-RS -explain select key, sum(key) as value from src group by key order by key, value; +explain select key, sum(key) as value from src group by key order by key; -- RS-JOIN-mGBY-RS-rGBY explain select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value; -- RS-JOIN-RS @@ -23,7 +23,7 @@ explain select key, count(distinct value) from (select key, value from src group select key, sum(key) from (select * from src distribute by key sort by key, value) Q1 group by key; select key, sum(key), lower(value) from (select * from src order by key) Q1 group by key, lower(value); select key, sum(key), (X + 1) from (select key, (value + 1) as X from src order by key) Q1 group by key, (X + 1); -select key, sum(key) as value from src group by key order by key, value; +select key, sum(key) as value from src group by key order by key; select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value; select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key, src.value; from (select key, value from src group by key, value) s select s.key group by s.key; @@ -36,7 +36,7 @@ explain select key, sum(key) from (select * from src distribute by key sort by k explain select key, sum(key), lower(value) from (select * from src order by key) Q1 group by key, lower(value); explain select key, sum(key), (X + 1) from (select key, (value + 1) as X from src order by key) Q1 group by key, (X + 1); -- RS-GBY-RS -explain select key, sum(key) as value from src group by key order by key, value; +explain select key, sum(key) as value from src group by key order by key; -- RS-JOIN-RS-GBY explain select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value; -- RS-JOIN-RS @@ -48,7 +48,7 @@ explain select key, count(distinct value) from (select key, value from src group select key, sum(key) from (select * from src distribute by key sort by key, value) Q1 group by key; select key, sum(key), lower(value) from (select * from src order by key) Q1 group by key, lower(value); select key, sum(key), (X + 1) from (select key, (value + 1) as X from src order by key) Q1 group by key, (X + 1); -select key, sum(key) as value from src group by key order by key, value; +select key, sum(key) as value from src group by key order by key; select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value; select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key, src.value; from (select key, value from src group by key, value) s select s.key group by s.key; diff --git ql/src/test/results/clientpositive/ppd2.q.out ql/src/test/results/clientpositive/ppd2.q.out index e1f10b8..5e7ea00 100644 --- ql/src/test/results/clientpositive/ppd2.q.out +++ ql/src/test/results/clientpositive/ppd2.q.out @@ -28,6 +28,7 @@ where b.cc>1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -48,8 +49,8 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: bigint) - sort order: +- + key expressions: _col0 (type: string) + sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) @@ -69,11 +70,35 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: bigint) + sort order: +- + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) + Reduce Operator Tree: + Extract + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -419,6 +444,7 @@ where b.cc>1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -442,8 +468,8 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: bigint) - sort order: +- + key expressions: _col0 (type: string) + sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) @@ -458,20 +484,40 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col1 > 1) (type: boolean) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: bigint) + sort order: +- + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) + Reduce Operator Tree: + Extract + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 > 1) (type: boolean) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out index 1216356..a583937 100644 --- ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out +++ ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out @@ -171,10 +171,10 @@ STAGE PLANS: limit: -1 PREHOOK: query: -- mGBY-RS-rGBY-RS -explain select key, sum(key) as value from src group by key order by key, value +explain select key, sum(key) as value from src group by key order by key PREHOOK: type: QUERY POSTHOOK: query: -- mGBY-RS-rGBY-RS -explain select key, sum(key) as value from src group by key order by key, value +explain select key, sum(key) as value from src group by key order by key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -198,8 +198,8 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: double) - sort order: ++ + key expressions: _col0 (type: string) + sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double) @@ -1467,11 +1467,11 @@ POSTHOOK: Input: default@src 96 96.0 NULL 97 194.0 NULL 98 196.0 NULL -PREHOOK: query: select key, sum(key) as value from src group by key order by key, value +PREHOOK: query: select key, sum(key) as value from src group by key order by key PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select key, sum(key) as value from src group by key order by key, value +POSTHOOK: query: select key, sum(key) as value from src group by key order by key POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### @@ -2655,10 +2655,10 @@ STAGE PLANS: limit: -1 PREHOOK: query: -- RS-GBY-RS -explain select key, sum(key) as value from src group by key order by key, value +explain select key, sum(key) as value from src group by key order by key PREHOOK: type: QUERY POSTHOOK: query: -- RS-GBY-RS -explain select key, sum(key) as value from src group by key order by key, value +explain select key, sum(key) as value from src group by key order by key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2676,8 +2676,8 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: key (type: string), _col1 (type: double) - sort order: ++ + key expressions: key (type: string) + sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: @@ -3927,11 +3927,11 @@ POSTHOOK: Input: default@src 96 96.0 NULL 97 194.0 NULL 98 196.0 NULL -PREHOOK: query: select key, sum(key) as value from src group by key order by key, value +PREHOOK: query: select key, sum(key) as value from src group by key order by key PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select key, sum(key) as value from src group by key order by key, value +POSTHOOK: query: select key, sum(key) as value from src group by key order by key POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### diff --git ql/src/test/results/compiler/plan/input20.q.xml ql/src/test/results/compiler/plan/input20.q.xml index f58eec2..806b9d4 100644 --- ql/src/test/results/compiler/plan/input20.q.xml +++ ql/src/test/results/compiler/plan/input20.q.xml @@ -384,6 +384,9 @@ + + + @@ -1028,6 +1031,9 @@ + + + diff --git ql/src/test/results/compiler/plan/input4.q.xml ql/src/test/results/compiler/plan/input4.q.xml index b2cb682..0626e64 100755 --- ql/src/test/results/compiler/plan/input4.q.xml +++ ql/src/test/results/compiler/plan/input4.q.xml @@ -592,6 +592,9 @@ + + + diff --git ql/src/test/results/compiler/plan/input5.q.xml ql/src/test/results/compiler/plan/input5.q.xml index 5749e34..036834e 100644 --- ql/src/test/results/compiler/plan/input5.q.xml +++ ql/src/test/results/compiler/plan/input5.q.xml @@ -540,6 +540,9 @@ + + +