diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkDeDuplication.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkDeDuplication.java index e91b4d5..640dfac 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkDeDuplication.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkDeDuplication.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.ExtractOperator; import org.apache.hadoop.hive.ql.exec.FilterOperator; import org.apache.hadoop.hive.ql.exec.ForwardOperator; +import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; @@ -152,6 +153,12 @@ public class ReduceSinkDeDuplication implements Transform{ if(ctx.contains(childReduceSink)) { return null; } + + List<Operator<? extends Serializable>> childOp = childReduceSink.getChildOperators(); + if (childOp != null && childOp.size() == 1 && childOp.get(0) instanceof GroupByOperator) { + ctx.addRejectedReduceSinkOperator(childReduceSink); + return null; + } ParseContext pGraphContext = ctx.getPctx(); HashMap childColumnMapping = getPartitionAndKeyColumnMapping(childReduceSink); diff --git ql/src/test/queries/clientpositive/reduce_deduplicate_exclude_gby.q ql/src/test/queries/clientpositive/reduce_deduplicate_exclude_gby.q new file mode 100644 index 0000000..118173a --- /dev/null +++ ql/src/test/queries/clientpositive/reduce_deduplicate_exclude_gby.q @@ -0,0 +1,6 @@ +create table t1( key_int1 int, key_int2 int, key_string1 string, key_string2 string); + +set hive.map.aggr=false; +select Q1.key_int1, sum(Q1.key_int1) from (select * from t1 cluster by key_int1) Q1 group by Q1.key_int1; + +drop table t1; diff --git ql/src/test/results/clientpositive/reduce_deduplicate_exclude_gby.q.out ql/src/test/results/clientpositive/reduce_deduplicate_exclude_gby.q.out new file mode 100644 index 
0000000..f4dacff --- /dev/null +++ ql/src/test/results/clientpositive/reduce_deduplicate_exclude_gby.q.out @@ -0,0 +1,21 @@ +PREHOOK: query: create table t1( key_int1 int, key_int2 int, key_string1 string, key_string2 string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table t1( key_int1 int, key_int2 int, key_string1 string, key_string2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@t1 +PREHOOK: query: select Q1.key_int1, sum(Q1.key_int1) from (select * from t1 cluster by key_int1) Q1 group by Q1.key_int1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Output: file:/tmp/navis/hive_2011-11-27_23-31-58_414_7329222711647021730/-mr-10000 +POSTHOOK: query: select Q1.key_int1, sum(Q1.key_int1) from (select * from t1 cluster by key_int1) Q1 group by Q1.key_int1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Output: file:/tmp/navis/hive_2011-11-27_23-31-58_414_7329222711647021730/-mr-10000 +PREHOOK: query: drop table t1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 +POSTHOOK: query: drop table t1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t1