diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SamplingOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SamplingOptimizer.java index 2e1d15c..2def168 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SamplingOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SamplingOptimizer.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.optimizer.physical; +import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorUtils; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; @@ -50,6 +51,10 @@ public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException { || reduceWork.getReducer() == null) { continue; } + // GROUPBY operator in reducer may not be processed in parallel. Skip optimizing. + if (OperatorUtils.findSingleOperator(reduceWork.getReducer(), GroupByOperator.class) != null) { + continue; + } Operator operator = mapWork.getAliasToWork().values().iterator().next(); if (!(operator instanceof TableScanOperator)) { continue;