diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java
index a0e2e67..a129cf3 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java
@@ -40,15 +40,14 @@
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.io.AcidUtils;
-import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveGroupingID;
import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
-import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.GenericUDAFInfo;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
@@ -70,12 +69,17 @@
* external names if possible.
* 3. In ExprNode & in ColumnInfo the tableAlias/VirtualColumn is specified
* differently for different GB/RS in pipeline. Remove the different treatments.
- * 3. VirtualColMap needs to be maintained
+ * 4. VirtualColMap needs to be maintained
*
*/
public class HiveGBOpConvUtil {
+
private static enum HIVEGBPHYSICALMODE {
- MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB, MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB, MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT, MAP_SIDE_GB_SKEW_GBKEYS_AND_DIST_UDAF_NOT_PRESENT, NO_MAP_SIDE_GB_NO_SKEW, NO_MAP_SIDE_GB_SKEW
+ MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB,
+ MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB,
+ MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT,
+ MAP_SIDE_GB_SKEW_GBKEYS_AND_DIST_UDAF_NOT_PRESENT,
+ NO_MAP_SIDE_GB_NO_SKEW, NO_MAP_SIDE_GB_SKEW
};
private static class UDAFAttrs {
@@ -94,8 +98,8 @@
private final List gbKeys = new ArrayList();
private final List grpSets = new ArrayList();
- private boolean grpSetRqrAdditionalMRJob;
- private boolean grpIdFunctionNeeded;
+ private boolean grpSetRqrAdditionalMRJob;
+ private boolean grpIdFunctionNeeded;
private final List distExprNames = new ArrayList();
private final List distExprTypes = new ArrayList();
@@ -105,12 +109,12 @@
private final List deDupedNonDistIrefs = new ArrayList();
private final List udafAttrs = new ArrayList();
- private boolean containsDistinctAggr = false;
+ private boolean containsDistinctAggr = false;
- float groupByMemoryUsage;
- float memoryThreshold;
+ float groupByMemoryUsage;
+ float memoryThreshold;
- private HIVEGBPHYSICALMODE gbPhysicalPipelineMode;
+ private HIVEGBPHYSICALMODE gbPhysicalPipelineMode;
};
private static HIVEGBPHYSICALMODE getAggOPMode(HiveConf hc, GBInfo gbInfo) {
@@ -203,11 +207,14 @@ private static GBInfo getGBInfo(HiveAggregate aggRel, OpAttr inputOpAf, HiveConf
for (int i = 0; i < argLst.size(); i++) {
if (!distinctRefs.contains(argLst.get(i))) {
distinctRefs.add(argLst.get(i));
- distParamInRefsToOutputPos.put(argLst.get(i), gbInfo.distExprNodes.size());
distinctExpr = HiveCalciteUtil.getExprNode(argLst.get(i), aggInputRel, exprConv);
- gbInfo.distExprNodes.add(distinctExpr);
- gbInfo.distExprNames.add(argNames.get(i));
- gbInfo.distExprTypes.add(distinctExpr.getTypeInfo());
+ // Only distinct nodes that are NOT part of the key should be added to distExprNodes
+ if (ExprNodeDescUtils.indexOf(distinctExpr, gbInfo.gbKeys) < 0) {
+ distParamInRefsToOutputPos.put(argLst.get(i), gbInfo.distExprNodes.size());
+ gbInfo.distExprNodes.add(distinctExpr);
+ gbInfo.distExprNames.add(argNames.get(i));
+ gbInfo.distExprTypes.add(distinctExpr.getTypeInfo());
+ }
}
}
}
@@ -254,10 +261,10 @@ private static GBInfo getGBInfo(HiveAggregate aggRel, OpAttr inputOpAf, HiveConf
}
// special handling for count, similar to PlanModifierForASTConv::replaceEmptyGroupAggr()
- udafAttrs.udafEvaluator = SemanticAnalyzer.getGenericUDAFEvaluator(udafAttrs.udafName,
- new ArrayList(udafAttrs.udafParams), new ASTNode(),
- udafAttrs.isDistinctUDAF, udafAttrs.udafParams.size() == 0 &&
- "count".equalsIgnoreCase(udafAttrs.udafName) ? true : false);
+ udafAttrs.udafEvaluator = SemanticAnalyzer.getGenericUDAFEvaluator(udafAttrs.udafName,
+ new ArrayList(udafAttrs.udafParams), new ASTNode(),
+ udafAttrs.isDistinctUDAF, udafAttrs.udafParams.size() == 0 &&
+ "count".equalsIgnoreCase(udafAttrs.udafName) ? true : false);
gbInfo.udafAttrs.add(udafAttrs);
}