diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java index 7fbf8cd..f994309 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java @@ -108,6 +108,7 @@ private final List distExprTypes = new ArrayList(); private final List distExprNodes = new ArrayList(); private final List> distColIndices = new ArrayList>(); + private final Map distExprNodesToDistColIndicesCount = new HashMap(); private final List deDupedNonDistIrefs = new ArrayList(); @@ -208,16 +209,26 @@ private static GBInfo getGBInfo(HiveAggregate aggRel, OpAttr inputOpAf, HiveConf List argNames = HiveCalciteUtil.getFieldNames(argLst, aggInputRel); ExprNodeDesc distinctExpr; for (int i = 0; i < argLst.size(); i++) { + distinctExpr = HiveCalciteUtil.getExprNode(argLst.get(i), aggInputRel, exprConv); if (!distinctRefs.contains(argLst.get(i))) { distinctRefs.add(argLst.get(i)); - distinctExpr = HiveCalciteUtil.getExprNode(argLst.get(i), aggInputRel, exprConv); // Only distinct nodes that are NOT part of the key should be added to distExprNodes if (ExprNodeDescUtils.indexOf(distinctExpr, gbInfo.gbKeys) < 0) { distParamInRefsToOutputPos.put(argLst.get(i), gbInfo.distExprNodes.size()); gbInfo.distExprNodes.add(distinctExpr); gbInfo.distExprNames.add(argNames.get(i)); gbInfo.distExprTypes.add(distinctExpr.getTypeInfo()); + gbInfo.distExprNodesToDistColIndicesCount.put(distinctExpr, 1); } + } else { + ExprNodeDesc exprNode = distinctExpr; + for (ExprNodeDesc en : gbInfo.distExprNodes) { + if (exprNode.isSame(en)) { + exprNode = en; + break; + } + } + gbInfo.distExprNodesToDistColIndicesCount.put(exprNode, gbInfo.distExprNodesToDistColIndicesCount.get(exprNode)+1); } } } @@ -711,15 +722,20 @@ private static OpAttr genMapSideRS(OpAttr inputOpAf, GBInfo gbInfo) throws Seman // TODO: Why is this needed (doesn't represent any cols) String udafName = SemanticAnalyzer.getColumnInternalName(reduceKeys.size()); outputKeyColumnNames.add(udafName); + int tagCount = 0; + for (int i = 0; i < gbInfo.distExprNodes.size(); i++) { reduceKeys.add(gbInfo.distExprNodes.get(i)); outputColName = SemanticAnalyzer.getColumnInternalName(i); - String field = Utilities.ReduceField.KEY.toString() + "." + udafName + ":" + i + "." - + outputColName; - ColumnInfo colInfo = new ColumnInfo(field, gbInfo.distExprNodes.get(i).getTypeInfo(), null, - false); - colInfoLst.add(colInfo); - colExprMap.put(field, gbInfo.distExprNodes.get(i)); + for (int j = 0; j < gbInfo.distExprNodesToDistColIndicesCount.get(gbInfo.distExprNodes.get(i)); j++) { + String field = Utilities.ReduceField.KEY.toString() + "." + udafName + ":" + tagCount + "." + + outputColName; + ColumnInfo colInfo = new ColumnInfo(field, gbInfo.distExprNodes.get(i).getTypeInfo(), null, + false); + colInfoLst.add(colInfo); + colExprMap.put(field, gbInfo.distExprNodes.get(i)); + tagCount++; + } } }