diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java index 7fbf8cd..d7e3362 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java @@ -711,15 +711,33 @@ private static OpAttr genMapSideRS(OpAttr inputOpAf, GBInfo gbInfo) throws Seman // TODO: Why is this needed (doesn't represent any cols) String udafName = SemanticAnalyzer.getColumnInternalName(reduceKeys.size()); outputKeyColumnNames.add(udafName); - for (int i = 0; i < gbInfo.distExprNodes.size(); i++) { - reduceKeys.add(gbInfo.distExprNodes.get(i)); - outputColName = SemanticAnalyzer.getColumnInternalName(i); - String field = Utilities.ReduceField.KEY.toString() + "." + udafName + ":" + i + "." - + outputColName; - ColumnInfo colInfo = new ColumnInfo(field, gbInfo.distExprNodes.get(i).getTypeInfo(), null, - false); - colInfoLst.add(colInfo); - colExprMap.put(field, gbInfo.distExprNodes.get(i)); + int numDistinctUDFs = 0; + + for (int i = 0; i< gbInfo.udafAttrs.size(); i++) { + UDAFAttrs udafAttr = gbInfo.udafAttrs.get(i); + + if (udafAttr.isDistinctUDAF) { + for (int j = 0; j < udafAttr.udafParamsIndxInGBInfoDistExprs.size(); j++) { + Integer distExprNodeIndex = udafAttr.udafParamsIndxInGBInfoDistExprs.get(j); + // If this is part of Grby Key, no need to add to colExprMap. + if (distExprNodeIndex == null) { + continue; + } + ExprNodeDesc distinctUDAFParam = gbInfo.distExprNodes.get(distExprNodeIndex); + + reduceKeys.add(distinctUDAFParam); + outputColName = SemanticAnalyzer.getColumnInternalName(j); + // The naming convention for the Distinct UDAF field is : + // <ReduceField.KEY>.<udafName>:<numDistinctUDFs>.<outputColName> + String field = Utilities.ReduceField.KEY.toString() + "." + udafName + + ":" + numDistinctUDFs + "." 
+ outputColName; + ColumnInfo colInfo = new ColumnInfo(field, + distinctUDAFParam.getTypeInfo(), null, false); + colInfoLst.add(colInfo); + colExprMap.put(field, distinctUDAFParam); + } + numDistinctUDFs++; + } } } diff --git a/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out index 95233b0..d1d5905 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out @@ -53,8 +53,8 @@ STAGE PLANS: outputColumnNames: $f0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: $f0 (type: string) - sort order: + + key expressions: $f0 (type: string), $f0 (type: string), $f0 (type: string) + sort order: +++ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator @@ -121,8 +121,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.c10 EXPRESSION [(src)src.null, ] -POSTHOOK: Lineage: dest1.c11 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest1.c10 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c11 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]