diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java index d53efbf..890a114 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java @@ -185,8 +185,8 @@ protected abstract Object process(ReduceSinkOperator cRS, ReduceSinkDeduplicateP protected abstract Object process(ReduceSinkOperator cRS, GroupByOperator cGBY, ReduceSinkDeduplicateProcCtx dedupCtx) throws SemanticException; - // for JOIN-RS case, it's not possible generally to merge if child has - // less key/partition columns than parents + // for JOIN-RS case, it's not possible generally to merge if child dose + // not has the same key/partition columns as parents protected boolean merge(ReduceSinkOperator cRS, JoinOperator pJoin, int minReducer) throws SemanticException { List> parents = pJoin.getParentOperators(); @@ -194,6 +194,13 @@ protected boolean merge(ReduceSinkOperator cRS, JoinOperator pJoin, int minReduc ReduceSinkDesc cRSc = cRS.getConf(); for (ReduceSinkOperator pRSNs : pRSs) { ReduceSinkDesc pRSNc = pRSNs.getConf(); + + // It is not possible to delete reduce sink if distinct function + // is in group operator. Since distinct columns needs to be + // in the key for sorting. + if (cRSc.getDistinctColumnIndices() == null || cRSc.getDistinctColumnIndices().size() > 0) { + return false; + } if (cRSc.getKeyCols().size() < pRSNc.getKeyCols().size()) { return false; }