diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java index dd1d6a1..826ad4a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java @@ -512,7 +512,12 @@ public static void removeBranch(Operator event) { } public static EdgeType determineEdgeType(BaseWork preceedingWork, BaseWork followingWork, ReduceSinkOperator reduceSinkOperator) { - if(reduceSinkOperator.getConf().isForwarding()) { + // The 1-1 edge should also work for sorted cases; however, depending on the details of the shuffle, + // this might end up writing multiple compressed files or end up using an in-memory partitioned kv writer. + // The condition about ordering = false can be removed at some point with a tweak to the unordered writer + // to never split a single output across multiple files (and never attempt a final merge). + if (reduceSinkOperator.getConf().isForwarding() && + !reduceSinkOperator.getConf().isOrdering()) { return EdgeType.ONE_TO_ONE_EDGE; } if (followingWork instanceof ReduceWork) {