diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index 0f9e86b..395467a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -767,6 +767,8 @@ public MapJoinOperator convertJoinMapJoin(JoinOperator joinOp, OptimizeTezProcCo // Remove semijoin Op if there is any. if (context.parseContext.getRsOpToTsOpMap().size() > 0) { + removeReduceSideSemiJoinOps(mapJoinOp, parentSelectOpOfBigTableOp, + context.parseContext); removeCycleCreatingSemiJoinOps(mapJoinOp, parentSelectOpOfBigTableOp, context.parseContext); } @@ -775,6 +777,46 @@ public MapJoinOperator convertJoinMapJoin(JoinOperator joinOp, OptimizeTezProcCo return mapJoinOp; } + private void removeReduceSideSemiJoinOps(MapJoinOperator mapjoinOp, + Operator parentSelectOpOfBigTable, + ParseContext parseContext) throws SemanticException { + Map semiJoinMap = + new HashMap(); + for (Operator op : parentSelectOpOfBigTable.getChildOperators()) { + if (!(op instanceof SelectOperator)) { + continue; + } + + while (op.getChildOperators().size() > 0 ) { + op = op.getChildOperators().get(0); + if (!(op instanceof ReduceSinkOperator)) { + continue; + } + ReduceSinkOperator rs = (ReduceSinkOperator) op; + TableScanOperator ts = parseContext.getRsOpToTsOpMap().get(rs); + if (ts != null) { + // There is a semijoin branch + semiJoinMap.put(rs, ts); + break; + } + } + } + + if (semiJoinMap.size() > 0) { + // Find out if these semijoin branches will be on reducer. + Set rsOps = OperatorUtils.findOperatorsUpstream( + parentSelectOpOfBigTable, ReduceSinkOperator.class); + if (rsOps.size() > 0) { + // These semijoin branches will go in reducer, remove them + for (ReduceSinkOperator rs : semiJoinMap.keySet()) { + GenTezUtils.removeBranch(rs); + GenTezUtils.removeSemiJoinOperator(parseContext, rs, + semiJoinMap.get(rs)); + } + } + } + } + // Remove any semijoin branch associated with mapjoin's parent's operator // pipeline which can cause a cycle after mapjoin optimization. private void removeCycleCreatingSemiJoinOps(MapJoinOperator mapjoinOp, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java index aee74ad..fe2dea5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java @@ -569,7 +569,8 @@ public static void removeSemiJoinOperator(ParseContext context, TypeInfoFactory.booleanTypeInfo, Boolean.TRUE); DynamicValuePredicateContext filterDynamicValuePredicatesCollection = new DynamicValuePredicateContext(); - collectDynamicValuePredicates(((FilterOperator)(ts.getChildOperators().get(0))).getConf().getPredicate(), + FilterDesc filterDesc = ((FilterOperator)(ts.getChildOperators().get(0))).getConf(); + collectDynamicValuePredicates(filterDesc.getPredicate(), filterDynamicValuePredicatesCollection); for (ExprNodeDesc nodeToRemove : filterDynamicValuePredicatesCollection .childParentMapping.keySet()) { @@ -594,8 +595,8 @@ public static void removeSemiJoinOperator(ParseContext context, ExprNodeDesc nodeParent = filterDynamicValuePredicatesCollection .childParentMapping.get(nodeToRemove); if (nodeParent == null) { - // This was the only predicate, set filter expression to null - ts.getConf().setFilterExpr(null); + // This was the only predicate, set filter expression to const + filterDesc.setPredicate(constNode); } else { int i = nodeParent.getChildren().indexOf(nodeToRemove); nodeParent.getChildren().remove(i);