diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index 98fec77010..21d0053611 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -888,19 +888,31 @@ private void removeCycleCreatingSemiJoinOps(MapJoinOperator mapjoinOp, } // Found a semijoin branch. - for (Operator parent : mapjoinOp.getParentOperators()) { - if (!(parent instanceof ReduceSinkOperator)) { - continue; - } + // There can be more than one semijoin branch coming from the parent + // GBY Operator of the RS Operator. + Operator parentGB = op.getParentOperators().get(0); + for (Operator childRS : parentGB.getChildOperators()) { + // Get the RS and TS for this branch + rs = (ReduceSinkOperator) childRS; + ts = parseContext.getRsToSemiJoinBranchInfo().get(rs).getTsOp(); + assert ts != null; + for (Operator parent : mapjoinOp.getParentOperators()) { + if (!(parent instanceof ReduceSinkOperator)) { + continue; + } - Set tsOps = OperatorUtils.findOperatorsUpstream(parent, - TableScanOperator.class); - for (TableScanOperator parentTS : tsOps) { - // If the parent is same as the ts, then we have a cycle. - if (ts == parentTS) { - semiJoinMap.put(rs, ts); - break; + Set tsOps = OperatorUtils.findOperatorsUpstream(parent, + TableScanOperator.class); + boolean found = false; + for (TableScanOperator parentTS : tsOps) { + // If the parent is same as the ts, then we have a cycle. + if (ts == parentTS) { + semiJoinMap.put(rs, ts); + found = true; + break; + } } + if (found) break; } } } diff --git a/ql/src/test/queries/clientpositive/perf/query78.q b/ql/src/test/queries/clientpositive/perf/query78.q index ca9e6d6cb1..e5d3971bb3 100644 --- a/ql/src/test/queries/clientpositive/perf/query78.q +++ b/ql/src/test/queries/clientpositive/perf/query78.q @@ -1,4 +1,23 @@ set hive.mapred.mode=nonstrict; +set hive.exec.orc.split.strategy=BI; +set hive.tez.bigtable.minsize.semijoin.reduction=100000000; +set hive.auto.convert.join.hashtable.max.entries=40000000; +set tez.runtime.enable.final-merge.in.output=false; +set hive.mapjoin.hybridgrace.hashtable=false; +set hive.llap.enable.grace.join.in.llap=false; +set hive.vectorized.groupby.maxentries=1000000; +set hive.auto.convert.join.noconditionaltask.size=1145044992; +set hive.llap.client.consistent.splits=true; +set hive.merge.nway.joins=false; +set hive.map.aggr.hash.min.reduction=0.99; +set tez.runtime.shuffle.parallel.copies=4; +set tez.runtime.optimize.local.fetch=true; +set tez.runtime.optimize.shared.fetch=false; +set hive.tez.min.partition.factor=0.25; +set hive.tez.llap.min.reducer.per.executor=0.95; +set hive.explain.user=false; +set hive.tez.dynamic.semijoin.reduction=true; +set hive.optimize.shared.scan=false; -- start query 1 in stream 0 using template query78.tpl and seed 1819994127 explain with ws as