diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index c3eb886fd2..ac63738329 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -428,7 +428,8 @@ private void semijoinRemovalBasedTransformations(OptimizeTezProcContext procCtx, final boolean dynamicPartitionPruningEnabled = procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING); final boolean semiJoinReductionEnabled = dynamicPartitionPruningEnabled && - procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION); + procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION) && + procCtx.parseContext.getRsToSemiJoinBranchInfo().size() != 0; final boolean extendedReductionEnabled = dynamicPartitionPruningEnabled && procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING_EXTENDED); @@ -438,46 +439,36 @@ private void semijoinRemovalBasedTransformations(OptimizeTezProcContext procCtx, } perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run remove dynamic pruning by size"); - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); if (semiJoinReductionEnabled) { + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); markSemiJoinForDPP(procCtx); - } - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Mark certain semijoin edges important based "); - - // Removing semijoin optimization when it may not be beneficial - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - if (semiJoinReductionEnabled) { - removeSemijoinOptimizationByBenefit(procCtx); - } - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove Semijoins based on cost benefits"); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Mark certain semijoin edges important based "); - // Remove any parallel edge between semijoin and mapjoin. - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - if (semiJoinReductionEnabled) { + // Remove any parallel edge between semijoin and mapjoin. + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); removeSemijoinsParallelToMapJoin(procCtx); - } - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove any parallel edge between semijoin and mapjoin"); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove any parallel edge between semijoin and mapjoin"); - // Remove semijoin optimization if it creates a cycle with mapside joins - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - if (semiJoinReductionEnabled && procCtx.parseContext.getRsToSemiJoinBranchInfo().size() != 0) { + // Remove semijoin optimization if it creates a cycle with mapside joins + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); removeSemiJoinCyclesDueToMapsideJoins(procCtx); - } - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove semijoin optimizations if it creates a cycle with mapside join"); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove semijoin optimizations if it creates a cycle with mapside join"); - // Remove semijoin optimization if SMB join is created. - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - if (semiJoinReductionEnabled && procCtx.parseContext.getRsToSemiJoinBranchInfo().size() != 0) { + // Remove semijoin optimization if SMB join is created. + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); removeSemijoinOptimizationFromSMBJoins(procCtx); - } - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove semijoin optimizations if needed"); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove semijoin optimizations if needed"); - // Remove bloomfilter if no stats generated - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - if (semiJoinReductionEnabled && procCtx.parseContext.getRsToSemiJoinBranchInfo().size() != 0) { + // Remove bloomfilter if no stats generated + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); removeSemiJoinIfNoStats(procCtx); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove bloom filter optimizations if needed"); + + // Removing semijoin optimization when it may not be beneficial + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); + removeSemijoinOptimizationByBenefit(procCtx); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove Semijoins based on cost benefits"); } - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove bloom filter optimizations if needed"); // after the stats phase we might have some cyclic dependencies that we need // to take care of. @@ -924,21 +915,25 @@ private static void removeSemiJoinCyclesDueToMapsideJoins( } assert parent instanceof ReduceSinkOperator; - while (parent.getParentOperators().size() > 0) { - parent = parent.getParentOperators().get(0); - } - - if (parent == ts) { - // We have a cycle! - if (sjInfo.getIsHint()) { - throw new SemanticException("Removing hinted semijoin as it is creating cycles with mapside joins " + rs + " : " + ts); - } - if (LOG.isDebugEnabled()) { - LOG.debug("Semijoin cycle due to mapjoin. Removing semijoin " - + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts)); + // Fetch all the parent TS ops + + Set tsOps = OperatorUtils.findOperatorsUpstream(parent, + TableScanOperator.class); + for (TableScanOperator parentTS : tsOps) { + // If the parent is same as the ts, then we have a cycle. + if (ts == parentTS) { + // We have a cycle! + if (sjInfo.getIsHint()) { + throw new SemanticException("Removing hinted semijoin as it is creating cycles with mapside joins " + rs + " : " + ts); + } + if (LOG.isDebugEnabled()) { + LOG.debug("Semijoin cycle due to mapjoin. Removing semijoin " + + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts)); + } + GenTezUtils.removeBranch(rs); + GenTezUtils.removeSemiJoinOperator(pCtx, rs, ts); + break; } - GenTezUtils.removeBranch(rs); - GenTezUtils.removeSemiJoinOperator(pCtx, rs, ts); } } } @@ -1558,7 +1553,7 @@ private static double computeBloomFilterNetBenefit( private void removeSemijoinOptimizationByBenefit(OptimizeTezProcContext procCtx) throws SemanticException { - List semijoinRsToRemove = new ArrayList(); + List semijoinRsToRemove = new ArrayList<>(); Map map = procCtx.parseContext.getRsToSemiJoinBranchInfo(); double semijoinReductionThreshold = procCtx.conf.getFloatVar( HiveConf.ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION_THRESHOLD);