diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index e68618a..db6b05b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -851,6 +851,11 @@ private void removeCycleCreatingSemiJoinOps(MapJoinOperator mapjoinOp, } if (semiJoinMap.size() > 0) { for (ReduceSinkOperator rs : semiJoinMap.keySet()) { + if (LOG.isDebugEnabled()) { + LOG.debug("Found semijoin optimization from the big table side of a map join, which will cause a task cycle. " + + "Removing semijoin " + + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(semiJoinMap.get(rs))); + } GenTezUtils.removeBranch(rs); GenTezUtils.removeSemiJoinOperator(parseContext, rs, semiJoinMap.get(rs)); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index 62bd652..abf531b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -235,7 +235,16 @@ private void removeCycleOperator(Set> component, OptimizeTezProcCont GenTezUtils.removeBranch(victim); if (victim == victimRS) { + if (LOG.isDebugEnabled()) { + LOG.debug("Cycle found. Removing semijoin " + + OperatorUtils.getOpNamePretty(victimRS) + " - " + OperatorUtils.getOpNamePretty(victimTS)); + } GenTezUtils.removeSemiJoinOperator(context.parseContext, victimRS, victimTS); + } else { + // at this point we've found the fork in the op pipeline that has the pruning as a child plan. + LOG.info("Disabling dynamic pruning for: " + + ((DynamicPruningEventDesc) victim.getConf()).getTableScan().toString() + + ". Needed to break cyclic dependency"); } return; } @@ -651,6 +660,10 @@ private static void removeSemijoinOptimizationFromSMBJoins( for (ReduceSinkOperator rs : pctx.getRsOpToTsOpMap().keySet()) { if (ts == pctx.getRsOpToTsOpMap().get(rs)) { // match! + if (LOG.isDebugEnabled()) { + LOG.debug("Semijoin optimization found going to SMB join. Removing semijoin " + + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts)); + } GenTezUtils.removeBranch(rs); GenTezUtils.removeSemiJoinOperator(pctx, rs, ts); } @@ -750,6 +763,10 @@ private static void removeSemiJoinCyclesDueToMapsideJoins( if (parent == ts) { // We have a cycle! + if (LOG.isDebugEnabled()) { + LOG.debug("Semijoin cycle due to mapjoin. Removing semijoin " + + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts)); + } GenTezUtils.removeBranch(rs); GenTezUtils.removeSemiJoinOperator(pCtx, rs, ts); } @@ -789,6 +806,12 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, if (expectedEntries == -1 || expectedEntries > pCtx.getConf().getLongVar(ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES)) { removeSemiJoin = true; + if (LOG.isDebugEnabled()) { + LOG.debug("expectedEntries=" + expectedEntries + ". " + + "Either stats unavailable or expectedEntries exceeded max allowable bloomfilter size. " + + "Removing semijoin " + + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts)); + } break; } } @@ -799,6 +822,10 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, long numRows = ts.getStatistics().getNumRows(); if (numRows < pCtx.getConf().getLongVar(ConfVars.TEZ_BIGTABLE_MIN_SIZE_SEMIJOIN_REDUCTION)) { removeSemiJoin = true; + if (LOG.isDebugEnabled()) { + LOG.debug("Insufficient rows (" + numRows + ") to justify semijoin optimization. Removing semijoin " + + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts)); + } } } @@ -931,6 +958,10 @@ private void removeSemijoinsParallelToMapJoin(OptimizeTezProcContext procCtx) if (semijoins.size() > 0) { for (ReduceSinkOperator rs : semijoins.keySet()) { + if (LOG.isDebugEnabled()) { + LOG.debug("Semijoin optimization with parallel edge to map join. Removing semijoin " + + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(semijoins.get(rs))); + } GenTezUtils.removeBranch(rs); GenTezUtils.removeSemiJoinOperator(procCtx.parseContext, rs, semijoins.get(rs));