diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index f316f09953..e365f2eec3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -1555,17 +1555,11 @@ private void removeSemijoinOptimizationByBenefit(OptimizeTezProcContext procCtx) private void markSemiJoinForDPP(OptimizeTezProcContext procCtx) throws SemanticException { // Stores the Tablescan operators processed to avoid redoing them. - Map tsOps = new HashMap<>(); Map map = procCtx.parseContext.getRsToSemiJoinBranchInfo(); for (ReduceSinkOperator rs : map.keySet()) { SemiJoinBranchInfo sjInfo = map.get(rs); TableScanOperator ts = sjInfo.getTsOp(); - TableScanOperator tsInMap = tsOps.putIfAbsent(ts, ts); - if (tsInMap != null) { - // Already processed, skip - continue; - } if (sjInfo.getIsHint() || !sjInfo.getShouldRemove()) { continue; @@ -1594,12 +1588,12 @@ private void markSemiJoinForDPP(OptimizeTezProcContext procCtx) return; } try { - // If stats are not available, just assume its a useful edge + // Get nDVs on Semijoin edge side Statistics stats = selOp.getStatistics(); - ExprNodeColumnDesc colExpr = ExprNodeDescUtils.getColumnExpr( + String selCol = ExprNodeDescUtils.extractColName( selOp.getConf().getColList().get(0)); long nDVs = stats.getColumnStatisticsFromColName( - colExpr.getColumn()).getCountDistint(); + selCol).getCountDistint(); if (nDVs > 0) { // Lookup nDVs on TS side. RuntimeValuesInfo rti = procCtx.parseContext @@ -1607,9 +1601,9 @@ private void markSemiJoinForDPP(OptimizeTezProcContext procCtx) ExprNodeDesc tsExpr = rti.getTsColExpr(); FilterOperator fil = (FilterOperator) (ts.getChildOperators().get(0)); Statistics filStats = fil.getStatistics(); - ExprNodeColumnDesc tsColExpr = ExprNodeDescUtils.getColumnExpr(tsExpr); + String colName = ExprNodeDescUtils.extractColName(tsExpr); long nDVsOfTS = filStats.getColumnStatisticsFromColName( - tsColExpr.getColumn()).getCountDistint(); + colName).getCountDistint(); double nDVsOfTSFactored = nDVsOfTS * procCtx.conf.getFloatVar( ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION_FOR_DPP_FACTOR); if ((long)nDVsOfTSFactored > nDVs) { @@ -1621,7 +1615,7 @@ private void markSemiJoinForDPP(OptimizeTezProcContext procCtx) } } } catch (NullPointerException e) { - sjInfo.setShouldRemove(false); + // Do nothing } break; }