diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index 0982059..6f44a92 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -1013,17 +1013,14 @@ private long applyGBYRule(long numRows, long dvProd) { */ public static class JoinStatsRule extends DefaultStatsRule implements NodeProcessor { - private boolean pkfkInferred = false; - private long newNumRows = 0; - private List> parents; - private CommonJoinOperator jop; - private int numAttr = 1; @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { - jop = (CommonJoinOperator) nd; - parents = jop.getParentOperators(); + long newNumRows = 0; + CommonJoinOperator jop = (CommonJoinOperator) nd; + List> parents = jop.getParentOperators(); + int numAttr = 1; AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx; HiveConf conf = aspCtx.getConf(); boolean allStatsAvail = true; @@ -1062,7 +1059,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, numAttr = keyExprs.size(); // infer PK-FK relationship in single attribute join case - inferPKFKRelationship(); + long inferredRowCount = inferPKFKRelationship(numAttr, parents, jop); // get the join keys from parent ReduceSink operators for (int pos = 0; pos < parents.size(); pos++) { ReduceSinkOperator parent = (ReduceSinkOperator) jop.getParentOperators().get(pos); @@ -1149,7 +1146,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // update join statistics stats.setColumnStats(outColStats); - long newRowCount = pkfkInferred ? newNumRows : computeNewRowCount(rowCounts, denom); + long newRowCount = inferredRowCount !=-1 ? inferredRowCount : computeNewRowCount(rowCounts, denom); updateStatsForJoinType(stats, newRowCount, jop, rowCountParents); jop.setStatistics(stats); @@ -1180,7 +1177,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } long maxDataSize = parentSizes.get(maxRowIdx); - long newNumRows = StatsUtils.safeMult(StatsUtils.safeMult(maxRowCount, (numParents - 1)), joinFactor); + newNumRows = StatsUtils.safeMult(StatsUtils.safeMult(maxRowCount, (numParents - 1)), joinFactor); long newDataSize = StatsUtils.safeMult(StatsUtils.safeMult(maxDataSize, (numParents - 1)), joinFactor); Statistics wcStats = new Statistics(); wcStats.setNumRows(newNumRows); @@ -1195,15 +1192,17 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, return null; } - private void inferPKFKRelationship() { + private long inferPKFKRelationship(int numAttr, List> parents, + CommonJoinOperator jop) { + long newNumRows = -1; if (numAttr == 1) { // If numAttr is 1, this means we join on one single key column. Map parentsWithPK = getPrimaryKeyCandidates(parents); // We only allow one single PK. if (parentsWithPK.size() != 1) { - LOG.debug("STATS-" + jop.toString() + ": detects multiple PK parents."); - return; + LOG.debug("STATS-" + jop.toString() + ": detects none/multiple PK parents."); + return newNumRows; } Integer pkPos = parentsWithPK.keySet().iterator().next(); ColStatistics csPK = parentsWithPK.values().iterator().next(); @@ -1215,7 +1214,7 @@ private void inferPKFKRelationship() { // csfKs.size() + 1 == parents.size() means we have a single PK and all // the rest ops are FKs. if (csFKs.size() + 1 == parents.size()) { - getSelectivity(parents, pkPos, csPK, csFKs); + newNumRows = getSelectivity(parents, pkPos, csPK, csFKs, jop); // some debug information if (isDebugEnabled) { @@ -1236,6 +1235,7 @@ private void inferPKFKRelationship() { } } } + return newNumRows; } /** @@ -1243,9 +1243,9 @@ private void inferPKFKRelationship() { * @param csPK - ColStatistics for a single primary key * @param csFKs - ColStatistics for multiple foreign keys */ - private void getSelectivity(List> ops, Integer pkPos, ColStatistics csPK, - Map csFKs) { - this.pkfkInferred = true; + private long getSelectivity(List> ops, Integer pkPos, + ColStatistics csPK, Map csFKs, + CommonJoinOperator jop) { double pkfkSelectivity = Double.MAX_VALUE; int fkInd = -1; // 1. We iterate through all the operators that have candidate FKs and @@ -1290,13 +1290,15 @@ private void getSelectivity(List> ops, Integer distinctVals.add(csFK.getCountDistint()); } } + long newNumRows; if (csFKs.size() == 1) { // there is only one FK - this.newNumRows = newrows; + newNumRows = newrows; } else { // there is more than one FK - this.newNumRows = this.computeNewRowCount(rowCounts, getDenominator(distinctVals)); + newNumRows = this.computeNewRowCount(rowCounts, getDenominator(distinctVals)); } + return newNumRows; } private float getSelectivitySimpleTree(Operator op) {