# Patch summary (free text ahead of the first "diff --git" header).
#
# Files touched, both under ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/:
#
# 1. HiveRelMdRowCount.java
#    - Adds imports for HiveTableScanRel and HepRelVertex.
#    - analyzeJoinForPKFK: in both the pkSide == 0 and pkSide == 1 branches, the
#      PK-side selectivity handed to PKSideInfo no longer comes from
#      "isPKSideSimpleTree ? RelMetadataQuery.getSelectivity(side, sidePred) : 1.0";
#      it now comes from the new private static helper pkSelectivity(...).
#    - pkSelectivity(joinRel, leftChild, child, childRowCount):
#        * returns 1.0 when (leftChild && generatesNullsOnRight()) or
#          (!leftChild && generatesNullsOnLeft());
#        * otherwise looks up a HiveTableScanRel with
#          HiveRelMdUniqueKeys.getTableScan(child, true) and returns
#          childRowCount / RelMetadataQuery.getRowCount(tScan);
#        * returns 1.0 when no table scan is found.
#    - visit(RelNode, int, RelNode): a HepRelVertex node is replaced by its
#      getCurrentRel() before the instanceof checks run.
#
# 2. HiveRelMdUniqueKeys.java
#    - Adds imports for FilterRelBase and HepRelVertex.
#    - getUniqueKeys(ProjectRelBase, boolean): the table scan is now obtained via
#      getTableScan(rel.getChild(), false) instead of requiring the direct child
#      to be a HiveTableScanRel; when that returns null the method still
#      delegates to the RelMdUniqueKeys.SOURCE handler.
#    - New static getTableScan(RelNode r, boolean traverseProject): loops until r
#      is a HiveTableScanRel or null, unwrapping HepRelVertex via getCurrentRel(),
#      descending through FilterRelBase children, descending through
#      ProjectRelBase children only when traverseProject is true, and giving up
#      (null) on any other node type.
#
# NOTE(review): pkSelectivity divides by tRowCount without checking for zero, and
#   the quotient is not clamped to 1.0 - confirm getRowCount(tScan) can never be
#   0 (or smaller than childRowCount) for a HiveTableScanRel.
# NOTE(review): the call sites keep the "generatesNullsOnRight()/OnLeft() ? 1.0 :"
#   ternary around pkSelectivity, repeating the check the helper already makes.
# NOTE(review): both pkSide branches pass the literal 1 as the first argument of
#   PKFKRelationInfo; its constructor is not visible in this patch - confirm that
#   is intended for the pkSide == 1 branch.
# NOTE(review): isPKSideSimpleTree no longer influences the selectivity value in
#   the changed lines; it is still passed to PKFKRelationInfo.
# NOTE(review): this copy of the patch appears whitespace-collapsed (hunk lines
#   are joined by spaces); line breaks would need restoring before it can be
#   applied with patch/git apply.
#
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdRowCount.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdRowCount.java index 949eb19..a9fe98a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdRowCount.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdRowCount.java @@ -28,6 +28,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel; import org.eigenbase.rel.FilterRelBase; import org.eigenbase.rel.JoinRelBase; import org.eigenbase.rel.JoinRelType; @@ -41,6 +42,7 @@ import org.eigenbase.rel.metadata.RelMetadataQuery; import org.eigenbase.rel.rules.SemiJoinRel; import org.eigenbase.relopt.RelOptUtil; +import org.eigenbase.relopt.hep.HepRelVertex; import org.eigenbase.rex.RexBuilder; import org.eigenbase.rex.RexCall; import org.eigenbase.rex.RexInputRef; @@ -270,10 +272,11 @@ public static PKFKRelationInfo analyzeJoinForPKFK(JoinRelBase joinRel) { if (pkSide == 0) { FKSideInfo fkInfo = new FKSideInfo(rightRowCount, rightNDV); + double pkSelectivity = pkSelectivity(joinRel, true, left, leftRowCount); PKSideInfo pkInfo = new PKSideInfo(leftRowCount, leftNDV, joinRel.getJoinType().generatesNullsOnRight() ? 1.0 : - isPKSideSimpleTree ? RelMetadataQuery.getSelectivity(left, leftPred) : 1.0); + pkSelectivity); return new PKFKRelationInfo(1, fkInfo, pkInfo, ndvScalingFactor, isPKSideSimpleTree); } @@ -281,10 +284,11 @@ public static PKFKRelationInfo analyzeJoinForPKFK(JoinRelBase joinRel) { if (pkSide == 1) { FKSideInfo fkInfo = new FKSideInfo(leftRowCount, leftNDV); + double pkSelectivity = pkSelectivity(joinRel, false, right, rightRowCount); PKSideInfo pkInfo = new PKSideInfo(rightRowCount, rightNDV, joinRel.getJoinType().generatesNullsOnLeft() ? 1.0 : - isPKSideSimpleTree ? 
RelMetadataQuery.getSelectivity(right, rightPred) : 1.0); + pkSelectivity); return new PKFKRelationInfo(1, fkInfo, pkInfo, ndvScalingFactor, isPKSideSimpleTree); } @@ -292,6 +296,23 @@ public static PKFKRelationInfo analyzeJoinForPKFK(JoinRelBase joinRel) { return null; } + private static double pkSelectivity(JoinRelBase joinRel, boolean leftChild, + RelNode child, + double childRowCount) { + if ((leftChild && joinRel.getJoinType().generatesNullsOnRight()) || + (!leftChild && joinRel.getJoinType().generatesNullsOnLeft())) { + return 1.0; + } else { + HiveTableScanRel tScan = HiveRelMdUniqueKeys.getTableScan(child, true); + if (tScan != null) { + double tRowCount = RelMetadataQuery.getRowCount(tScan); + return childRowCount / tRowCount; + } else { + return 1.0; + } + } + } + private static boolean isKey(BitSet c, RelNode rel) { boolean isKey = false; Set keys = RelMetadataQuery.getUniqueKeys(rel); @@ -384,6 +405,10 @@ static boolean check(RelNode r, int joinKey) { @Override public void visit(RelNode node, int ordinal, RelNode parent) { + if (node instanceof HepRelVertex) { + node = ((HepRelVertex) node).getCurrentRel(); + } + if (node instanceof TableAccessRelBase) { simpleTree = true; } else if (node instanceof ProjectRelBase) { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdUniqueKeys.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdUniqueKeys.java index 06ff584..3221f91 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdUniqueKeys.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdUniqueKeys.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel; import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.eigenbase.rel.FilterRelBase; import org.eigenbase.rel.ProjectRelBase; import org.eigenbase.rel.RelNode; import org.eigenbase.rel.metadata.BuiltInMetadata; @@ -37,6 +38,7 @@ import 
org.eigenbase.rel.metadata.ReflectiveRelMetadataProvider; import org.eigenbase.rel.metadata.RelMdUniqueKeys; import org.eigenbase.rel.metadata.RelMetadataProvider; +import org.eigenbase.relopt.hep.HepRelVertex; import org.eigenbase.rex.RexInputRef; import org.eigenbase.rex.RexNode; @@ -59,16 +61,15 @@ */ public Set getUniqueKeys(ProjectRelBase rel, boolean ignoreNulls) { - RelNode child = rel.getChild(); + HiveTableScanRel tScan = getTableScan(rel.getChild(), false); - if (!(child instanceof HiveTableScanRel)) { + if ( tScan == null ) { Function fn = RelMdUniqueKeys.SOURCE.apply( rel.getClass(), BuiltInMetadata.UniqueKeys.class); return ((BuiltInMetadata.UniqueKeys) fn.apply(rel)) .getUniqueKeys(ignoreNulls); } - HiveTableScanRel tScan = (HiveTableScanRel) child; Map posMap = new HashMap(); int projectPos = 0; int colStatsPos = 0; @@ -112,4 +113,26 @@ return keys; } + /* + * traverse a path of Filter, Projects to get to the TableScan. + * In case of Unique keys, stop if you reach a Project, it will be handled + * by the invocation on the Project. + * In case of getting the base rowCount of a Path, keep going past a Project. + */ + static HiveTableScanRel getTableScan(RelNode r, boolean traverseProject) { + + while (r != null && !(r instanceof HiveTableScanRel)) { + if (r instanceof HepRelVertex) { + r = ((HepRelVertex) r).getCurrentRel(); + } else if (r instanceof FilterRelBase) { + r = ((FilterRelBase) r).getChild(); + } else if (traverseProject && r instanceof ProjectRelBase) { + r = ((ProjectRelBase) r).getChild(); + } else { + r = null; + } + } + return r == null ? null : (HiveTableScanRel) r; + } + }