Estimate the NDV of function expressions from the NDVs of their input columns.

For an ExprNodeGenericFuncDesc whose UDF is reported deterministic by
FunctionRegistry.isDeterministic, getColStatisticsFromExpression sets
countDistincts to the product (via safeMult) of getCountDistint() over the
columns from engfd.getCols() that have statistics in parentStats, capped at
numRows. It keeps using numRows when none of those columns has statistics or
when the UDF is not deterministic. The expected plan in
cbo_rp_auto_join1.q.out is updated from "Num rows: 5" to "Num rows: 8".

NOTE(review): this patch was recovered from a whitespace-collapsed copy. Line
breaks follow the hunk header counts, but the leading whitespace of hunk lines
(especially in the .q.out hunk) is reconstructed; verify it against the target
files, or apply with whitespace-insensitive matching.

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 71ed31c..a08e287 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -36,6 +36,7 @@
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.Decimal;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.exec.RowSchema;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Utilities;
@@ -1261,7 +1262,28 @@ public static ColStatistics getColStatisticsFromExpression(HiveConf conf, Statis
       ExprNodeGenericFuncDesc engfd = (ExprNodeGenericFuncDesc) end;
       colName = engfd.getName();
       colType = engfd.getTypeString();
-      countDistincts = numRows;
+      List<Long> ndvs = new ArrayList<>();
+      if (FunctionRegistry.isDeterministic(engfd.getGenericUDF())){
+        for (String col : engfd.getCols()) {
+          ColStatistics stats = parentStats.getColumnStatisticsFromColName(col);
+          if (stats != null) {
+            ndvs.add(stats.getCountDistint());
+          }
+        }
+        if (ndvs.isEmpty()) {
+          countDistincts = numRows;
+        } else {
+          countDistincts = 1;
+          for (Long ndv : ndvs) {
+            countDistincts = safeMult(countDistincts, ndv);
+          }
+          if (countDistincts > numRows) {
+            countDistincts = numRows;
+          }
+        }
+      } else {
+        countDistincts = numRows;
+      }
       oi = engfd.getWritableObjectInspector();
     } else if (end instanceof ExprNodeColumnListDesc) {
 
diff --git a/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out b/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out
index 6537a8a..d6efe3e 100644
--- a/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out
+++ b/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out
@@ -977,9 +977,9 @@ STAGE PLANS:
                 keys:
                   0 (key + 1) (type: int)
                   1 (key + 1) (type: int)
-                Statistics: Num rows: 5 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                Statistics: Num rows: 8 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
                 Select Operator
-                  Statistics: Num rows: 5 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                  Statistics: Num rows: 8 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
                   Group By Operator
                     aggregations: count()
                     mode: hash