diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 761dbb2..1309fa6 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1624,6 +1624,11 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "of rows and data size. Since files in tables/partitions are serialized (and optionally\n" + "compressed) the estimates of number of rows and data size cannot be reliably determined.\n" + "This factor is multiplied with the file size to account for serialization and compression."), + HIVE_STATS_IN_CLAUSE_FACTOR("hive.stats.filter.in.factor", (float) 1.0, + "Currently column distribution is assumed to be uniform. This can lead to overestimation/underestimation\n" + + "in the number of rows filtered by a certain operator, which in turn might lead to overprovision or\n" + + "underprovision of resources. This factor is applied to the cardinality estimation of IN clauses in\n" + + "filter operators."), // Concurrency HIVE_SUPPORT_CONCURRENCY("hive.support.concurrency", false, diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index 3f82594..5625091 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -473,7 +473,8 @@ private long evaluateInExpr(Statistics stats, ExprNodeDesc pred, AnnotateStatsPr float columnFactor = dvs == 0 ? 0.5f : ((float)dvs / numRows) * values.get(i).size(); factor *= columnFactor; } - return Math.round( (double)numRows * factor); + float inFactor = HiveConf.getFloatVar(aspCtx.getConf(), HiveConf.ConfVars.HIVE_STATS_IN_CLAUSE_FACTOR); + return Math.round( (double)numRows * factor * inFactor); } private long evaluateNotExpr(Statistics stats, ExprNodeDesc pred,