diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java index b32e04a..788aace 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java @@ -20,10 +20,13 @@ import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.SelectOperator; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; import org.apache.hadoop.hive.ql.plan.Statistics; import org.apache.hadoop.hive.ql.plan.Statistics.State; import org.apache.hadoop.hive.serde2.io.DateWritable; @@ -68,7 +71,7 @@ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticE */ public static class GenericUDAFBloomFilterEvaluator extends GenericUDAFEvaluator { // Source operator to get the number of entries - private Operator sourceOperator; + private SelectOperator sourceOperator; private long maxEntries = 0; // ObjectInspector for input data. @@ -258,10 +261,13 @@ public long getExpectedEntries() { switch (stats.getColumnStatsState()) { case COMPLETE: case PARTIAL: - // There should only be column stats for one column, use if that is the case. 
+ // There should only be one column in sourceOperator List colStats = stats.getColumnStats(); - if (colStats.size() == 1) { - long ndv = colStats.get(0).getCountDistint(); + ExprNodeColumnDesc colExpr = ExprNodeDescUtils.getColumnExpr( + sourceOperator.getConf().getColList().get(0)); + if (colExpr != null + && stats.getColumnStatisticsFromColName(colExpr.getColumn()) != null) { + long ndv = stats.getColumnStatisticsFromColName(colExpr.getColumn()).getCountDistint(); if (ndv > 0) { expectedEntries = ndv; } @@ -279,7 +285,7 @@ public long getExpectedEntries() { return sourceOperator; } - public void setSourceOperator(Operator sourceOperator) { + public void setSourceOperator(SelectOperator sourceOperator) { this.sourceOperator = sourceOperator; } diff --git a/ql/src/test/results/clientpositive/llap/mergejoin.q.out b/ql/src/test/results/clientpositive/llap/mergejoin.q.out index cc6cf47..2dcfd6b 100644 --- a/ql/src/test/results/clientpositive/llap/mergejoin.q.out +++ b/ql/src/test/results/clientpositive/llap/mergejoin.q.out @@ -63,7 +63,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=25) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=14) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE @@ -95,7 +95,7 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=25) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=14) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE