Make aggregation-buffer size estimation tolerate a missing NDV estimator.

GenericUDAFComputeStats: both estimate() methods now call the static
NumDistinctValueEstimator.lengthFor(model, null) when numDV is null instead
of dereferencing it.

NumDistinctValueEstimator: the sizing arithmetic moves into a static
lengthFor(JavaDataModel, Integer); a null vector count is treated as 16.
The instance method lengthFor(JavaDataModel) delegates to it with
getnumBitVectors().

NOTE(review): line breaks were restored from a whitespace-collapsed copy of
this patch. Hunk contents agree with the line counts in every hunk header,
but leading indentation and comment alignment inside the hunks are
reconstructed (2-space steps). Verify against the target files, or apply with
whitespace-insensitive context matching (e.g. git apply --ignore-whitespace).

diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
index d6ca73f..57e8b62 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
@@ -431,8 +431,11 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc
       @Override
       public int estimate() {
         JavaDataModel model = JavaDataModel.get();
-        return model.lengthFor(columnType) + model.primitive1() + model.primitive2() +
-            numDV.lengthFor(model);
+        return model.lengthFor(columnType)
+            + model.primitive1()
+            + model.primitive2()
+            + ((numDV == null) ? NumDistinctValueEstimator.lengthFor(model, null) :
+                numDV.lengthFor(model));
       }
 
       protected void initNDVEstimator(int numBitVectors) {
@@ -836,7 +839,10 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc
       public int estimate() {
         JavaDataModel model = JavaDataModel.get();
         return model.primitive1() * 2 + model.primitive2() * 4 +
-            model.lengthFor(columnType) + numDV.lengthFor(model);
+            model.lengthFor(columnType) +
+            ((numDV == null) ? NumDistinctValueEstimator.lengthFor(model, null) :
+                numDV.lengthFor(model));
+
       }
     };
 
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java
index 99f4d71..fa70f49 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java
@@ -22,6 +22,7 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import org.apache.hadoop.hive.common.classification.InterfaceAudience;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.ql.util.JavaDataModel;
 import org.apache.hadoop.io.Text;
@@ -356,13 +357,17 @@ public long estimateNumDistinctValues() {
     return ((long)(numDistinctValues));
   }
 
-  public int lengthFor(JavaDataModel model) {
+  @InterfaceAudience.LimitedPrivate(value = { "Hive" })
+  static int lengthFor(JavaDataModel model, Integer numVector) {
     int length = model.object();
     length += model.primitive1() * 2;       // two int
     length += model.primitive2();           // one double
     length += model.lengthForRandom() * 2;  // two Random
 
-    int numVector = getnumBitVectors();
+    if (numVector == null) {
+      numVector = 16; // HiveConf hive.stats.ndv.error default produces 16 vectors
+    }
+
     if (numVector > 0) {
       length += model.array() * 3;                    // three array
       length += model.primitive1() * numVector * 2;   // two int array
@@ -372,4 +377,7 @@ public int lengthFor(JavaDataModel model) {
     return length;
   }
 
+  public int lengthFor(JavaDataModel model) {
+    return lengthFor(model, getnumBitVectors());
+  }
 }