diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java index 82085dd..d62b858 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java @@ -148,12 +148,7 @@ public int encodeHash(long hashcode) { } public int getSize() { - - // merge temp list before getting the size of sparse map - if (tempListIdx != 0) { - mergeTempListToSparseMap(); - } - return sparseMap.size(); + return sparseMap.size() + tempListIdx; } public void merge(HLLRegister hllRegister) { @@ -172,27 +167,20 @@ public void merge(HLLRegister hllRegister) { } public boolean set(int key, byte value) { - boolean updated = false; - // retain only the largest value for a register index - if (sparseMap.containsKey(key)) { - byte containedVal = sparseMap.get(key); - if (value > containedVal) { - sparseMap.put(key, value); - updated = true; - } - } else { + Byte containedValue = sparseMap.get(key); + if (containedValue == null || value > containedValue) { sparseMap.put(key, value); - updated = true; + return true; } - return updated; + return false; } public TreeMap getSparseMap() { - return sparseMap; + return getMergedSparseMap(); } - public TreeMap getMergedSparseMap() { + private TreeMap getMergedSparseMap() { if (tempListIdx != 0) { mergeTempListToSparseMap(); } diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java index 8bdb47b..2bf879e 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java @@ -290,7 +290,7 @@ public long count() { // if encoding is still SPARSE use linear counting with increase // accuracy (as we use pPrime bits for register index) int mPrime = 1 << sparseRegister.getPPrime(); - cachedCount = linearCount(mPrime, mPrime - sparseRegister.getSize()); + cachedCount = linearCount(mPrime, mPrime - sparseRegister.getSparseMap().size()); } else { // for DENSE encoding, use bias table lookup for HLLNoBias algorithm