diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
index 91d2f1f091..b60b3ae808 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
@@ -1036,9 +1036,12 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
         long expectedEntries = udafBloomFilterEvaluator.getExpectedEntries();
         if (expectedEntries == -1 || expectedEntries >
             pCtx.getConf().getLongVar(ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES)) {
-          if (sjInfo.getIsHint()) {
-            throw new SemanticException("Removing hinted semijoin due to lack to stats" +
-                " or exceeding max bloom filter entries");
+          if (sjInfo.getIsHint() && expectedEntries == -1) {
+            // Hinted semijoin fails only for expectedEntries == -1; an over-limit hinted one is kept below.
+            throw new SemanticException("Removing hinted semijoin due to lack of stats");
+          } else if (sjInfo.getIsHint()) {
+            // do not remove if hint is provided
+            continue;
           }
           // Remove the semijoin optimization branch along with ALL the mappings
           // The parent GB2 has all the branches. Collect them and remove them.
diff --git a/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java b/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
index b186477009..8fe6597c6e 100644
--- a/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
+++ b/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
@@ -42,7 +42,7 @@
   private static final int DEFAULT_BLOCK_OFFSET_MASK = DEFAULT_BLOCK_SIZE - 1;
   private static final int DEFAULT_BIT_OFFSET_MASK = Long.SIZE - 1;
   private final BitSet bitSet;
-  private final int m;
+  private final long m;
   private final int k;
   // spread k-1 bits to adjacent longs, default is 8
   // spreading hash bits within blockSize * longs will make bloom filter L1 cache friendly
@@ -59,9 +59,9 @@ public BloomKFilter(long maxNumEntries) {
     checkArgument(maxNumEntries > 0, "expectedEntries should be > 0");
     long numBits = optimalNumOfBits(maxNumEntries, DEFAULT_FPP);
     this.k = optimalNumOfHashFunctions(maxNumEntries, numBits);
-    int nLongs = (int) Math.ceil((double) numBits / (double) Long.SIZE);
+    long nLongs = (long) Math.ceil(numBits / (double) Long.SIZE);
     // additional bits to pad long array to block size
-    int padLongs = DEFAULT_BLOCK_SIZE - nLongs % DEFAULT_BLOCK_SIZE;
+    long padLongs = DEFAULT_BLOCK_SIZE - (nLongs % DEFAULT_BLOCK_SIZE);
     this.m = (nLongs + padLongs) * Long.SIZE;
     this.bitSet = new BitSet(m);
     checkArgument((bitSet.data.length % DEFAULT_BLOCK_SIZE) == 0, "bitSet has to be block aligned");
@@ -252,7 +252,7 @@ public int getNumHashFunctions() {
     return k;
   }
 
-  public int getNumBits() {
+  public long getNumBits() {
     return m;
   }
 