diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyFilter.java ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyFilter.java index 361dbc3209..2f296c0615 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyFilter.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyFilter.java @@ -17,10 +17,14 @@ */ package org.apache.hadoop.hive.ql.exec; +import com.google.common.annotations.VisibleForTesting; + import static java.util.Arrays.binarySearch; import java.util.Arrays; import java.util.Comparator; +import java.util.HashSet; +import java.util.Set; /** * Implementation of filtering out keys. @@ -36,14 +40,43 @@ private long added = 0; private long total = 0; + //to track effectiveness of boundary check + private long eff = 0; + private final Set topNKeySet; + public TopNKeyFilter(int topN, Comparator comparator) { this.comparator = comparator; this.sortedTopItems = new KeyWrapper[topN +1]; this.topN = topN; + this.topNKeySet = new HashSet<>(); + } + + private int compareWithBoundary(KeyWrapper kw) { + return ((Comparator) comparator).compare(kw, sortedTopItems[topN - 1]); } public final boolean canForward(KeyWrapper kw) { total++; + if (topN > 0 && (size == topN)) { + int comp = compareWithBoundary(kw); + if (comp == 0) { + // special case, if last element is same as kw; + // Avoids duplicate comparison later + eff++; + return true; + } + + if (comp > 0) { + eff++; + return false; + } + } + + if (topNKeySet.contains(kw)) { + repeated++; + return true; + } + int pos = binarySearch(sortedTopItems, 0, size, kw, (Comparator) comparator); if (pos >= 0) { // found repeated++; @@ -53,8 +86,13 @@ public final boolean canForward(KeyWrapper kw) { if (pos >= topN) { // would be inserted to the end, there are topN elements which are smaller/larger return false; } + KeyWrapper oldElement = sortedTopItems[pos]; System.arraycopy(sortedTopItems, pos, sortedTopItems, pos +1, size - pos); // make space by shifting sortedTopItems[pos] = kw.copyKey(); + + topNKeySet.remove(oldElement); + topNKeySet.add(sortedTopItems[pos]); + added++; if (size < topN) { size++; @@ -78,6 +116,7 @@ public String toString() { sb.append(", repeated=").append(repeated); sb.append(", added=").append(added); sb.append(", total=").append(total); + sb.append(", eff=").append(eff); sb.append(", forwardingRatio=").append(forwardingRatio()); sb.append('}'); return sb.toString(); @@ -99,4 +138,19 @@ public float forwardingRatio() { public long getTotal() { return total; } + + @VisibleForTesting + long getEffectiveBoundaryChecks() { + return eff; + } + + @VisibleForTesting + long getRepeated() { + return repeated; + } + + @VisibleForTesting + long getKeySetSize() { + return topNKeySet.size(); + } } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/TestTopNKeyFilter.java ql/src/test/org/apache/hadoop/hive/ql/exec/TestTopNKeyFilter.java index a91bc7354a..ddd7dde308 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/TestTopNKeyFilter.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/TestTopNKeyFilter.java @@ -21,6 +21,7 @@ import static org.hamcrest.Matchers.hasItem; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.is; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertThat; import java.util.Comparator; @@ -48,6 +49,10 @@ public void testNothingCanBeForwardedIfTopNIs0() { TopNKeyFilter topNKeyFilter = new TopNKeyFilter(0, TEST_KEY_WRAPPER_COMPARATOR); assertThat(topNKeyFilter.canForward(new TestKeyWrapper(1)), is(false)); assertThat(topNKeyFilter.canForward(new TestKeyWrapper(-1)), is(false)); + + assertEquals(0, topNKeyFilter.getEffectiveBoundaryChecks()); + assertEquals(0, topNKeyFilter.getRepeated()); + assertEquals(0, topNKeyFilter.getKeySetSize()); } @Test @@ -55,8 +60,47 @@ public void testFirstTopNKeysCanBeForwarded() { TopNKeyFilter topNKeyFilter = new TopNKeyFilter(3, TEST_KEY_WRAPPER_COMPARATOR); assertThat(topNKeyFilter.canForward(new TestKeyWrapper(1)), is(true)); assertThat(topNKeyFilter.canForward(new TestKeyWrapper(5)), is(true)); + + assertEquals(0, topNKeyFilter.getEffectiveBoundaryChecks()); + assertEquals(0, topNKeyFilter.getRepeated()); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(10)), is(true)); assertThat(topNKeyFilter.canForward(new TestKeyWrapper(11)), is(false)); + assertEquals(1, topNKeyFilter.getEffectiveBoundaryChecks()); + + assertEquals(3, topNKeyFilter.getKeySetSize()); + + //new key same as boundary element (in 1,5,10) + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(10)), is(true)); + assertEquals(2, topNKeyFilter.getEffectiveBoundaryChecks()); + } + + @Test + public void testFirstTopNKeysCanBeForwardedDesc() { + TopNKeyFilter topNKeyFilter = new TopNKeyFilter(3, TEST_KEY_WRAPPER_COMPARATOR.reversed()); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(10)), is(true)); + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(5)), is(true)); + + assertEquals(0, topNKeyFilter.getEffectiveBoundaryChecks()); + assertEquals(0, topNKeyFilter.getRepeated()); + + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(1)), is(true)); + + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(11)), is(true)); + assertEquals(0, topNKeyFilter.getEffectiveBoundaryChecks()); + assertEquals(0, topNKeyFilter.getRepeated()); + + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(0)), is(false)); + assertEquals(1, topNKeyFilter.getEffectiveBoundaryChecks()); + + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(11)), is(true)); + assertEquals(1, topNKeyFilter.getRepeated()); + + assertEquals(3, topNKeyFilter.getKeySetSize()); + + //new key same as boundary element (in 11, 10, 5) + assertThat(topNKeyFilter.canForward(new TestKeyWrapper(5)), is(true)); + assertEquals(2, topNKeyFilter.getEffectiveBoundaryChecks()); } @Test