diff --git itests/hive-jmh/pom.xml itests/hive-jmh/pom.xml
index c0a6564a0f..5eb30267dc 100644
--- itests/hive-jmh/pom.xml
+++ itests/hive-jmh/pom.xml
@@ -65,6 +65,11 @@
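       <groupId>org.apache.hive</groupId>
+      <artifactId>hive-storage-api</artifactId>
+      <version>2.7.0-SNAPSHOT</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hive</groupId>
       <artifactId>hive-exec</artifactId>
       <version>${project.version}</version>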
diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/hash/Murmur3Bench.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/hash/Murmur3Bench.java
new file mode 100644
index 0000000000..cd85148ebb
--- /dev/null
+++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/hash/Murmur3Bench.java
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.benchmark.hash;
+
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColLikeStringScalar;
+import org.apache.hive.common.util.Murmur3;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+/**
+ * This benchmark compares {@code Murmur3.hash64(long)} with hashing the same value through
+ * an 8-byte array using {@code Murmur3.hash64(byte[])}.
+ *
+ * This test uses JMH framework for benchmarking.
+ * You may execute this benchmark tool using JMH command line in different ways:
+ *
+ * To use the settings shown in the main() function, use:
+ * $ java -cp target/benchmarks.jar org.apache.hive.benchmark.hash.Murmur3Bench
+ *
+ * To use the default settings used by JMH, use:
+ * $ java -jar target/benchmarks.jar org.apache.hive.benchmark.hash.Murmur3Bench
+ *
+ * To specify different parameters, use:
+ * - This command will use 10 warm-up iterations, 5 test iterations, and 2 forks. And it will
+ *   display the Average Time (avgt) in Microseconds (us)
+ * - Benchmark mode. Available modes are:
+ *   [Throughput/thrpt, AverageTime/avgt, SampleTime/sample, SingleShotTime/ss, All/all]
+ * - Output time unit. Available time units are: [m, s, ms, us, ns].
+ *
+ * $ java -jar target/benchmarks.jar org.apache.hive.benchmark.hash.Murmur3Bench
+ *   -wi 10 -i 5 -f 2 -bm avgt -tu us
+ */
+@State(Scope.Benchmark)
+public class Murmur3Bench {
+  /** Compares both hash64 variants over the same run of consecutive long values. */
+  @BenchmarkMode(Mode.AverageTime)
+  @Fork(1)
+  @State(Scope.Thread)
+  @OutputTimeUnit(TimeUnit.MILLISECONDS)
+  public static class Hash64Bench {
+    /** Number of values hashed per benchmark invocation. */
+    private static final int VALUES_PER_CALL = 4096;
+
+    /** First value of the run; JMH injects it from the parameter list. */
+    @Param({"-1"})
+    long v;
+
+    /** Hashes each value with the primitive overload and returns the sum of the hashes. */
+    @Benchmark
+    @Warmup(iterations = 2, time = 2, timeUnit = TimeUnit.SECONDS)
+    @Measurement(iterations = 20, time = 2, timeUnit = TimeUnit.SECONDS)
+    public long longHash() {
+      long sum = 0;
+      for (int i = 0; i < VALUES_PER_CALL; i++) {
+        // Same v + i input as longBytesHash, so the two benchmarks hash identical values.
+        sum += Murmur3.hash64(v + i);
+      }
+      return sum;
+    }
+
+    /** Hashes each value through an 8-byte buffer allocated once per invocation. */
+    @Benchmark
+    @Warmup(iterations = 2, time = 2, timeUnit = TimeUnit.SECONDS)
+    @Measurement(iterations = 20, time = 2, timeUnit = TimeUnit.SECONDS)
+    public long longBytesHash() {
+      ByteBuffer buffer = ByteBuffer.allocate(Long.BYTES);
+      long sum = 0;
+      for (int i = 0; i < VALUES_PER_CALL; i++) {
+        buffer.putLong(0, v + i);
+        sum += Murmur3.hash64(buffer.array());
+      }
+      return sum;
+    }
+  }
+
+  /** Runs every benchmark whose name matches this class's simple name. */
+  public static void main(String[] args) throws RunnerException {
+    Options opt = new OptionsBuilder().include(".*" + Murmur3Bench.class.getSimpleName() +
+        ".*").build();
+    new Runner(opt).run();
+  }
+}
\ No newline at end of file
diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
index 8bdb47b431..07a93c69f8 100644
--- standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
+++ standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
@@ -62,9 +62,6 @@
private final static int DEFAULT_HASH_BITS = 64;
private final static long HASH64_ZERO = Murmur3.hash64(new byte[] {0});
private final static long HASH64_ONE = Murmur3.hash64(new byte[] {1});
- private final static ByteBuffer SHORT_BUFFER = ByteBuffer.allocate(Short.BYTES);
- private final static ByteBuffer INT_BUFFER = ByteBuffer.allocate(Integer.BYTES);
- private final static ByteBuffer LONG_BUFFER = ByteBuffer.allocate(Long.BYTES);
public enum EncodingType {
SPARSE, DENSE
@@ -212,33 +209,30 @@ public void addBytes(byte[] val) {
   }
   public void addShort(short val) {
-    SHORT_BUFFER.putShort(0, val);
-    add(Murmur3.hash64(SHORT_BUFFER.array()));
+    add(Murmur3.hash64(val));
   }
   public void addInt(int val) {
-    INT_BUFFER.putInt(0, val);
-    add(Murmur3.hash64(INT_BUFFER.array()));
+    add(Murmur3.hash64(val));
   }
   public void addLong(long val) {
-    LONG_BUFFER.putLong(0, val);
-    add(Murmur3.hash64(LONG_BUFFER.array()));
+    add(Murmur3.hash64(val));
   }
   public void addFloat(float val) {
-    INT_BUFFER.putFloat(0, val);
-    add(Murmur3.hash64(INT_BUFFER.array()));
+    // Raw bits (no NaN canonicalization) hash the same four bytes the removed putFloat wrote.
+    add(Murmur3.hash64(Float.floatToRawIntBits(val)));
   }
   public void addDouble(double val) {
-    LONG_BUFFER.putDouble(0, val);
-    add(Murmur3.hash64(LONG_BUFFER.array()));
+    // Raw bits (no NaN canonicalization) hash the same eight bytes the removed putDouble wrote.
+    add(Murmur3.hash64(Double.doubleToRawLongBits(val)));
   }
   public void addChar(char val) {
-    SHORT_BUFFER.putChar(0, val);
-    add(Murmur3.hash64(SHORT_BUFFER.array()));
+    // A char and a short carry the same 16 bits, so the short overload sees identical bytes.
+    add(Murmur3.hash64((short) val));
   }
   /**
diff --git storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
index 6ccf5ab4dd..5b1914dc49 100644
--- storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
+++ storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
@@ -156,7 +156,7 @@ public void addInt(int val) {
   public void addLong(long val) {
-    // puts long in little endian order
-    addBytes(longToByteArrayLE(val));
+    // hash64(long) byte-swaps its input; swapping first hashes the little-endian bytes as before
+    addHash(Murmur3.hash64(Long.reverseBytes(val)));
   }
   public void addFloat(float val) {
@@ -239,7 +239,8 @@ public boolean testInt(int val) {
   }
   public boolean testLong(long val) {
-    return testBytes(longToByteArrayLE(val));
+    // must mirror addLong: pre-swap so the little-endian bytes are what gets hashed
+    return testHash(Murmur3.hash64(Long.reverseBytes(val)));
   }
   public boolean testFloat(float val) {
diff --git storage-api/src/java/org/apache/hive/common/util/Murmur3.java storage-api/src/java/org/apache/hive/common/util/Murmur3.java
index c896fa7e50..8aae28b9c4 100644
--- storage-api/src/java/org/apache/hive/common/util/Murmur3.java
+++ storage-api/src/java/org/apache/hive/common/util/Murmur3.java
@@ -155,6 +155,68 @@ public static long hash64(byte[] data) {
     return hash64(data, 0, data.length, DEFAULT_SEED);
   }
+  /**
+   * Hashes a long with the default seed. The result is meant to equal
+   * {@link #hash64(byte[])} applied to the value's 8 bytes in big-endian order.
+   *
+   * @param data value to hash
+   * @return 64-bit hash of the value
+   */
+  public static long hash64(long data) {
+    long hash = DEFAULT_SEED;
+    // Byte-swap so the mixed block matches the one built from the big-endian byte array.
+    long k = Long.reverseBytes(data);
+    // mix functions
+    k *= C1;
+    k = Long.rotateLeft(k, R1);
+    k *= C2;
+    hash ^= k;
+    hash = Long.rotateLeft(hash, R2) * M + N1;
+    // finalization
+    hash ^= Long.BYTES;
+    return fmix64(hash);
+  }
+
+  /**
+   * Hashes an int with the default seed. The result is meant to equal
+   * {@link #hash64(byte[])} applied to the value's 4 bytes in big-endian order.
+   *
+   * @param data value to hash
+   * @return 64-bit hash of the value
+   */
+  public static long hash64(int data) {
+    long hash = DEFAULT_SEED;
+    // Byte-swapped and zero-extended; unlike the long overload, hash is not rotated after mixing.
+    long k1 = Integer.toUnsignedLong(Integer.reverseBytes(data));
+    k1 *= C1;
+    k1 = Long.rotateLeft(k1, R1);
+    k1 *= C2;
+    hash ^= k1;
+    // finalization
+    hash ^= Integer.BYTES;
+    return fmix64(hash);
+  }
+
+  /**
+   * Hashes a short with the default seed. The result is meant to equal
+   * {@link #hash64(byte[])} applied to the value's 2 bytes in big-endian order.
+   *
+   * @param data value to hash
+   * @return 64-bit hash of the value
+   */
+  public static long hash64(short data) {
+    long hash = DEFAULT_SEED;
+    // Byte-swapped and zero-extended; unlike the long overload, hash is not rotated after mixing.
+    long k1 = Short.toUnsignedLong(Short.reverseBytes(data));
+    k1 *= C1;
+    k1 = Long.rotateLeft(k1, R1);
+    k1 *= C2;
+    hash ^= k1;
+    // finalization
+    hash ^= Short.BYTES;
+    return fmix64(hash);
+  }
+
   public static long hash64(byte[] data, int offset, int length) {
     return hash64(data, offset, length, DEFAULT_SEED);
   }
diff --git storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java
index f20366b69c..16955c11a3 100644
--- storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java
+++ storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java
@@ -18,7 +18,7 @@
package org.apache.hive.common.util;
-import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.*;
import org.apache.hive.common.util.Murmur3.IncrementalHash32;
import com.google.common.hash.HashFunction;
@@ -222,7 +222,37 @@ public void testHashCodesM3_128_double() {
       assertEquals(gl2, m2);
     }
   }
-
+
+  /**
+   * Checks that the primitive hash64 overloads return the same hash as hashing the
+   * value's bytes written through a ByteBuffer (big-endian by default).
+   */
+  @Test
+  public void test64() {
+    final int seed = 123, iters = 1000000;
+    ByteBuffer shortBuffer = ByteBuffer.allocate(Short.BYTES);
+    ByteBuffer intBuffer = ByteBuffer.allocate(Integer.BYTES);
+    ByteBuffer longBuffer = ByteBuffer.allocate(Long.BYTES);
+    Random rdm = new Random(seed);
+    for (int i = 0; i < iters; ++i) {
+      long ln = rdm.nextLong();
+      int in = rdm.nextInt();
+      // truncating a random int can yield every short value, including MIN_VALUE and MAX_VALUE
+      short sn = (short) rdm.nextInt();
+      float fn = rdm.nextFloat();
+      double dn = rdm.nextDouble();
+      shortBuffer.putShort(0, sn);
+      assertEquals(Murmur3.hash64(shortBuffer.array()), Murmur3.hash64(sn));
+      intBuffer.putInt(0, in);
+      assertEquals(Murmur3.hash64(intBuffer.array()), Murmur3.hash64(in));
+      longBuffer.putLong(0, ln);
+      assertEquals(Murmur3.hash64(longBuffer.array()), Murmur3.hash64(ln));
+      intBuffer.putFloat(0, fn);
+      assertEquals(Murmur3.hash64(intBuffer.array()), Murmur3.hash64(Float.floatToIntBits(fn)));
+      longBuffer.putDouble(0, dn);
+      assertEquals(Murmur3.hash64(longBuffer.array()), Murmur3.hash64(Double.doubleToLongBits(dn)));
+    }
+  }
   @Test
   public void testIncremental() {