diff --git itests/hive-jmh/pom.xml itests/hive-jmh/pom.xml index c0a6564a0f..5eb30267dc 100644 --- itests/hive-jmh/pom.xml +++ itests/hive-jmh/pom.xml @@ -65,6 +65,11 @@ org.apache.hive + hive-storage-api + 2.7.0-SNAPSHOT + + + org.apache.hive hive-exec ${project.version} diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/hash/Murmur3Bench.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/hash/Murmur3Bench.java new file mode 100644 index 0000000000..cd85148ebb --- /dev/null +++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/hash/Murmur3Bench.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hive.benchmark.hash; + +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.concurrent.TimeUnit; + +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColLikeStringScalar; +import org.apache.hive.common.util.Murmur3; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +/** + * This test measures the performance for vectorization. + *

+ * This test uses JMH framework for benchmarking. + * You may execute this benchmark tool using JMH command line in different ways: + *

+ * To use the settings shown in the main() function, use: + * $ java -cp target/benchmarks.jar org.apache.hive.benchmark.hash.Murmur3Bench + *

+ * To use the default settings used by JMH, use: + * $ java -jar target/benchmarks.jar org.apache.hive.benchmark.hash.Murmur3Bench + *

+ * To specify different parameters, use: + * - This command will use 10 warm-up iterations, 5 test iterations, and 2 forks. And it will + * display the Average Time (avgt) in Microseconds (us) + * - Benchmark mode. Available modes are: + * [Throughput/thrpt, AverageTime/avgt, SampleTime/sample, SingleShotTime/ss, All/all] + * - Output time unit. Available time units are: [m, s, ms, us, ns]. + *

+ * $ java -jar target/benchmarks.jar org.apache.hive.benchmark.hash.Murmur3Bench + * -wi 10 -i 5 -f 2 -bm avgt -tu us + */ +@State(Scope.Benchmark) +public class Murmur3Bench { + @BenchmarkMode(Mode.AverageTime) + @Fork(1) + @State(Scope.Thread) + @OutputTimeUnit(TimeUnit.MILLISECONDS) + public static class Hash64Bench { + + @Param({ "-1"}) //"123456789", "987654321", "1234", "4321", + long v; + + + + @Benchmark + @Warmup(iterations = 2, time = 2, timeUnit = TimeUnit.SECONDS) + @Measurement(iterations = 20, time = 2, timeUnit = TimeUnit.SECONDS) + public long longHash() { + long k = 0; + for (int i = 0; i < 4096; i++) { + k += Murmur3.hash64(v); + } + return k; + } + + @Benchmark + @Warmup(iterations = 2, time = 2, timeUnit = TimeUnit.SECONDS) + @Measurement(iterations = 20, time = 2, timeUnit = TimeUnit.SECONDS) + public long longBytesHash() { + ByteBuffer LONG_BUFFER = ByteBuffer.allocate(Long.BYTES); + long k = 0; + for (int i = 0; i < 4096; i++) { + LONG_BUFFER.putLong(0, v+i); + k += Murmur3.hash64(LONG_BUFFER.array()); + } + return k; + } + } + + public static void main(String[] args) throws RunnerException { + Options opt = new OptionsBuilder().include(".*" + Murmur3Bench.class.getSimpleName() + + ".*").build(); + new Runner(opt).run(); + } +} \ No newline at end of file diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java index 8bdb47b431..07a93c69f8 100644 --- standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java +++ standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java @@ -62,9 +62,6 @@ private final static int DEFAULT_HASH_BITS = 64; private final static long HASH64_ZERO = Murmur3.hash64(new byte[] {0}); private final static long HASH64_ONE = Murmur3.hash64(new byte[] {1}); - private final static ByteBuffer SHORT_BUFFER = ByteBuffer.allocate(Short.BYTES); - private final static ByteBuffer INT_BUFFER = ByteBuffer.allocate(Integer.BYTES); - private final static ByteBuffer LONG_BUFFER = ByteBuffer.allocate(Long.BYTES); public enum EncodingType { SPARSE, DENSE @@ -212,33 +209,27 @@ public void addBytes(byte[] val) { } public void addShort(short val) { - SHORT_BUFFER.putShort(0, val); - add(Murmur3.hash64(SHORT_BUFFER.array())); + add(Murmur3.hash64(val)); } public void addInt(int val) { - INT_BUFFER.putInt(0, val); - add(Murmur3.hash64(INT_BUFFER.array())); + add(Murmur3.hash64(val)); } public void addLong(long val) { - LONG_BUFFER.putLong(0, val); - add(Murmur3.hash64(LONG_BUFFER.array())); + add(Murmur3.hash64(val)); } public void addFloat(float val) { - INT_BUFFER.putFloat(0, val); - add(Murmur3.hash64(INT_BUFFER.array())); + add(Murmur3.hash64(Float.floatToIntBits(val))); } public void addDouble(double val) { - LONG_BUFFER.putDouble(0, val); - add(Murmur3.hash64(LONG_BUFFER.array())); + add(Murmur3.hash64(Double.doubleToLongBits(val))); } public void addChar(char val) { - SHORT_BUFFER.putChar(0, val); - add(Murmur3.hash64(SHORT_BUFFER.array())); + add(Murmur3.hash64((short)val)); } /** diff --git storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java index 6ccf5ab4dd..5b1914dc49 100644 --- storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java +++ storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java @@ -156,7 +156,7 @@ public void addInt(int val) { public void addLong(long val) { // puts long in little endian order - addBytes(longToByteArrayLE(val)); + addHash(Murmur3.hash64(val)); } public void addFloat(float val) { @@ -239,7 +239,7 @@ public boolean testInt(int val) { } public boolean testLong(long val) { - return testBytes(longToByteArrayLE(val)); + return testHash(Murmur3.hash64(val)); } public boolean testFloat(float val) { diff --git storage-api/src/java/org/apache/hive/common/util/Murmur3.java storage-api/src/java/org/apache/hive/common/util/Murmur3.java index c896fa7e50..8aae28b9c4 100644 --- storage-api/src/java/org/apache/hive/common/util/Murmur3.java +++ storage-api/src/java/org/apache/hive/common/util/Murmur3.java @@ -155,6 +155,52 @@ public static long hash64(byte[] data) { return hash64(data, 0, data.length, DEFAULT_SEED); } + public static long hash64(long data) { + long hash = DEFAULT_SEED; + long k = Long.reverseBytes(data); + int length = Long.BYTES; + // mix functions + k *= C1; + k = Long.rotateLeft(k, R1); + k *= C2; + hash ^= k; + hash = Long.rotateLeft(hash, R2) * M + N1; + // finalization + hash ^= length; + hash = fmix64(hash); + return hash; + } + + public static long hash64(int data) { + long k1 = Integer.reverseBytes(data) & (-1L >>> 32); + int length = Integer.BYTES; + long hash = DEFAULT_SEED; + k1 *= C1; + k1 = Long.rotateLeft(k1, R1); + k1 *= C2; + hash ^= k1; + // finalization + hash ^= length; + hash = fmix64(hash); + return hash; + } + + public static long hash64(short data) { + long hash = DEFAULT_SEED; + long k1 = 0; + k1 ^= ((long) data & 0xff) << 8; + k1 ^= ((long)((data & 0xFF00) >> 8) & 0xff); + k1 *= C1; + k1 = Long.rotateLeft(k1, R1); + k1 *= C2; + hash ^= k1; + + // finalization + hash ^= Short.BYTES; + hash = fmix64(hash); + return hash; + } + public static long hash64(byte[] data, int offset, int length) { return hash64(data, offset, length, DEFAULT_SEED); } diff --git storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java index f20366b69c..16955c11a3 100644 --- storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java +++ storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java @@ -18,7 +18,7 @@ package org.apache.hive.common.util; -import static org.junit.Assert.assertEquals; +import static org.junit.Assert.*; import org.apache.hive.common.util.Murmur3.IncrementalHash32; import com.google.common.hash.HashFunction; @@ -222,7 +222,32 @@ public void testHashCodesM3_128_double() { assertEquals(gl2, m2); } } - + + @Test + public void test64() { + final int seed = 123, iters = 1000000; + ByteBuffer SHORT_BUFFER = ByteBuffer.allocate(Short.BYTES); + ByteBuffer INT_BUFFER = ByteBuffer.allocate(Integer.BYTES); + ByteBuffer LONG_BUFFER = ByteBuffer.allocate(Long.BYTES); + Random rdm = new Random(seed); + for (int i = 0; i < iters; ++i) { + long ln = rdm.nextLong(); + int in = rdm.nextInt(); + short sn = (short) (rdm.nextInt(2* Short.MAX_VALUE - 1) - Short.MAX_VALUE); + float fn = rdm.nextFloat(); + double dn = rdm.nextDouble(); + SHORT_BUFFER.putShort(0, sn); + assertEquals(Murmur3.hash64(SHORT_BUFFER.array()), Murmur3.hash64(sn)); + INT_BUFFER.putInt(0, in); + assertEquals(Murmur3.hash64(INT_BUFFER.array()), Murmur3.hash64(in)); + LONG_BUFFER.putLong(0, ln); + assertEquals(Murmur3.hash64(LONG_BUFFER.array()), Murmur3.hash64(ln)); + INT_BUFFER.putFloat(0, fn); + assertEquals(Murmur3.hash64(INT_BUFFER.array()), Murmur3.hash64(Float.floatToIntBits(fn))); + LONG_BUFFER.putDouble(0, dn); + assertEquals(Murmur3.hash64(LONG_BUFFER.array()), Murmur3.hash64(Double.doubleToLongBits(dn))); + } + } @Test public void testIncremental() {