From 837fce2a3a7c35cb7735f91cd0d9919a33da2e98 Mon Sep 17 00:00:00 2001 From: Gopal V Date: Sat, 12 May 2018 01:05:31 -0700 Subject: [PATCH] hash64: WIP jmh tests --- itests/hive-jmh/pom.xml | 5 + .../apache/hive/benchmark/hash/Murmur3Bench.java | 107 +++++++++++++++++++++ .../hadoop/hive/common/ndv/hll/HyperLogLog.java | 21 ++-- .../org/apache/hive/common/util/BloomKFilter.java | 4 +- .../java/org/apache/hive/common/util/Murmur3.java | 44 +++++++++ 5 files changed, 164 insertions(+), 17 deletions(-) create mode 100644 itests/hive-jmh/src/main/java/org/apache/hive/benchmark/hash/Murmur3Bench.java diff --git itests/hive-jmh/pom.xml itests/hive-jmh/pom.xml index c0a6564..5eb3026 100644 --- itests/hive-jmh/pom.xml +++ itests/hive-jmh/pom.xml @@ -65,6 +65,11 @@ org.apache.hive + hive-storage-api + 2.7.0-SNAPSHOT + + + org.apache.hive hive-exec ${project.version} diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/hash/Murmur3Bench.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/hash/Murmur3Bench.java new file mode 100644 index 0000000..cd85148 --- /dev/null +++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/hash/Murmur3Bench.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hive.benchmark.hash; + +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.concurrent.TimeUnit; + +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColLikeStringScalar; +import org.apache.hive.common.util.Murmur3; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +/** + * This test measures the performance of Murmur3 64-bit hashing. + *

+ * This test uses JMH framework for benchmarking. + * You may execute this benchmark tool using JMH command line in different ways: + *

+ * To use the settings shown in the main() function, use: + * $ java -cp target/benchmarks.jar org.apache.hive.benchmark.hash.Murmur3Bench + *

+ * To use the default settings used by JMH, use: + * $ java -jar target/benchmarks.jar org.apache.hive.benchmark.hash.Murmur3Bench + *

+ * To specify different parameters, use: + * - This command will use 10 warm-up iterations, 5 test iterations, and 2 forks. And it will + * display the Average Time (avgt) in Microseconds (us) + * - Benchmark mode. Available modes are: + * [Throughput/thrpt, AverageTime/avgt, SampleTime/sample, SingleShotTime/ss, All/all] + * - Output time unit. Available time units are: [m, s, ms, us, ns]. + *

+ * $ java -jar target/benchmarks.jar org.apache.hive.benchmark.hash.Murmur3Bench
+ * -wi 10 -i 5 -f 2 -bm avgt -tu us
+ */
+@State(Scope.Benchmark)
+public class Murmur3Bench {
+  @BenchmarkMode(Mode.AverageTime)
+  @Fork(1)
+  @State(Scope.Thread)
+  @OutputTimeUnit(TimeUnit.MILLISECONDS)
+  public static class Hash64Bench {
+
+    @Param({ "-1"}) //"123456789", "987654321", "1234", "4321",
+    long v;
+
+    // Both benchmarks hash the same 4096 inputs (v + i) so their timings are comparable;
+    // varying the input also keeps the hash64 call from being loop-invariant.
+    @Benchmark
+    @Warmup(iterations = 2, time = 2, timeUnit = TimeUnit.SECONDS)
+    @Measurement(iterations = 20, time = 2, timeUnit = TimeUnit.SECONDS)
+    public long longHash() {
+      long k = 0;
+      for (int i = 0; i < 4096; i++) {
+        k += Murmur3.hash64(v + i);
+      }
+      return k;
+    }
+
+    @Benchmark
+    @Warmup(iterations = 2, time = 2, timeUnit = TimeUnit.SECONDS)
+    @Measurement(iterations = 20, time = 2, timeUnit = TimeUnit.SECONDS)
+    public long longBytesHash() {
+      ByteBuffer longBuffer = ByteBuffer.allocate(Long.BYTES);
+      long k = 0;
+      for (int i = 0; i < 4096; i++) {
+        longBuffer.putLong(0, v + i);
+        k += Murmur3.hash64(longBuffer.array());
+      }
+      return k;
+    }
+  }
+
+  public static void main(String[] args) throws RunnerException {
+    Options opt = new OptionsBuilder().include(".*" + Murmur3Bench.class.getSimpleName() +
+        ".*").build();
+    new Runner(opt).run();
+  }
+}
\ No newline at end of file
diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
index ec33691..cff3cb8 100644
--- standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
+++ standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
@@ -60,9 +60,6 @@
   private final static int DEFAULT_HASH_BITS = 64;
   private final static long HASH64_ZERO = Murmur3.hash64(new byte[] {0});
   private final static long HASH64_ONE = Murmur3.hash64(new byte[] {1});
-  private final static ByteBuffer SHORT_BUFFER = 
ByteBuffer.allocate(Short.BYTES);
-  private final static ByteBuffer INT_BUFFER = ByteBuffer.allocate(Integer.BYTES);
-  private final static ByteBuffer LONG_BUFFER = ByteBuffer.allocate(Long.BYTES);
 
   public enum EncodingType {
     SPARSE, DENSE
@@ -217,33 +214,27 @@ public void addBytes(byte[] val) {
   }
 
   public void addShort(short val) {
-    SHORT_BUFFER.putShort(0, val);
-    add(Murmur3.hash64(SHORT_BUFFER.array()));
+    add(Murmur3.hash64(val));
   }
 
   public void addInt(int val) {
-    INT_BUFFER.putInt(0, val);
-    add(Murmur3.hash64(INT_BUFFER.array()));
+    add(Murmur3.hash64(val));
   }
 
   public void addLong(long val) {
-    LONG_BUFFER.putLong(0, val);
-    add(Murmur3.hash64(LONG_BUFFER.array()));
+    add(Murmur3.hash64(val));
   }
 
   public void addFloat(float val) {
-    INT_BUFFER.putFloat(0, val);
-    add(Murmur3.hash64(INT_BUFFER.array()));
+    add(Murmur3.hash64(Float.floatToIntBits(val)));
   }
 
   public void addDouble(double val) {
-    LONG_BUFFER.putDouble(0, val);
-    add(Murmur3.hash64(LONG_BUFFER.array()));
+    add(Murmur3.hash64(Double.doubleToLongBits(val)));
   }
 
   public void addChar(char val) {
-    SHORT_BUFFER.putChar(0, val);
-    add(Murmur3.hash64(SHORT_BUFFER.array()));
+    add(Murmur3.hash64((short)val));
   }
 
   /**
diff --git storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
index 6ccf5ab..5b1914d 100644
--- storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
+++ storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
@@ -156,7 +156,8 @@ public void addInt(int val) {
 
   public void addLong(long val) {
     // puts long in little endian order
-    addBytes(longToByteArrayLE(val));
+    // hash64(long) byte-reverses its argument; undo that so the long is still hashed in LE order
+    addHash(Murmur3.hash64(Long.reverseBytes(val)));
   }
 
   public void addFloat(float val) {
@@ -239,7 +240,8 @@ public boolean testInt(int val) {
   }
 
   public boolean testLong(long val) {
-    return testBytes(longToByteArrayLE(val));
+    // must hash exactly as addLong does (LE order, see above), or lookups miss
+    return testHash(Murmur3.hash64(Long.reverseBytes(val)));
   }
 
   public boolean testFloat(float val) {
diff --git 
storage-api/src/java/org/apache/hive/common/util/Murmur3.java storage-api/src/java/org/apache/hive/common/util/Murmur3.java
index c896fa7..23481c4 100644
--- storage-api/src/java/org/apache/hive/common/util/Murmur3.java
+++ storage-api/src/java/org/apache/hive/common/util/Murmur3.java
@@ -154,6 +154,76 @@ public static int hash32(byte[] data, int offset, int length, int seed) {
   public static long hash64(byte[] data) {
     return hash64(data, 0, data.length, DEFAULT_SEED);
   }
+
+  /**
+   * Murmur3 64-bit hash of a long, computed without a temporary byte[].
+   * Intended as a drop-in for hash64(byte[]) over the value's 8 bytes in
+   * big-endian order (what ByteBuffer.putLong writes by default).
+   *
+   * @param data value to hash
+   * @return 64-bit hash seeded with DEFAULT_SEED
+   */
+  public static long hash64(long data) {
+    long hash = DEFAULT_SEED;
+    // byte-reverse so the big-endian bytes are consumed low byte first
+    long k = Long.reverseBytes(data);
+    // mix functions
+    k *= C1;
+    k = Long.rotateLeft(k, R1);
+    k *= C2;
+    hash ^= k;
+    hash = Long.rotateLeft(hash, R2) * M + N1;
+    // finalization
+    hash ^= Long.BYTES;
+    hash = fmix64(hash);
+    return hash;
+  }
+
+  /**
+   * Murmur3 64-bit hash of an int, computed without a temporary byte[].
+   * Intended as a drop-in for hash64(byte[]) over the value's 4 bytes in
+   * big-endian order (what ByteBuffer.putInt writes by default).
+   *
+   * @param data value to hash
+   * @return 64-bit hash seeded with DEFAULT_SEED
+   */
+  public static long hash64(int data) {
+    long hash = DEFAULT_SEED;
+    // zero-extend: only the 4 payload bytes may reach the mix
+    long k1 = Integer.reverseBytes(data) & 0xFFFFFFFFL;
+    k1 *= C1;
+    k1 = Long.rotateLeft(k1, R1);
+    k1 *= C2;
+    hash ^= k1;
+    // finalization
+    hash ^= Integer.BYTES;
+    hash = fmix64(hash);
+    return hash;
+  }
+
+  /**
+   * Murmur3 64-bit hash of a short, computed without a temporary byte[].
+   * Intended as a drop-in for hash64(byte[]) over the value's 2 bytes in
+   * big-endian order (what ByteBuffer.putShort writes by default).
+   *
+   * @param data value to hash
+   * @return 64-bit hash seeded with DEFAULT_SEED
+   */
+  public static long hash64(short data) {
+    long hash = DEFAULT_SEED;
+    // Short.reverseBytes returns a short, which sign-extends when widened to
+    // long; mask to 16 bits (a 32-bit mask would keep bits 16-31 set whenever
+    // the low byte of data is >= 0x80).
+    long k1 = Short.reverseBytes(data) & 0xFFFFL;
+    k1 *= C1;
+    k1 = Long.rotateLeft(k1, R1);
+    k1 *= C2;
+    hash ^= k1;
+    // finalization
+    hash ^= Short.BYTES;
+    hash = fmix64(hash);
+    return hash;
+  }
 
   public static long hash64(byte[] data, int offset, int length) {
     return hash64(data, offset, length, DEFAULT_SEED);
-- 
2.4.0