diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/AbstractExpression.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/AbstractExpression.java
index 94af3e0..cccd810 100644
--- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/AbstractExpression.java
+++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/AbstractExpression.java
@@ -13,6 +13,7 @@
  */
 package org.apache.hive.benchmark.vectorization;
 
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
@@ -147,4 +148,16 @@ protected DoubleColumnVector getDoubleColumnVectorWithNull() {
     return columnVector;
   }
 
+  protected BytesColumnVector getBytesColumnVector() { // Builds a BytesColumnVector with VectorizedRowBatch.DEFAULT_SIZE entries, each a fixed-length slice of one shared array.
+    BytesColumnVector columnVector = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
+    int size = 64; // length, in bytes, given to every entry
+    byte[] bytes = new byte[size * VectorizedRowBatch.DEFAULT_SIZE]; // single backing array sized for all entries; never written here, so it stays zero-filled
+    for (int i = 0; i != VectorizedRowBatch.DEFAULT_SIZE; i++) {
+      columnVector.vector[i] = bytes; // every entry references the same backing array
+      columnVector.start[i] = size * i; // entry i starts at its own non-overlapping offset
+      columnVector.length[i] = size;
+    }
+    return columnVector;
+  }
+
 }
diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizedBytesBench.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizedBytesBench.java
new file mode 100644
index 0000000..4c38530
--- /dev/null
+++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizedBytesBench.java
@@ -0,0 +1,73 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hive.benchmark.vectorization; + +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringGroupColEqualStringGroupColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringGroupColGreaterEqualStringGroupColumn; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +/** + * This test measures the performance for vectorization. + *
+ * This test uses JMH framework for benchmarking.
+ * You may execute this benchmark tool using JMH command line in different ways:
+ *
+ * To use the settings shown in the main() function, use:
+ * $ java -cp target/benchmarks.jar org.apache.hive.benchmark.vectorization.VectorizedBytesBench
+ *
+ * To use the default settings used by JMH, use:
+ * $ java -jar target/benchmarks.jar org.apache.hive.benchmark.vectorization.VectorizedBytesBench
+ *
+ * To specify different parameters, use:
+ * - This command will use 10 warm-up iterations, 5 test iterations, and 2 forks. And it will
+ * display the Average Time (avgt) in Microseconds (us)
+ * - Benchmark mode. Available modes are:
+ * [Throughput/thrpt, AverageTime/avgt, SampleTime/sample, SingleShotTime/ss, All/all]
+ * - Output time unit. Available time units are: [m, s, ms, us, ns].
+ *
+ * $ java -jar target/benchmarks.jar org.apache.hive.benchmark.vectorization.VectorizedBytesBench
+ * -wi 10 -i 5 -f 2 -bm avgt -tu us
+ */
+@State(Scope.Benchmark)
+public class VectorizedBytesBench { // Holds the two nested benchmark classes below plus the main() launcher.
+  public static class StringColEqualStringColumnBench extends AbstractExpression { // Benchmark whose expression is StringGroupColEqualStringGroupColumn.
+    @Override
+    public void setup() {
+      rowBatch = buildRowBatch(new LongColumnVector(), 2, getBytesColumnVector(), getBytesColumnVector()); // NOTE(review): presumably (output vector, input-column count, inputs...) -- confirm against buildRowBatch
+      rowBatch.size = 128; // overrides the batch's size field after the batch is built
+      expression = new StringGroupColEqualStringGroupColumn(0, 1, 2); // NOTE(review): presumably (left column, right column, output column) -- confirm against the generated class
+    }
+  }
+
+  public static class StringColGreaterEqualStringColumnBench extends AbstractExpression { // Benchmark whose expression is StringGroupColGreaterEqualStringGroupColumn.
+    @Override
+    public void setup() {
+      rowBatch = buildRowBatch(new LongColumnVector(), 2, getBytesColumnVector(), getBytesColumnVector()); // NOTE(review): presumably (output vector, input-column count, inputs...) -- confirm against buildRowBatch
+      rowBatch.size = 128; // overrides the batch's size field after the batch is built
+      expression = new StringGroupColGreaterEqualStringGroupColumn(0, 1, 2); // NOTE(review): presumably (left column, right column, output column) -- confirm against the generated class
+    }
+  }
+
+  public static void main(String[] args) throws RunnerException { // Launches the JMH Runner for benchmarks matching this class's simple name.
+    Options opt = new OptionsBuilder().include(".*" + VectorizedBytesBench.class.getSimpleName() +
+        ".*").build(); // include pattern is the simple class name wrapped in ".*" on both sides
+    new Runner(opt).run();
+  }
+}
diff --git 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FastByteComparisons.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FastByteComparisons.java new file mode 100644 index 0000000..9f9d171 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FastByteComparisons.java @@ -0,0 +1,238 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.lang.reflect.Field; +import java.nio.ByteOrder; +import java.security.AccessController; +import java.security.PrivilegedAction; + +import com.google.common.primitives.UnsignedBytes; +import sun.misc.Unsafe; + +/** + * Utility code to do optimized byte-array comparison. + * This is borrowed from Hadoop's {@code FastByteComparisons} class, + * which is borrowed and slightly modified from Guava's {@code + * FastByteComparisons} class to be able to compare arrays that start at + * non-zero offsets. + */ +abstract class FastByteComparisons { + + /** + * Lexicographically compare two byte arrays. 
+   */
+  public static int compareTo(byte[] b1, int s1, int l1, byte[] b2, int s2, // NOTE(review): s1/l1 and s2/l2 are presumably the start offset and length within b1 and b2 -- confirm
+      int l2) {
+    return LexicographicalComparerHolder.BEST_COMPARER.compareTo( // delegates unchanged to the comparer held in LexicographicalComparerHolder.BEST_COMPARER
+        b1, s1, l1, b2, s2, l2);
+  }
+
+
+  interface ComparerUses reflection to gracefully fall back to the Java implementation if
+ * {@code Unsafe} isn't available.
+ */
+ static class LexicographicalComparerHolder {
+ static final String UNSAFE_COMPARER_NAME =
+ LexicographicalComparerHolder.class.getName() + "$UnsafeComparer";
+
+ static final Comparer