diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java
new file mode 100644
index 0000000..ecbdb27
--- /dev/null
+++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java
@@ -0,0 +1,192 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.benchmark.vectorization;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColDivideLongColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColAddDoubleColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColAddLongColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColDivideDoubleColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColDivideLongColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddDoubleColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddLongColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColDivideDoubleColumn;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+import java.util.Random;
+
+/**
+ * Measures the performance of vectorized add and divide column expressions.
+ * <p>
+ * This test uses JMH framework for benchmarking.
+ * You may execute this benchmark tool using JMH command line in different ways:
+ * <p>
+ * To use the settings shown in the main() function, use:
+ * $ java -cp target/benchmarks.jar org.apache.hive.benchmark.vectorization.VectorizationBench
+ * <p>
+ * To use the default settings used by JMH, use:
+ * $ java -jar target/benchmarks.jar org.apache.hive.benchmark.vectorization VectorizationBench
+ * <p>
+ * To specify different parameters, use:
+ * $ java -jar target/benchmarks.jar org.apache.hive.benchmark.vectorization VectorizationBench
+ * -wi 10 -i 5 -f 2 -bm avgt -tu us
+ * <p>
+ * - This command will use 10 warm-up iterations, 5 test iterations, and 2 forks. And it will
+ * display the Average Time (avgt) in Microseconds (us)
+ * - Benchmark mode. Available modes are:
+ * [Throughput/thrpt, AverageTime/avgt, SampleTime/sample, SingleShotTime/ss, All/all]
+ * - Output time unit. Available time units are: [m, s, ms, us, ns].
+ */
+@State(Scope.Benchmark)
+public class VectorizationBench {
+
+  private static final int COLNUM1 = 0;
+  private static final int COLNUM2 = 1;
+  private static final int OUTPUTCOLUMN = 2;
+  // Every batch holds two input columns plus one output column.
+  private static final int NUM_COLUMNS = 3;
+
+  // Input vectors are per-instance state filled in by the @Setup method.
+  private final LongColumnVector longColumnVector = new LongColumnVector();
+  private final LongColumnVector dupLongColumnVector = new LongColumnVector();
+  private final DoubleColumnVector doubleColumnVector = new DoubleColumnVector();
+  private final DoubleColumnVector dupDoubleColumnVector = new DoubleColumnVector();
+
+  // Batches and expressions are assigned in initialRowBatch() according to 'type'.
+  private VectorizedRowBatch addRowBatch;
+  private VectorizedRowBatch dividedRowBatch;
+  private VectorExpression addExpression;
+  private VectorExpression divideExpression;
+
+  // Number of evaluate() calls performed by a single benchmark invocation.
+  private int iterations = 1000;
+
+  @Param({"Long", "Double", "LongDouble", "DoubleLong"})
+  private String type;
+
+  /**
+   * Fills the input column vectors with random data and selects the row batches and the
+   * add/divide expressions that match the {@code type} parameter.
+   *
+   * @throws IllegalArgumentException if {@code type} is not one of the supported values
+   */
+  @Setup(Level.Trial)
+  public void initialRowBatch() {
+    Random random = new Random();
+
+    dupLongColumnVector.fill(random.nextLong());
+    dupDoubleColumnVector.fill(random.nextDouble());
+    for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) {
+      doubleColumnVector.vector[i] = random.nextDouble();
+      longColumnVector.vector[i] = random.nextLong();
+    }
+
+    // Constant-first comparisons so that a missing type reaches the error branch below.
+    if ("Double".equalsIgnoreCase(type)) {
+      addRowBatch = buildRowBatch(doubleColumnVector, dupDoubleColumnVector,
+          new DoubleColumnVector());
+      dividedRowBatch = buildRowBatch(doubleColumnVector, dupDoubleColumnVector,
+          new DoubleColumnVector());
+      addExpression = new DoubleColAddDoubleColumn(COLNUM1, COLNUM2, OUTPUTCOLUMN);
+      divideExpression = new DoubleColDivideDoubleColumn(COLNUM1, COLNUM2, OUTPUTCOLUMN);
+    } else if ("Long".equalsIgnoreCase(type)) {
+      // Uses a long output column for addition and a double output column for division.
+      addRowBatch = buildRowBatch(longColumnVector, dupLongColumnVector,
+          new LongColumnVector());
+      dividedRowBatch = buildRowBatch(longColumnVector, dupLongColumnVector,
+          new DoubleColumnVector());
+      addExpression = new LongColAddLongColumn(COLNUM1, COLNUM2, OUTPUTCOLUMN);
+      divideExpression = new LongColDivideLongColumn(COLNUM1, COLNUM2, OUTPUTCOLUMN);
+    } else if ("LongDouble".equalsIgnoreCase(type)) {
+      addRowBatch = buildRowBatch(longColumnVector, doubleColumnVector,
+          new DoubleColumnVector());
+      dividedRowBatch = buildRowBatch(longColumnVector, doubleColumnVector,
+          new DoubleColumnVector());
+      addExpression = new LongColAddDoubleColumn(COLNUM1, COLNUM2, OUTPUTCOLUMN);
+      divideExpression = new LongColDivideDoubleColumn(COLNUM1, COLNUM2, OUTPUTCOLUMN);
+    } else if ("DoubleLong".equalsIgnoreCase(type)) {
+      addRowBatch = buildRowBatch(doubleColumnVector, longColumnVector,
+          new DoubleColumnVector());
+      dividedRowBatch = buildRowBatch(doubleColumnVector, longColumnVector,
+          new DoubleColumnVector());
+      addExpression = new DoubleColAddLongColumn(COLNUM1, COLNUM2, OUTPUTCOLUMN);
+      divideExpression = new DoubleColDivideLongColumn(COLNUM1, COLNUM2, OUTPUTCOLUMN);
+    } else {
+      throw new IllegalArgumentException("Unsupported type: " + type);
+    }
+  }
+
+  /**
+   * Assembles a three-column batch from two input columns and one output column.
+   *
+   * @param col1 vector placed at index COLNUM1
+   * @param col2 vector placed at index COLNUM2
+   * @param output vector placed at index OUTPUTCOLUMN
+   * @return the new row batch
+   */
+  private VectorizedRowBatch buildRowBatch(ColumnVector col1, ColumnVector col2,
+      ColumnVector output) {
+    VectorizedRowBatch rowBatch = new VectorizedRowBatch(NUM_COLUMNS);
+    rowBatch.cols[COLNUM1] = col1;
+    rowBatch.cols[COLNUM2] = col2;
+    rowBatch.cols[OUTPUTCOLUMN] = output;
+    return rowBatch;
+  }
+
+  /** Evaluates the add expression over its batch {@code iterations} times. */
+  @Benchmark
+  public void addExpressionEvaluationBenchMark() {
+    for (int i = 0; i < iterations; i++) {
+      addExpression.evaluate(addRowBatch);
+    }
+  }
+
+  /** Evaluates the divide expression over its batch {@code iterations} times. */
+  @Benchmark
+  public void divideExpressionEvaluationBenchMark() {
+    for (int i = 0; i < iterations; i++) {
+      divideExpression.evaluate(dividedRowBatch);
+    }
+  }
+
+  /**
+   * Runs every benchmark in this class with 1 fork, 2 warm-up iterations and
+   * 2 measurement iterations.
+   *
+   * @param args ignored
+   * @throws RunnerException propagated from the JMH {@code Runner}
+   */
+  public static void main(String[] args) throws RunnerException {
+    Options opt = new OptionsBuilder()
+        .include(".*" + VectorizationBench.class.getSimpleName() + ".*")
+        .forks(1)
+        .warmupIterations(2)
+        .measurementIterations(2)
+        .build();
+    new Runner(opt).run();
+  }
+}