diff --git a/itests/hive-jmh/pom.xml b/itests/hive-jmh/pom.xml index 0ff584c..94abbbf 100644 --- a/itests/hive-jmh/pom.xml +++ b/itests/hive-jmh/pom.xml @@ -32,7 +32,7 @@ ../.. UTF-8 - 1.4.1 + 1.19 benchmarks @@ -54,6 +54,16 @@ ${project.version} + org.apache.tez + tez-api + ${tez.version} + + + org.apache.tez + tez-runtime-internals + ${tez.version} + + org.apache.hive hive-exec ${project.version} @@ -91,6 +101,7 @@ ${uberjar.name} + org.openjdk.jmh.Main @@ -112,6 +123,13 @@ + + + com.github.edwgiz + maven-shade-plugin.log4j2-cachefile-transformer + 2.1 + + diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/AbstractExpression.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/AbstractExpression.java index 879b437..a703eb8 100644 --- a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/AbstractExpression.java +++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/AbstractExpression.java @@ -13,6 +13,9 @@ */ package org.apache.hive.benchmark.vectorization; +import java.util.Random; +import java.util.concurrent.TimeUnit; + import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; @@ -30,9 +33,6 @@ import org.openjdk.jmh.annotations.State; import org.openjdk.jmh.annotations.Warmup; -import java.util.Random; -import java.util.concurrent.TimeUnit; - @BenchmarkMode(Mode.AverageTime) @Fork(1) @State(Scope.Thread) diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/BlackholeOperator.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/BlackholeOperator.java new file mode 100644 index 0000000..d86042a --- /dev/null +++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/BlackholeOperator.java @@ -0,0 +1,44 @@ +/** + * Licensed under the Apache License, Version 2.0 (the 
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hive.benchmark.vectorization; + +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.openjdk.jmh.infra.Blackhole; + +public class BlackholeOperator extends Operator { + private Blackhole bh; + + public BlackholeOperator(CompilationOpContext cContext, Blackhole bh) { + super(cContext); + this.bh = bh; + } + + @Override + public void process(final Object row, final int tag) throws HiveException { + bh.consume(row); + } + + @Override + public String getName() { + return "Blackhole Operator"; + } + + @Override + public OperatorType getType() { + return OperatorType.FILESINK; + } +} diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/ColumnVectorGenUtil.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/ColumnVectorGenUtil.java new file mode 100644 index 0000000..d80b6d4 --- /dev/null +++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/ColumnVectorGenUtil.java @@ -0,0 +1,256 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.benchmark.vectorization; + +import java.sql.Timestamp; +import java.util.Random; + +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.RandomTypeUtil; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +public class ColumnVectorGenUtil { + + private static final long LONG_VECTOR_NULL_VALUE = 1; + private static final double DOUBLE_VECTOR_NULL_VALUE = Double.NaN; + + public static VectorizedRowBatch getVectorizedRowBatch(int size, int numCol, int seed) { + VectorizedRowBatch vrg = new VectorizedRowBatch(numCol, size); + for (int j = 0; j < numCol; j++) { + LongColumnVector lcv = new LongColumnVector(size); 
+ for (int i = 0; i < size; i++) { + lcv.vector[i] = (i + 1) * seed * (j + 1); + } + vrg.cols[j] = lcv; + } + vrg.size = size; + return vrg; + } + + public static ColumnVector generateColumnVector(TypeInfo typeInfo, boolean nulls, boolean repeating, int size, + Random rand) { + if (typeInfo.getCategory().equals(ObjectInspector.Category.PRIMITIVE)) { + switch (((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()) { + case BOOLEAN: + case BYTE: + case SHORT: + case INT: + case LONG: + case DATE: + return generateLongColumnVector(nulls, repeating, size, rand); + case FLOAT: + case DOUBLE: + return generateDoubleColumnVector(nulls, repeating, size, rand); + case DECIMAL: + return generateDecimalColumnVector(((DecimalTypeInfo) typeInfo), nulls, repeating, size, rand); + case CHAR: + case VARCHAR: + case STRING: + case BINARY: + return generateBytesColumnVector(nulls, repeating, size, rand); + case TIMESTAMP: + return generateTimestampColumnVector(nulls, repeating, size, rand); + // TODO: add interval and complex types + } + } + throw new RuntimeException("Unsupported type info category: " + typeInfo.getCategory()); + } + + public static BytesColumnVector generateBytesColumnVector( + boolean nulls, boolean repeating, int size, Random rand) { + BytesColumnVector bcv = new BytesColumnVector(size); + bcv.initBuffer(10); + bcv.noNulls = !nulls; + bcv.isRepeating = repeating; + + byte[] repeatingValue = new byte[10]; + rand.nextBytes(repeatingValue); + + int nullFrequency = generateNullFrequency(rand); + + for (int i = 0; i < size; i++) { + if (nulls && (repeating || i % nullFrequency == 0)) { + bcv.isNull[i] = true; + bcv.setVal(0, new byte[]{0}); + } else { + bcv.isNull[i] = false; + if (repeating) { + bcv.setVal(i, repeatingValue, 0, repeatingValue.length); + } else { + String val = String.valueOf("value_" + i); + bcv.setVal(i, val.getBytes(), 0, val.length()); + } + } + } + return bcv; + } + + public static LongColumnVector generateLongColumnVector( + boolean nulls, 
boolean repeating, int size, Random rand) { + LongColumnVector lcv = new LongColumnVector(size); + + lcv.noNulls = !nulls; + lcv.isRepeating = repeating; + + long repeatingValue; + do { + repeatingValue = rand.nextLong(); + } while (repeatingValue == 0); + + int nullFrequency = generateNullFrequency(rand); + + for (int i = 0; i < size; i++) { + if (nulls && (repeating || i % nullFrequency == 0)) { + lcv.isNull[i] = true; + lcv.vector[i] = LONG_VECTOR_NULL_VALUE; + + } else { + lcv.isNull[i] = false; + lcv.vector[i] = repeating ? repeatingValue : rand.nextLong(); + if (lcv.vector[i] == 0) { + i--; + } + } + } + return lcv; + } + + private static ColumnVector generateTimestampColumnVector(final boolean nulls, + final boolean repeating, final int size, final Random rand) { + Timestamp[] timestamps = new Timestamp[size]; + for (int i = 0; i < size; i++) { + timestamps[i] = new Timestamp(rand.nextInt()); + } + return generateTimestampColumnVector(nulls, repeating, size, rand, timestamps); + } + + public static TimestampColumnVector generateTimestampColumnVector( + boolean nulls, boolean repeating, int size, Random rand, Timestamp[] timestampValues) { + TimestampColumnVector tcv = new TimestampColumnVector(size); + + tcv.noNulls = !nulls; + tcv.isRepeating = repeating; + + Timestamp repeatingTimestamp = RandomTypeUtil.getRandTimestamp(rand); + + int nullFrequency = generateNullFrequency(rand); + + for (int i = 0; i < size; i++) { + if (nulls && (repeating || i % nullFrequency == 0)) { + tcv.isNull[i] = true; + tcv.setNullValue(i); + timestampValues[i] = null; + } else { + tcv.isNull[i] = false; + if (!repeating) { + Timestamp randomTimestamp = RandomTypeUtil.getRandTimestamp(rand); + tcv.set(i, randomTimestamp); + timestampValues[i] = randomTimestamp; + } else { + tcv.set(i, repeatingTimestamp); + timestampValues[i] = repeatingTimestamp; + } + } + } + return tcv; + } + + public static DoubleColumnVector generateDoubleColumnVector(boolean nulls, + boolean repeating, int 
size, Random rand) { + DoubleColumnVector dcv = new DoubleColumnVector(size); + + dcv.noNulls = !nulls; + dcv.isRepeating = repeating; + + double repeatingValue; + do { + repeatingValue = rand.nextDouble(); + } while (repeatingValue == 0); + + int nullFrequency = generateNullFrequency(rand); + + for (int i = 0; i < size; i++) { + if (nulls && (repeating || i % nullFrequency == 0)) { + dcv.isNull[i] = true; + dcv.vector[i] = DOUBLE_VECTOR_NULL_VALUE; + + } else { + dcv.isNull[i] = false; + dcv.vector[i] = repeating ? repeatingValue : rand.nextDouble(); + + if (dcv.vector[i] == 0) { + i--; + } + } + } + return dcv; + } + + public static DecimalColumnVector generateDecimalColumnVector(DecimalTypeInfo typeInfo, boolean nulls, + boolean repeating, int size, Random rand) { + DecimalColumnVector dcv = + new DecimalColumnVector(size, typeInfo.precision(), typeInfo.scale()); + + dcv.noNulls = !nulls; + dcv.isRepeating = repeating; + + HiveDecimalWritable repeatingValue = new HiveDecimalWritable(); + do { + repeatingValue.set(HiveDecimal.create(((Double) rand.nextDouble()).toString()) + .setScale((short) typeInfo.scale(), HiveDecimal.ROUND_HALF_UP)); + } while (repeatingValue.getHiveDecimal().doubleValue() == 0); + + int nullFrequency = generateNullFrequency(rand); + + for (int i = 0; i < size; i++) { + if (nulls && (repeating || i % nullFrequency == 0)) { + dcv.isNull[i] = true; + dcv.vector[i] = null; + + } else { + dcv.isNull[i] = false; + if (repeating) { + dcv.vector[i].set(repeatingValue); + } else { + dcv.vector[i].set(HiveDecimal.create(((Double) rand.nextDouble()).toString()) + .setScale((short) typeInfo.scale(), HiveDecimal.ROUND_HALF_UP)); + } + + if (dcv.vector[i].getHiveDecimal().doubleValue() == 0) { + i--; + } + } + } + return dcv; + } + + private static int generateNullFrequency(Random rand) { + return 60 + rand.nextInt(20); + } + +} diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizedLikeBench.java 
b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizedLikeBench.java index 136c01b..cc4c96c 100644 --- a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizedLikeBench.java +++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizedLikeBench.java @@ -17,6 +17,8 @@ */ package org.apache.hive.benchmark.vectorization; +import java.nio.charset.StandardCharsets; + import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColLikeStringScalar; import org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.State; @@ -25,8 +27,6 @@ import org.openjdk.jmh.runner.options.Options; import org.openjdk.jmh.runner.options.OptionsBuilder; -import java.nio.charset.StandardCharsets; - /** * This test measures the performance for vectorization. *

diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java index 324f562..cfd713c 100644 --- a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java +++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java @@ -13,31 +13,26 @@ */ package org.apache.hive.benchmark.vectorization.mapjoin; +import java.util.concurrent.TimeUnit; + import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.tez.ObjectCache; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountCollectorTestOperator; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountVectorCollectorTestOperator; -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerateStream; -import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerateUtil; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestData; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription; -import 
org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters; +import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerateUtil; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.tez.runtime.common.objectregistry.ObjectRegistryImpl; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; @@ -45,15 +40,12 @@ import org.openjdk.jmh.annotations.Mode; import org.openjdk.jmh.annotations.OutputTimeUnit; import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; import org.openjdk.jmh.annotations.Warmup; -import java.util.Random; -import java.util.concurrent.TimeUnit; - -// UNDONE: For now, just run once cold. -@BenchmarkMode(Mode.SingleShotTime) +@BenchmarkMode(Mode.AverageTime) +@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) @Fork(1) @State(Scope.Thread) @OutputTimeUnit(TimeUnit.MILLISECONDS) @@ -72,7 +64,6 @@ protected VectorizedRowBatch[] bigTableBatches; @Benchmark - // @Warmup(iterations = 0, time = 1, timeUnit = TimeUnit.MILLISECONDS) @Measurement(iterations = 1, time = 1, timeUnit = TimeUnit.MILLISECONDS) public void bench() throws Exception { if (!isVectorOutput) { @@ -103,6 +94,9 @@ protected void setupMapJoin(HiveConf hiveConf, long seed, int rowCount, // Prepare data. Good for ANY implementation variation. 
testData = new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + ObjectRegistryImpl objectRegistry = new ObjectRegistryImpl(); + ObjectCache.setupObjectRegistry(objectRegistry); operator = setupBenchmarkImplementation( mapJoinImplementation, testDesc, testData); diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBench.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBench.java index f183bb5..7a5b721 100644 --- a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBench.java +++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBench.java @@ -18,107 +18,15 @@ package org.apache.hive.benchmark.vectorization.mapjoin; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import org.apache.commons.lang.ArrayUtils; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.CompilationOpContext; -import org.apache.hadoop.hive.ql.exec.MapJoinOperator; -import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; -import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext; -import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; -import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe; -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorBatchDebug; -import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping; -import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; -import 
org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow; -import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator; -import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator; -import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; -import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; -import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator; -import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType; -import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType.GenerateCategory; -import org.apache.hadoop.hive.ql.exec.vector.expressions.ColAndCol; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestData; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastMultiKeyHashMap; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VerifyFastRow; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.plan.JoinCondDesc; -import org.apache.hadoop.hive.ql.plan.JoinDesc; -import org.apache.hadoop.hive.ql.plan.MapJoinDesc; -import org.apache.hadoop.hive.ql.plan.OperatorDesc; -import org.apache.hadoop.hive.ql.plan.PlanUtils; -import 
org.apache.hadoop.hive.ql.plan.TableDesc; -import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc; -import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; -import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; -import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; -import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo; -import org.apache.hadoop.hive.ql.plan.api.OperatorType; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; -import org.apache.hadoop.hive.serde2.AbstractSerDe; -import org.apache.hadoop.hive.serde2.ByteStream.Output; -import org.apache.hadoop.hive.serde2.SerDeException; -import org.apache.hadoop.hive.serde2.SerDeUtils; -import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; -import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; -import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.Writable; -import org.apache.hive.benchmark.vectorization.VectorizedArithmeticBench; -import org.apache.hive.common.util.HashCodeUtil; -import 
org.apache.hive.common.util.ReflectionUtil; -import org.junit.Test; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Random; -import java.util.SortedMap; -import java.util.TreeMap; - import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; import org.openjdk.jmh.runner.Runner; import org.openjdk.jmh.runner.RunnerException; import org.openjdk.jmh.runner.options.Options; import org.openjdk.jmh.runner.options.OptionsBuilder; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.profile.StackProfiler; /* * Simple one long key map join benchmarks. diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBench.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBench.java index 8d39953..b613377 100644 --- a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBench.java +++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBench.java @@ -18,107 +18,15 @@ package org.apache.hive.benchmark.vectorization.mapjoin; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import org.apache.commons.lang.ArrayUtils; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.CompilationOpContext; -import org.apache.hadoop.hive.ql.exec.MapJoinOperator; -import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; -import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext; -import 
org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; -import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe; -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorBatchDebug; -import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping; -import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; -import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow; -import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator; -import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator; -import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; -import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; -import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator; -import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType; -import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType.GenerateCategory; -import org.apache.hadoop.hive.ql.exec.vector.expressions.ColAndCol; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestData; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastMultiKeyHashMap; -import 
org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VerifyFastRow; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.plan.JoinCondDesc; -import org.apache.hadoop.hive.ql.plan.JoinDesc; -import org.apache.hadoop.hive.ql.plan.MapJoinDesc; -import org.apache.hadoop.hive.ql.plan.OperatorDesc; -import org.apache.hadoop.hive.ql.plan.PlanUtils; -import org.apache.hadoop.hive.ql.plan.TableDesc; -import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc; -import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; -import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; -import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; -import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo; -import org.apache.hadoop.hive.ql.plan.api.OperatorType; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; -import org.apache.hadoop.hive.serde2.AbstractSerDe; -import org.apache.hadoop.hive.serde2.ByteStream.Output; -import org.apache.hadoop.hive.serde2.SerDeException; -import org.apache.hadoop.hive.serde2.SerDeUtils; -import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; -import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import 
org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; -import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.Writable; -import org.apache.hive.benchmark.vectorization.VectorizedArithmeticBench; -import org.apache.hive.common.util.HashCodeUtil; -import org.apache.hive.common.util.ReflectionUtil; -import org.junit.Test; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Random; -import java.util.SortedMap; -import java.util.TreeMap; - import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; import org.openjdk.jmh.runner.Runner; import org.openjdk.jmh.runner.RunnerException; import org.openjdk.jmh.runner.options.Options; import org.openjdk.jmh.runner.options.OptionsBuilder; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.profile.StackProfiler; /* * Simple one long key map join benchmarks. 
diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBench.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBench.java index 9857ae2..88032a2 100644 --- a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBench.java +++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBench.java @@ -18,107 +18,15 @@ package org.apache.hive.benchmark.vectorization.mapjoin; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import org.apache.commons.lang.ArrayUtils; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.CompilationOpContext; -import org.apache.hadoop.hive.ql.exec.MapJoinOperator; -import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; -import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext; -import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; -import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe; -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorBatchDebug; -import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping; -import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; -import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow; -import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator; -import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator; -import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; -import 
org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; -import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator; -import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType; -import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType.GenerateCategory; -import org.apache.hadoop.hive.ql.exec.vector.expressions.ColAndCol; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestData; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastMultiKeyHashMap; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VerifyFastRow; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.plan.JoinCondDesc; -import org.apache.hadoop.hive.ql.plan.JoinDesc; -import org.apache.hadoop.hive.ql.plan.MapJoinDesc; -import org.apache.hadoop.hive.ql.plan.OperatorDesc; -import org.apache.hadoop.hive.ql.plan.PlanUtils; -import org.apache.hadoop.hive.ql.plan.TableDesc; -import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc; -import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; -import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; -import 
org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; -import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo; -import org.apache.hadoop.hive.ql.plan.api.OperatorType; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; -import org.apache.hadoop.hive.serde2.AbstractSerDe; -import org.apache.hadoop.hive.serde2.ByteStream.Output; -import org.apache.hadoop.hive.serde2.SerDeException; -import org.apache.hadoop.hive.serde2.SerDeUtils; -import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; -import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; -import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.Writable; -import org.apache.hive.benchmark.vectorization.VectorizedArithmeticBench; -import org.apache.hive.common.util.HashCodeUtil; -import org.apache.hive.common.util.ReflectionUtil; -import org.junit.Test; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import 
java.util.Map; -import java.util.Map.Entry; -import java.util.Random; -import java.util.SortedMap; -import java.util.TreeMap; - import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; import org.openjdk.jmh.runner.Runner; import org.openjdk.jmh.runner.RunnerException; import org.openjdk.jmh.runner.options.Options; import org.openjdk.jmh.runner.options.OptionsBuilder; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.profile.StackProfiler; /* * Simple one long key map join benchmarks. diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/AbstractOperatorBench.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/AbstractOperatorBench.java new file mode 100644 index 0000000..f17eb3e --- /dev/null +++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/AbstractOperatorBench.java @@ -0,0 +1,34 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hive.benchmark.vectorization.operators; + +import java.util.concurrent.TimeUnit; + +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Warmup; + +@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) +@Fork(1) +@BenchmarkMode(Mode.Throughput) +@OutputTimeUnit(TimeUnit.SECONDS) +public class AbstractOperatorBench { +} diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/VectorGroupByOperatorBench.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/VectorGroupByOperatorBench.java new file mode 100644 index 0000000..3a76b56 --- /dev/null +++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/VectorGroupByOperatorBench.java @@ -0,0 +1,233 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hive.benchmark.vectorization.operators; + +import java.util.ArrayList; +import java.util.Random; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.GroupByDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFBloomFilter; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hive.benchmark.vectorization.ColumnVectorGenUtil; +import org.apache.orc.TypeDescription; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.profile.LinuxPerfAsmProfiler; +import 
org.openjdk.jmh.profile.LinuxPerfNormProfiler; +import org.openjdk.jmh.profile.LinuxPerfProfiler; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +import com.google.common.collect.ImmutableList; + +@State(Scope.Benchmark) +public class VectorGroupByOperatorBench extends AbstractOperatorBench { + + @Param({ + "true", + "false" + }) + private boolean hasNulls; + + @Param({ + "true", + "false" + }) + private boolean isRepeating; + + @Param({ + "PARTIAL1", + "PARTIAL2", + "FINAL", + "COMPLETE" + }) + private GenericUDAFEvaluator.Mode evalMode; + + @Param({ + "GLOBAL", + "HASH" + }) + private VectorGroupByDesc.ProcessingMode processMode; + + @Param({ + "count", + "min", + "max", + "sum", + "avg", + "variance", + "var_pop", + "var_samp", + "stddev", + "stddev_pop", + "stddev_samp", + "bloom_filter" + }) + private String aggregation; + + @Param({ + "bigint", + "double", + "string", + "decimal(7,2)", // to use this via command line arg "decimal(7_2)" + "decimal(38,18)", // to use this via command line arg "decimal(38_18)" + "timestamp" + }) + private String dataType; + + private Random rand = new Random(1234); + private VectorGroupByOperator vgo; + private VectorizedRowBatch vrb; + private int size = VectorizedRowBatch.DEFAULT_SIZE; + + @Setup + public void setup() { + try { + dataType = dataType.replaceAll("_", ","); + TypeInfo typeInfo = TypeInfoFactory.getPrimitiveTypeInfo(dataType); + ColumnVector cv = ColumnVectorGenUtil.generateColumnVector(typeInfo, hasNulls, isRepeating, size, rand); + TypeDescription typeDescription = TypeDescription.fromString(dataType); + vrb = typeDescription.createRowBatch(size); + vrb.size = size; + vrb.cols[0] = cv; + VectorizationContext ctx = new VectorizationContext("name", ImmutableList.of("A")); + GroupByDesc desc = buildGroupByDescType(aggregation, evalMode, "A", typeInfo, processMode); + 
Operator groupByOp = OperatorFactory.get(new CompilationOpContext(), desc); + vgo = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx); + vgo.initialize(new Configuration(), null); + } catch (Exception e) { + // likely unsupported combination of params + // https://bugs.openjdk.java.net/browse/CODETOOLS-7901296 is not available yet to skip benchmark cleanly + System.out.println("Skipping.. Exception: " + e.getMessage()); + System.exit(0); + } + } + + private GroupByDesc buildGroupByDescType( + String aggregate, + GenericUDAFEvaluator.Mode mode, + String column, + TypeInfo dataType, + final VectorGroupByDesc.ProcessingMode processMode) throws SemanticException { + + AggregationDesc agg = buildAggregationDesc(aggregate, mode, column, dataType); + ArrayList aggs = new ArrayList(); + aggs.add(agg); + + ArrayList outputColumnNames = new ArrayList(); + outputColumnNames.add("_col0"); + + GroupByDesc desc = new GroupByDesc(); + desc.setVectorDesc(new VectorGroupByDesc()); + + desc.setOutputColumnNames(outputColumnNames); + desc.setAggregators(aggs); + ((VectorGroupByDesc) desc.getVectorDesc()).setProcessingMode(processMode); + + return desc; + } + + private AggregationDesc buildAggregationDesc( + String aggregate, + GenericUDAFEvaluator.Mode mode, + String column, + TypeInfo typeInfo) throws SemanticException { + + ExprNodeDesc inputColumn = new ExprNodeColumnDesc(typeInfo, column, "table", false); + + ArrayList params = new ArrayList(); + params.add(inputColumn); + + AggregationDesc agg = new AggregationDesc(); + ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo); + GenericUDAFEvaluator genericUDAFEvaluator = FunctionRegistry.getGenericUDAFEvaluator(aggregate, + ImmutableList.of(oi).asList(), false, false); + agg.setGenericUDAFEvaluator(genericUDAFEvaluator); + if (aggregate.equals("bloom_filter")) { + GenericUDAFBloomFilter.GenericUDAFBloomFilterEvaluator udafBloomFilterEvaluator = + 
(GenericUDAFBloomFilter.GenericUDAFBloomFilterEvaluator) agg.getGenericUDAFEvaluator(); + udafBloomFilterEvaluator.setHintEntries(10000); + } + agg.setGenericUDAFName(aggregate); + agg.setMode(mode); + agg.setParameters(params); + + return agg; + } + + @TearDown + public void tearDown() throws HiveException { + vgo.close(false); + } + + @Benchmark + public void testAggCount() throws HiveException { + vgo.process(vrb, 0); + } + + /* + * ============================== HOW TO RUN THIS TEST: ==================================== + * + * You can run this test: + * + * a) Via the command line: + * $ mvn clean install + * $ java -jar target/benchmarks.jar VectorGroupByOperatorBench -prof perf -f 1 (Linux) + * $ java -jar target/benchmarks.jar VectorGroupByOperatorBench -prof perfnorm -f 3 (Linux) + * $ java -jar target/benchmarks.jar VectorGroupByOperatorBench -prof perfasm -f 1 (Linux) + * $ java -jar target/benchmarks.jar VectorGroupByOperatorBench -prof gc -f 1 (allocation counting via gc) + * $ java -jar target/benchmarks.jar VectorGroupByOperatorBench -p hasNulls=true -p isRepeating=false -p aggregation=bloom_filter -p processMode=HASH -p evalMode=PARTIAL1 + * $ java -agentlib:jdwp=transport=dt_socket,address=127.0.0.1:6006,suspend=y,server=y -jar target/benchmarks.jar VectorGroupByOperatorBench + */ + + public static void main(String[] args) throws RunnerException { + Options opt = new OptionsBuilder() + .include(VectorGroupByOperatorBench.class.getSimpleName()) + .addProfiler(LinuxPerfProfiler.class) + .addProfiler(LinuxPerfNormProfiler.class) + .addProfiler(LinuxPerfAsmProfiler.class) + .build(); + new Runner(opt).run(); + } +} \ No newline at end of file diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/VectorSelectOperatorBench.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/VectorSelectOperatorBench.java new file mode 100644 index 0000000..3236a84 --- /dev/null 
+++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/VectorSelectOperatorBench.java @@ -0,0 +1,166 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.benchmark.vectorization.operators; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.util.VectorizedRowGroupGenUtil; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.SelectDesc; +import org.apache.hadoop.hive.ql.plan.VectorSelectDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import 
org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPPlus; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hive.benchmark.vectorization.BlackholeOperator; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.infra.Blackhole; +import org.openjdk.jmh.profile.LinuxPerfAsmProfiler; +import org.openjdk.jmh.profile.LinuxPerfNormProfiler; +import org.openjdk.jmh.profile.LinuxPerfProfiler; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +@State(Scope.Benchmark) +public class VectorSelectOperatorBench extends AbstractOperatorBench { + + private SelectDesc selDesc; + private VectorSelectOperator vso; + private VectorizedRowBatch vrg; + private List> child; + private List> EMPTY_CHILD = new ArrayList<>(); + + @Setup + public void setup(Blackhole bh) throws HiveException { + HiveConf hconf = new HiveConf(); + List columns = new ArrayList(); + columns.add("a"); + columns.add("b"); + columns.add("c"); + VectorizationContext vc = new VectorizationContext("name", columns); + + selDesc = new SelectDesc(false); + List colList = new ArrayList(); + ExprNodeColumnDesc colDesc1 = new ExprNodeColumnDesc(Long.class, "a", "table", false); + ExprNodeColumnDesc colDesc2 = new ExprNodeColumnDesc(Long.class, "b", "table", false); + ExprNodeColumnDesc colDesc3 = new ExprNodeColumnDesc(Long.class, "c", "table", false); + ExprNodeGenericFuncDesc plusDesc = new ExprNodeGenericFuncDesc(); + GenericUDF gudf = new GenericUDFOPPlus(); + + plusDesc.setGenericUDF(gudf); + List children = new ArrayList(); + children.add(colDesc1); + children.add(colDesc2); + plusDesc.setChildren(children); + plusDesc.setTypeInfo(TypeInfoFactory.longTypeInfo); 
+ + colList.add(plusDesc); + colList.add(colDesc3); + selDesc.setColList(colList); + + List outputColNames = new ArrayList(); + outputColNames.add("_col0"); + outputColNames.add("_col1"); + selDesc.setOutputColumnNames(outputColNames); + + VectorSelectDesc vectorSelectDesc = new VectorSelectDesc(); + selDesc.setVectorDesc(vectorSelectDesc); + List selectColList = selDesc.getColList(); + VectorExpression[] vectorSelectExprs = new VectorExpression[selectColList.size()]; + for (int i = 0; i < selectColList.size(); i++) { + ExprNodeDesc expr = selectColList.get(i); + VectorExpression ve = vc.getVectorExpression(expr); + vectorSelectExprs[i] = ve; + } + vectorSelectDesc.setSelectExpressions(vectorSelectExprs); + vectorSelectDesc.setProjectedOutputColumns(new int[]{3, 2}); + + CompilationOpContext opContext = new CompilationOpContext(); + vso = new VectorSelectOperator(opContext, vc, selDesc); + // to trigger vectorForward + child = new ArrayList<>(); + child.add(new BlackholeOperator(opContext, bh)); + child.add(new BlackholeOperator(opContext, bh)); + vso.initialize(hconf, null); + vrg = VectorizedRowGroupGenUtil.getVectorizedRowBatch( + VectorizedRowBatch.DEFAULT_SIZE, 4, 17); + } + + @TearDown + public void tearDown() throws HiveException { + vso.close(false); + } + + @Benchmark + public void testSelectStar() throws HiveException { + selDesc.setSelStarNoCompute(true); + vso.process(vrg, 0); + } + + @Benchmark + public void testVectorSelectBaseForward() throws HiveException { + selDesc.setSelStarNoCompute(false); + vso.setChildOperators(EMPTY_CHILD); + vso.process(vrg, 0); + } + + @Benchmark + public void testVectorSelectVectorForward() throws HiveException { + selDesc.setSelStarNoCompute(false); + vso.setChildOperators(child); + vso.process(vrg, 0); + } + + /* + * ============================== HOW TO RUN THIS TEST: ==================================== + * + * You can run this test: + * + * a) Via the command line: + * $ mvn clean install + * $ java -jar 
target/benchmarks.jar VectorSelectOperatorBench -prof perf -f 1 (Linux) + * $ java -jar target/benchmarks.jar VectorSelectOperatorBench -prof perfnorm -f 3 (Linux) + * $ java -jar target/benchmarks.jar VectorSelectOperatorBench -prof perfasm -f 1 (Linux) + * $ java -jar target/benchmarks.jar VectorSelectOperatorBench -prof gc -f 1 (allocation counting via gc) + */ + + public static void main(String[] args) throws RunnerException { + Options opt = new OptionsBuilder() + .include(VectorSelectOperatorBench.class.getSimpleName()) + .addProfiler(LinuxPerfProfiler.class) + .addProfiler(LinuxPerfNormProfiler.class) + .addProfiler(LinuxPerfAsmProfiler.class) + .build(); + new Runner(opt).run(); + } +} \ No newline at end of file diff --git a/itests/hive-jmh/src/main/resources/log4j2.properties b/itests/hive-jmh/src/main/resources/log4j2.properties new file mode 100644 index 0000000..3357240 --- /dev/null +++ b/itests/hive-jmh/src/main/resources/log4j2.properties @@ -0,0 +1,68 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +status = FATAL +name = HiveJMH + +# list of properties +property.hive-jmh.log.level = ERROR +property.hive-jmh.root.logger = DRFA +property.hive-jmh.log.dir = target/tmp/log +property.hive-jmh.log.file = hive-jmh.log + +# list of all appenders +appenders = console, DRFA + +# console appender +appender.console.type = Console +appender.console.name = console +appender.console.target = SYSTEM_ERR +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{ISO8601} %5p [%t] %c{2}: %m%n + +# daily rolling file appender +appender.DRFA.type = RollingFile +appender.DRFA.name = DRFA +appender.DRFA.fileName = ${sys:hive-jmh.log.dir}/${sys:hive-jmh.log.file} +appender.DRFA.filePattern = ${sys:hive-jmh.log.dir}/${sys:hive-jmh.log.file}.%d{yyyy-MM-dd} +appender.DRFA.layout.type = PatternLayout +appender.DRFA.layout.pattern = %d{ISO8601} %5p [%t] %c{2}: %m%n +appender.DRFA.policies.type = Policies +appender.DRFA.policies.time.type = TimeBasedTriggeringPolicy +appender.DRFA.policies.time.interval = 1 +appender.DRFA.policies.time.modulate = true +appender.DRFA.strategy.type = DefaultRolloverStrategy +appender.DRFA.strategy.max = 30 + +# list of all loggers +loggers = SparkIMain, SparkILoop, Jetty, AbstractLifeCycle + +logger.SparkIMain.name = org.apache.hive-jmh.repl.SparkIMain$exprTyper +logger.SparkIMain.level = INFO + +logger.SparkILoop.name = org.apache.hive-jmh.repl.SparkILoop$SparkILoopInterpreter +logger.SparkILoop.level = INFO + +logger.Jetty.name = org.eclipse.jetty +logger.Jetty.level = WARN + +logger.AbstractLifeCycle.name = org.eclipse.jetty.util.component.AbstractLifeCycle +logger.AbstractLifeCycle.level = ERROR + +# root logger +rootLogger.level = ${sys:hive-jmh.log.level} +rootLogger.appenderRefs = root +rootLogger.appenderRef.root.ref = ${sys:hive-jmh.root.logger} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerateUtil.java 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerateUtil.java index 5b597db..4bcfa12 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerateUtil.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerateUtil.java @@ -18,24 +18,14 @@ package org.apache.hadoop.hive.ql.exec.vector.util.batchgen; -import java.util.Arrays; -import java.util.Random; - -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow; import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import com.google.common.base.Preconditions; - public class VectorBatchGenerateUtil { public static Object[][] generateRowObjectArray(TypeInfo[] typeInfos,