# Patch summary (free-form preamble; patch tools skip text ahead of the first diff header).
#
# Files touched: VectorizationBench.java (itests/hive-jmh), and ColAndCol.java, ColOrCol.java,
# NotCol.java (ql/.../exec/vector/expressions).
#
# VectorizationBench.java
#   * Imports ColAndCol, ColOrCol and NotCol.
#   * Adds three LongColumnVector factories, each sized VectorizedRowBatch.DEFAULT_SIZE:
#       - getBooleanLongColumnVector(): every slot is set to random.nextInt(2), i.e. 0 or 1.
#       - getBooleanRepeatingLongColumnVector(): calls columnVector.fill(1).
#       - getBooleanLongColumnVectorWithNull(): sets noNulls = false, sets isNull[i] = true when
#         i % 100 == 0, and still writes random.nextInt(2) into every slot, null or not.
#   * Adds seven AbstractExpression subclasses whose setup() builds a row batch with
#     buildRowBatch(...) and assigns `expression`: ColAndColBench, ColAndRepeatingColBench,
#     RepeatingColAndColBench, ColOrColBench, ColOrRepeatingColBench, RepeatingColOrColBench
#     (all using column indexes 0, 1, 2) and NotColBench (NotCol(0, 1)).
#   * NOTE(review): none of the benchmark classes added in this patch calls
#     getBooleanLongColumnVectorWithNull(), so the null-handling branches are not exercised by
#     the classes visible here - confirm whether a *WithNull benchmark was intended.
#   * NOTE(review): the factories use `new Random()` with no seed, so the generated 0/1 data is
#     presumably different on every run - confirm this is acceptable for the benchmark.
#
# ColAndCol.java / ColOrCol.java
#   * After the early `return` and before the noNulls branch, evaluate() now reads
#     vector1[0] and vector2[0] once into the locals vector1Value and vector2Value.
#   * Every `outputVector[i] = ...` assignment inside the loops that previously read
#     vector1[0] or vector2[0] now uses the corresponding local instead.
#   * NOTE(review): the isNull expressions are only partly converted. In ColAndCol.java they all
#     still read vector1[0] / vector2[0] (those lines are unchanged context). In ColOrCol.java the
#     two-term isNull expressions in the hunks at old lines 123 and 189 switch to
#     vector1Value / vector2Value, while the three-term expressions in the hunks at old lines
#     229 and 247 still read vector1[0] / vector2[0]. The values are the same as long as the
#     output array does not alias an input array; TODO confirm whether the remaining reads were
#     meant to be converted as well.
#
# NotCol.java
#   * Replaces `~vector[i] & 1` with `vector[i] ^ 1` in all six assignments and rewrites the
#     accompanying comment.
#   * NOTE(review): the two expressions agree for inputs 0 and 1 but not for other values
#     (input 2: the old form yields 1, the new form yields 3), so the new form assumes the input
#     column holds only 0 or 1 - confirm that invariant for every producer of this column.
#
diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java index 391d052..0e880c6 100644 --- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java +++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java @@ -17,7 +17,10 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.ColAndCol; +import org.apache.hadoop.hive.ql.exec.vector.expressions.ColOrCol; import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColDivideLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NotCol; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColAddDoubleColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColDivideDoubleColumn; @@ -124,6 +127,34 @@ protected LongColumnVector getLongColumnVectorWithNull() { return columnVector; } + protected LongColumnVector getBooleanLongColumnVector() { + LongColumnVector columnVector = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + Random random = new Random(); + for (int i = 0; i != VectorizedRowBatch.DEFAULT_SIZE; i++) { + columnVector.vector[i] = random.nextInt(2); + } + return columnVector; + } + + protected LongColumnVector getBooleanRepeatingLongColumnVector() { + LongColumnVector columnVector = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + columnVector.fill(1); + return columnVector; + } + + protected LongColumnVector getBooleanLongColumnVectorWithNull() { + LongColumnVector columnVector = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + columnVector.noNulls = false; 
+ Random random = new Random(); + for (int i = 0; i != VectorizedRowBatch.DEFAULT_SIZE; i++) { + if (i % 100 == 0) { + columnVector.isNull[i] = true; + } + columnVector.vector[i] = random.nextInt(2); + } + return columnVector; + } + protected DoubleColumnVector getDoubleColumnVector() { DoubleColumnVector columnVector = new DoubleColumnVector(VectorizedRowBatch.DEFAULT_SIZE); Random random = new Random(); @@ -209,6 +240,68 @@ public void setup() { } } + public static class ColAndColBench extends AbstractExpression { + @Override + public void setup() { + rowBatch = buildRowBatch(new LongColumnVector(), 2, getBooleanLongColumnVector(), + getBooleanLongColumnVector()); + expression = new ColAndCol(0, 1, 2); + } + } + + public static class ColAndRepeatingColBench extends AbstractExpression { + @Override + public void setup() { + rowBatch = buildRowBatch(new LongColumnVector(), 2, getBooleanLongColumnVector(), + getBooleanRepeatingLongColumnVector()); + expression = new ColAndCol(0, 1, 2); + } + } + + public static class RepeatingColAndColBench extends AbstractExpression { + @Override + public void setup() { + rowBatch = buildRowBatch(new LongColumnVector(), 2, getBooleanRepeatingLongColumnVector(), + getBooleanLongColumnVector()); + expression = new ColAndCol(0, 1, 2); + } + } + + public static class ColOrColBench extends AbstractExpression { + @Override + public void setup() { + rowBatch = buildRowBatch(new LongColumnVector(), 2, getBooleanLongColumnVector(), + getBooleanLongColumnVector()); + expression = new ColOrCol(0, 1, 2); + } + } + + public static class ColOrRepeatingColBench extends AbstractExpression { + @Override + public void setup() { + rowBatch = buildRowBatch(new LongColumnVector(), 2, getBooleanLongColumnVector(), + getBooleanRepeatingLongColumnVector()); + expression = new ColOrCol(0, 1, 2); + } + } + + public static class RepeatingColOrColBench extends AbstractExpression { + @Override + public void setup() { + rowBatch = buildRowBatch(new 
LongColumnVector(), 2, getBooleanRepeatingLongColumnVector(), + getBooleanLongColumnVector()); + expression = new ColOrCol(0, 1, 2); + } + } + + public static class NotColBench extends AbstractExpression { + @Override + public void setup() { + rowBatch = buildRowBatch(new LongColumnVector(), 1, getBooleanLongColumnVector()); + expression = new NotCol(0, 1); + } + } + public static void main(String[] args) throws RunnerException { Options opt = new OptionsBuilder().include(".*" + VectorizationBench.class.getSimpleName() + ".*").build(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java index 72df220..ff7371d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java @@ -64,6 +64,8 @@ public void evaluate(VectorizedRowBatch batch) { return; } + long vector1Value = vector1[0]; + long vector2Value = vector2[0]; if (inputColVector1.noNulls && inputColVector2.noNulls) { if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { // All must be selected otherwise size would be zero @@ -74,11 +76,11 @@ public void evaluate(VectorizedRowBatch batch) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[0] & vector2[i]; + outputVector[i] = vector1Value & vector2[i]; } } else { for (int i = 0; i != n; i++) { - outputVector[i] = vector1[0] & vector2[i]; + outputVector[i] = vector1Value & vector2[i]; } } outV.isRepeating = false; @@ -86,11 +88,11 @@ public void evaluate(VectorizedRowBatch batch) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[i] & vector2[0]; + outputVector[i] = vector1[i] & vector2Value; } } else { for (int i = 0; i != n; i++) { - outputVector[i] = vector1[i] & vector2[0]; + outputVector[i] = vector1[i] & vector2Value; } } 
outV.isRepeating = false; @@ -120,12 +122,12 @@ public void evaluate(VectorizedRowBatch batch) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[0] & vector2[i]; + outputVector[i] = vector1Value & vector2[i]; outV.isNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i]; } } else { for (int i = 0; i != n; i++) { - outputVector[i] = vector1[0] & vector2[i]; + outputVector[i] = vector1Value & vector2[i]; outV.isNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i]; } } @@ -134,12 +136,12 @@ public void evaluate(VectorizedRowBatch batch) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[i] & vector2[0]; + outputVector[i] = vector1[i] & vector2Value; outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0]; } } else { for (int i = 0; i != n; i++) { - outputVector[i] = vector1[i] & vector2[0]; + outputVector[i] = vector1[i] & vector2Value; outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0]; } } @@ -172,12 +174,12 @@ public void evaluate(VectorizedRowBatch batch) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[0] & vector2[i]; + outputVector[i] = vector1Value & vector2[i]; outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1); } } else { for (int i = 0; i != n; i++) { - outputVector[i] = vector1[0] & vector2[i]; + outputVector[i] = vector1Value & vector2[i]; outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1); } } @@ -186,12 +188,12 @@ public void evaluate(VectorizedRowBatch batch) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[i] & vector2[0]; + outputVector[i] = vector1[i] & vector2Value; outV.isNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1); } } else { for (int i = 0; i != n; i++) { - outputVector[i] = vector1[i] & vector2[0]; + outputVector[i] = vector1[i] & vector2Value; outV.isNull[i] = 
inputColVector1.isNull[i] && (vector2[0] == 1); } } @@ -226,14 +228,14 @@ public void evaluate(VectorizedRowBatch batch) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[0] & vector2[i]; + outputVector[i] = vector1Value & vector2[i]; outV.isNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i]) || (inputColVector1.isNull[0] && (vector2[i] == 1)) || (inputColVector1.isNull[0] && inputColVector2.isNull[i]); } } else { for (int i = 0; i != n; i++) { - outputVector[i] = vector1[0] & vector2[i]; + outputVector[i] = vector1Value & vector2[i]; outV.isNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i]) || (inputColVector1.isNull[0] && (vector2[i] == 1)) || (inputColVector1.isNull[0] && inputColVector2.isNull[i]); @@ -244,14 +246,14 @@ public void evaluate(VectorizedRowBatch batch) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[i] & vector2[0]; + outputVector[i] = vector1[i] & vector2Value; outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0]) || (inputColVector1.isNull[i] && (vector2[0] == 1)) || (inputColVector1.isNull[i] && inputColVector2.isNull[0]); } } else { for (int i = 0; i != n; i++) { - outputVector[i] = vector1[i] & vector2[0]; + outputVector[i] = vector1[i] & vector2Value; outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0]) || (inputColVector1.isNull[i] && (vector2[0] == 1)) || (inputColVector1.isNull[i] && inputColVector2.isNull[0]); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java index 5e23446..60ed2d4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java @@ -67,6 +67,8 @@ public void evaluate(VectorizedRowBatch batch) { return; } + long vector1Value = vector1[0]; + long vector2Value = 
vector2[0]; if (inputColVector1.noNulls && inputColVector2.noNulls) { if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { // All must be selected otherwise size would be zero @@ -77,11 +79,11 @@ public void evaluate(VectorizedRowBatch batch) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[0] | vector2[i]; + outputVector[i] = vector1Value | vector2[i]; } } else { for (int i = 0; i != n; i++) { - outputVector[i] = vector1[0] | vector2[i]; + outputVector[i] = vector1Value | vector2[i]; } } outV.isRepeating = false; @@ -89,11 +91,11 @@ public void evaluate(VectorizedRowBatch batch) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[i] | vector2[0]; + outputVector[i] = vector1[i] | vector2Value; } } else { for (int i = 0; i != n; i++) { - outputVector[i] = vector1[i] | vector2[0]; + outputVector[i] = vector1[i] | vector2Value; } } outV.isRepeating = false; @@ -123,13 +125,13 @@ public void evaluate(VectorizedRowBatch batch) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[0] | vector2[i]; - outV.isNull[i] = (vector1[0] == 0) && inputColVector2.isNull[i]; + outputVector[i] = vector1Value | vector2[i]; + outV.isNull[i] = (vector1Value == 0) && inputColVector2.isNull[i]; } } else { for (int i = 0; i != n; i++) { - outputVector[i] = vector1[0] | vector2[i]; - outV.isNull[i] = (vector1[0] == 0) && inputColVector2.isNull[i]; + outputVector[i] = vector1Value | vector2[i]; + outV.isNull[i] = (vector1Value == 0) && inputColVector2.isNull[i]; } } outV.isRepeating = false; @@ -137,12 +139,12 @@ public void evaluate(VectorizedRowBatch batch) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[i] | vector2[0]; + outputVector[i] = vector1[i] | vector2Value; outV.isNull[i] = (vector1[i] == 0) && inputColVector2.isNull[0]; } } else { for (int i = 0; i != 
n; i++) { - outputVector[i] = vector1[i] | vector2[0]; + outputVector[i] = vector1[i] | vector2Value; outV.isNull[i] = (vector1[i] == 0) && inputColVector2.isNull[0]; } } @@ -175,12 +177,12 @@ public void evaluate(VectorizedRowBatch batch) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[0] | vector2[i]; + outputVector[i] = vector1Value | vector2[i]; outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 0); } } else { for (int i = 0; i != n; i++) { - outputVector[i] = vector1[0] | vector2[i]; + outputVector[i] = vector1Value | vector2[i]; outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 0); } } @@ -189,13 +191,13 @@ public void evaluate(VectorizedRowBatch batch) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[i] | vector2[0]; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[0] == 0); + outputVector[i] = vector1[i] | vector2Value; + outV.isNull[i] = inputColVector1.isNull[i] && (vector2Value == 0); } } else { for (int i = 0; i != n; i++) { - outputVector[i] = vector1[i] | vector2[0]; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[0] == 0); + outputVector[i] = vector1[i] | vector2Value; + outV.isNull[i] = inputColVector1.isNull[i] && (vector2Value == 0); } } outV.isRepeating = false; @@ -229,14 +231,14 @@ public void evaluate(VectorizedRowBatch batch) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[0] | vector2[i]; + outputVector[i] = vector1Value | vector2[i]; outV.isNull[i] = ((vector1[0] == 0) && inputColVector2.isNull[i]) || (inputColVector1.isNull[0] && (vector2[i] == 0)) || (inputColVector1.isNull[0] && inputColVector2.isNull[i]); } } else { for (int i = 0; i != n; i++) { - outputVector[i] = vector1[0] | vector2[i]; + outputVector[i] = vector1Value | vector2[i]; outV.isNull[i] = ((vector1[0] == 0) && inputColVector2.isNull[i]) || (inputColVector1.isNull[0] && 
(vector2[i] == 0)) || (inputColVector1.isNull[0] && inputColVector2.isNull[i]); @@ -247,14 +249,14 @@ public void evaluate(VectorizedRowBatch batch) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[i] | vector2[0]; + outputVector[i] = vector1[i] | vector2Value; outV.isNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[0]) || (inputColVector1.isNull[i] && (vector2[0] == 0)) || (inputColVector1.isNull[i] && inputColVector2.isNull[0]); } } else { for (int i = 0; i != n; i++) { - outputVector[i] = vector1[i] | vector2[0]; + outputVector[i] = vector1[i] | vector2Value; outV.isNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[0]) || (inputColVector1.isNull[i] && (vector2[0] == 0)) || (inputColVector1.isNull[i] && inputColVector2.isNull[0]); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java index 604d154..ea2a434 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java @@ -62,17 +62,17 @@ public void evaluate(VectorizedRowBatch batch) { outV.noNulls = true; if (inputColVector.isRepeating) { outV.isRepeating = true; - // mask out all but low order bit with "& 1" so NOT 1 yields 0, NOT 0 yields 1 - outputVector[0] = ~vector[0] & 1; + // 0 XOR 1 yields 1, 1 XOR 1 yields 0 + outputVector[0] = vector[0] ^ 1; } else if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = ~vector[i] & 1; + outputVector[i] = vector[i] ^ 1; } outV.isRepeating = false; } else { for (int i = 0; i != n; i++) { - outputVector[i] = ~vector[i] & 1; + outputVector[i] = vector[i] ^ 1; } outV.isRepeating = false; } @@ -80,19 +80,19 @@ public void evaluate(VectorizedRowBatch batch) { outV.noNulls = false; if (inputColVector.isRepeating) { outV.isRepeating = true; - outputVector[0] = ~vector[0] & 
1; + outputVector[0] = vector[0] ^ 1; outV.isNull[0] = inputColVector.isNull[0]; } else if (batch.selectedInUse) { outV.isRepeating = false; for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = ~vector[i] & 1; + outputVector[i] = vector[i] ^ 1; outV.isNull[i] = inputColVector.isNull[i]; } } else { outV.isRepeating = false; for (int i = 0; i != n; i++) { - outputVector[i] = ~vector[i] & 1; + outputVector[i] = vector[i] ^ 1; outV.isNull[i] = inputColVector.isNull[i]; } }