diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java index 17d209a..4a045e8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java @@ -21,19 +21,23 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +/** + * This class performs OR expression on two input columns and stores, + * the boolean output in a separate output column. The boolean values + * are supposed to be represented as 0/1 in a long vector. + */ public class ColOrCol extends VectorExpression { - int colNum1; - int colNum2; - int outputColumn; + private final int colNum1; + private final int colNum2; + private final int outputColumn; - public ColOrCol(int colNum1, int colNum2, int outputColumn) - { - this.colNum1 = colNum1; - this.colNum2 = colNum2; - this.outputColumn = outputColumn; - } + public ColOrCol(int colNum1, int colNum2, int outputColumn) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + this.outputColumn = outputColumn; + } - @Override + @Override public void evaluate(VectorizedRowBatch batch) { if (childExpressions != null) { @@ -50,71 +54,71 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn]; long[] outputVector = outV.vector; if (n <= 0) { - //Nothing to do + // Nothing to do return; } if (inputColVector1.noNulls && inputColVector2.noNulls) { - if ( (inputColVector1.isRepeating) && (inputColVector2.isRepeating) ) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { + // All must be selected otherwise size would be zero + // Repeating property will not change. 
outV.isRepeating = true; - outputVector[0] = vector1[0] | vector2[0] ; + outputVector[0] = vector1[0] | vector2[0]; } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[0] | vector2[i] ; + outputVector[i] = vector1[0] | vector2[i]; } } else { - for(int i = 0; i != n; i++) { + for (int i = 0; i != n; i++) { outputVector[i] = vector1[0] | vector2[i]; } } outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[i] | vector2[0] ; + outputVector[i] = vector1[i] | vector2[0]; } } else { - for(int i = 0; i != n; i++) { + for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2[0]; } } outV.isRepeating = false; - } else /* neither side is repeating */ { + } else /* neither side is repeating */{ if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector1[i] | vector2[i] ; - } - } else { - for(int i = 0; i != n; i++) { - outputVector[i] = vector1[i] | vector2[i]; - } - } + for (int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = vector1[i] | vector2[i]; + } + } else { + for (int i = 0; i != n; i++) { + outputVector[i] = vector1[i] | vector2[i]; + } + } outV.isRepeating = false; } outV.noNulls = true; } else if (inputColVector1.noNulls && !inputColVector2.noNulls) { // only input 2 side has nulls - if ( (inputColVector1.isRepeating) && (inputColVector2.isRepeating) ) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { + // All must be selected otherwise size would be zero + // Repeating property will not change. 
outV.isRepeating = true; - outputVector[0] = vector1[0] | vector2[0] ; - outV.isNull[0] = (vector1[0] == 0) && inputColVector2.isNull[0]; + outputVector[0] = vector1[0] | vector2[0]; + outV.isNull[0] = (vector1[0] == 0) && inputColVector2.isNull[0]; } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[0] | vector2[i] ; + outputVector[i] = vector1[0] | vector2[i]; outV.isNull[i] = (vector1[0] == 0) && inputColVector2.isNull[i]; } } else { - for(int i = 0; i != n; i++) { + for (int i = 0; i != n; i++) { outputVector[i] = vector1[0] | vector2[i]; outV.isNull[i] = (vector1[0] == 0) && inputColVector2.isNull[i]; } @@ -122,27 +126,27 @@ public void evaluate(VectorizedRowBatch batch) { outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[i] | vector2[0] ; + outputVector[i] = vector1[i] | vector2[0]; outV.isNull[i] = (vector1[i] == 0) && inputColVector2.isNull[0]; } } else { - for(int i = 0; i != n; i++) { + for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2[0]; outV.isNull[i] = (vector1[i] == 0) && inputColVector2.isNull[0]; } } outV.isRepeating = false; - } else /* neither side is repeating */ { + } else /* neither side is repeating */{ if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[i] | vector2[i] ; + outputVector[i] = vector1[i] | vector2[i]; outV.isNull[i] = (vector1[i] == 0) && inputColVector2.isNull[i]; } } else { - for(int i = 0; i != n; i++) { + for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2[i]; outV.isNull[i] = (vector1[i] == 0) && inputColVector2.isNull[i]; } @@ -152,69 +156,69 @@ public void 
evaluate(VectorizedRowBatch batch) { outV.noNulls = false; } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { // only input 1 side has nulls - if ( (inputColVector1.isRepeating) && (inputColVector2.isRepeating) ) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { + // All must be selected otherwise size would be zero + // Repeating property will not change. outV.isRepeating = true; - outputVector[0] = vector1[0] | vector2[0] ; - outV.isNull[0] = inputColVector1.isNull[0] && (vector2[0] == 0); + outputVector[0] = vector1[0] | vector2[0]; + outV.isNull[0] = inputColVector1.isNull[0] && (vector2[0] == 0); } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[0] | vector2[i] ; - outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 0); + outputVector[i] = vector1[0] | vector2[i]; + outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 0); } } else { - for(int i = 0; i != n; i++) { + for (int i = 0; i != n; i++) { outputVector[i] = vector1[0] | vector2[i]; - outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 0); + outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 0); } } outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[i] | vector2[0] ; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[0] == 0); + outputVector[i] = vector1[i] | vector2[0]; + outV.isNull[i] = inputColVector1.isNull[i] && (vector2[0] == 0); } } else { - for(int i = 0; i != n; i++) { + for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2[0]; - outV.isNull[i] = inputColVector1.isNull[i] 
&& (vector2[0] == 0); + outV.isNull[i] = inputColVector1.isNull[i] && (vector2[0] == 0); } } outV.isRepeating = false; - } else /* neither side is repeating */ { + } else /* neither side is repeating */{ if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[i] | vector2[i] ; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 0); + outputVector[i] = vector1[i] | vector2[i]; + outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 0); } } else { - for(int i = 0; i != n; i++) { + for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2[i]; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 0); + outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 0); } } outV.isRepeating = false; } outV.noNulls = false; - } else /* !inputColVector1.noNulls && !inputColVector2.noNulls */ { + } else /* !inputColVector1.noNulls && !inputColVector2.noNulls */{ // either input 1 or input 2 may have nulls - if ( (inputColVector1.isRepeating) && (inputColVector2.isRepeating) ) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { + // All must be selected otherwise size would be zero + // Repeating property will not change. 
outV.isRepeating = true; - outputVector[0] = vector1[0] | vector2[0] ; + outputVector[0] = vector1[0] | vector2[0]; outV.isNull[0] = ((vector1[0] == 0) && inputColVector2.isNull[0]) || (inputColVector1.isNull[0] && (vector2[0] == 0)) || (inputColVector1.isNull[0] && inputColVector2.isNull[0]); } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[0] | vector2[i]; outV.isNull[i] = ((vector1[0] == 0) && inputColVector2.isNull[i]) @@ -222,7 +226,7 @@ public void evaluate(VectorizedRowBatch batch) { || (inputColVector1.isNull[0] && inputColVector2.isNull[i]); } } else { - for(int i = 0; i != n; i++) { + for (int i = 0; i != n; i++) { outputVector[i] = vector1[0] | vector2[i]; outV.isNull[i] = ((vector1[0] == 0) && inputColVector2.isNull[i]) || (inputColVector1.isNull[0] && (vector2[i] == 0)) @@ -232,15 +236,15 @@ public void evaluate(VectorizedRowBatch batch) { outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[i] | vector2[0] ; + outputVector[i] = vector1[i] | vector2[0]; outV.isNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[0]) || (inputColVector1.isNull[i] && (vector2[0] == 0)) || (inputColVector1.isNull[i] && inputColVector2.isNull[0]); } } else { - for(int i = 0; i != n; i++) { + for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2[0]; outV.isNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[0]) || (inputColVector1.isNull[i] && (vector2[0] == 0)) @@ -248,17 +252,17 @@ public void evaluate(VectorizedRowBatch batch) { } } outV.isRepeating = false; - } else /* neither side is repeating */ { + } else /* neither side is repeating */{ if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for (int j = 0; j 
!= n; j++) { int i = sel[j]; - outputVector[i] = vector1[i] | vector2[i] ; + outputVector[i] = vector1[i] | vector2[i]; outV.isNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[i]) || (inputColVector1.isNull[i] && (vector2[i] == 0)) || (inputColVector1.isNull[i] && inputColVector2.isNull[i]); } } else { - for(int i = 0; i != n; i++) { + for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2[i]; outV.isNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[i]) || (inputColVector1.isNull[i] && (vector2[i] == 0)) @@ -269,7 +273,7 @@ public void evaluate(VectorizedRowBatch batch) { } outV.noNulls = false; } - } + } @Override public int getOutputColumn() { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColumnExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColumnExpression.java deleted file mode 100644 index 0f819f5..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColumnExpression.java +++ /dev/null @@ -1,51 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions; - -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; - -/** - * Vector column project. It's evaluation is a no-op. - * - */ -public class ColumnExpression extends VectorExpression { - - private int colNum; - private String colType; - - public ColumnExpression(int colNum, String colType) { - this.colNum = colNum; - this.colType = colType; - } - - public void evaluate(VectorizedRowBatch vrg) { - //do nothing - } - - @Override - public int getOutputColumn() { - return colNum; - } - - @Override - public String getOutputType() { - return colType; - } -} - diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java index e0e87f1..c1c0c0d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java @@ -23,6 +23,9 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +/** + * Constant is represented as a vector with repeating values. + */ public class ConstantVectorExpression extends VectorExpression { private static enum Type { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IdentityExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IdentityExpression.java index 0a1e3a1..45169b2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IdentityExpression.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IdentityExpression.java @@ -19,9 +19,12 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +/** + * An expression representing a column, only children are evaluated. 
+ */ public class IdentityExpression extends VectorExpression { - int colNum; - String type; + private final int colNum; + private final String type; public IdentityExpression(int colNum, String type) { this.colNum = colNum; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java index fd5f5b5..96eb215 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java @@ -26,8 +26,8 @@ * The boolean output is stored in the specified output column. */ public class IsNotNull extends VectorExpression { - int colNum; - int outputColumn; + private final int colNum; + private final int outputColumn; public IsNotNull(int colNum, int outputColumn) { this.colNum = colNum; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java index a470aae..e6f5fe2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java @@ -26,8 +26,8 @@ * The boolean output is stored in the specified output column. 
*/ public class IsNull extends VectorExpression { - int colNum; - int outputColumn; + private final int colNum; + private final int outputColumn; public IsNull(int colNum, int outputColumn) { this.colNum = colNum; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java index cd6c7bc..5af6997 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java @@ -15,12 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - + package org.apache.hadoop.hive.ql.exec.vector.expressions; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; -import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; /** @@ -54,24 +53,24 @@ public void evaluate(VectorizedRowBatch batch) { long[] vector1 = inputColVector1.vector; long[] vector2 = inputColVector2.vector; double[] outputVector = outputColVector.vector; - + // return immediately if batch is empty if (n == 0) { return; } - + outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating; - - // Handle nulls first + + // Handle nulls first NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); - + /* Disregard nulls for processing. In other words, - * the arithmetic operation is performed even if one or + * the arithmetic operation is performed even if one or * more inputs are null. 
This is to improve speed by avoiding * conditional checks in the inner loop. - */ - if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + */ + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { outputVector[0] = vector1[0] / (double) vector2[0]; } else if (inputColVector1.isRepeating) { if (batch.selectedInUse) { @@ -107,9 +106,9 @@ public void evaluate(VectorizedRowBatch batch) { } } } - - /* For the case when the output can have null values, follow - * the convention that the data values must be 1 for long and + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and * NaN for double. This is to prevent possible later zero-divide errors * in complex arithmetic expressions like col2 / (col1 - 1) * in the case when some col1 entries are null. diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java index a20cdb5..2d64e69 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongScalar.java @@ -15,14 +15,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - + package org.apache.hadoop.hive.ql.exec.vector.expressions; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; /** * This operation is handled as a special case because Hive @@ -30,9 +28,9 @@ * from a template like the other arithmetic operations are. 
*/ public class LongColDivideLongScalar extends VectorExpression { - private int colNum; - private long value; - private int outputColumn; + private final int colNum; + private final long value; + private final int outputColumn; public LongColDivideLongScalar(int colNum, long value, int outputColumn) { this.colNum = colNum; @@ -57,7 +55,7 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; long[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; - + // return immediately if batch is empty if (n == 0) { return; @@ -65,9 +63,9 @@ public void evaluate(VectorizedRowBatch batch) { if (inputColVector.isRepeating) { outputVector[0] = vector[0] / (double) value; - + // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + outputIsNull[0] = inputIsNull[0]; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -93,9 +91,9 @@ public void evaluate(VectorizedRowBatch batch) { System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } - + /* Set double data vector array entries for NULL elements to the correct value. - * Unlike other col-scalar operations, this one doesn't benefit from carrying + * Unlike other col-scalar operations, this one doesn't benefit from carrying * over NaN values from the input array. 
*/ NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n); @@ -105,7 +103,7 @@ public void evaluate(VectorizedRowBatch batch) { public int getOutputColumn() { return outputColumn; } - + @Override public String getOutputType() { return "double"; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java index cbc7d48..cfa3cf4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarDivideLongColumn.java @@ -15,14 +15,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - + package org.apache.hadoop.hive.ql.exec.vector.expressions; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; /** * This operation is handled as a special case because Hive @@ -30,9 +28,9 @@ * from a template like the other arithmetic operations are. 
*/ public class LongScalarDivideLongColumn extends VectorExpression { - private int colNum; + private final int colNum; private final double value; - private int outputColumn; + private final int outputColumn; public LongScalarDivideLongColumn(long value, int colNum, int outputColumn) { this.colNum = colNum; @@ -57,7 +55,7 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; long[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; - + // return immediately if batch is empty if (n == 0) { return; @@ -65,9 +63,9 @@ public void evaluate(VectorizedRowBatch batch) { if (inputColVector.isRepeating) { outputVector[0] = value / vector[0]; - + // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + outputIsNull[0] = inputIsNull[0]; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -93,9 +91,9 @@ public void evaluate(VectorizedRowBatch batch) { System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } - + /* Set double data vector array entries for NULL elements to the correct value. - * Unlike other col-scalar operations, this one doesn't benefit from carrying + * Unlike other col-scalar operations, this one doesn't benefit from carrying * over NaN values from the input array. 
*/ NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n); @@ -105,7 +103,7 @@ public void evaluate(VectorizedRowBatch batch) { public int getOutputColumn() { return outputColumn; } - + @Override public String getOutputType() { return "double"; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java index 90dd7da..92beb93 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java @@ -18,21 +18,21 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; -/** - * String expression evaluation helper functions +/** + * String expression evaluation helper functions. */ public class StringExpr { - - /* Compare two strings from two byte arrays each - * with their own start position and length. - * Use lexicographic unsigned byte value order. + + /* Compare two strings from two byte arrays each + * with their own start position and length. + * Use lexicographic unsigned byte value order. * This is what's used for UTF-8 sort order. - * Return negative value if arg1 < arg2, 0 if arg1 = arg2, + * Return negative value if arg1 < arg2, 0 if arg1 = arg2, * positive if arg1 > arg2. 
*/ public static int compare(byte[] arg1, int start1, int len1, byte[] arg2, int start2, int len2) { for (int i = 0; i < len1 && i < len2; i++) { - int b1 = arg1[i + start1] & 0xff; + int b1 = arg1[i + start1] & 0xff; int b2 = arg2[i + start2] & 0xff; if (b1 != b2) { return b1 - b2; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java index 32b4d4b..dc79ff1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java @@ -22,11 +22,15 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +/** + * Calculate the length of the strings in the input column vector, and store + * it in the output column vector. + */ public class StringLength extends VectorExpression { - private int colNum; - private int outputColumn; - - public StringLength (int colNum, int outputColumn) { + private final int colNum; + private final int outputColumn; + + public StringLength(int colNum, int outputColumn) { this.colNum = colNum; this.outputColumn = outputColumn; } @@ -34,45 +38,44 @@ public StringLength (int colNum, int outputColumn) { // Calculate the length of the UTF-8 strings in input vector and place results in output vector. 
@Override public void evaluate(VectorizedRowBatch batch) { - + if (childExpressions != null) { super.evaluateChildren(batch); } - + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; - LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn]; + LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn]; int[] sel = batch.selected; int n = batch.size; byte[][] vector = inputColVector.vector; - int start[] = inputColVector.start; - int length[] = inputColVector.length; - long[] resultLen = outV.vector; - + int [] start = inputColVector.start; + int [] length = inputColVector.length; + long[] resultLen = outV.vector; + if (n == 0) { - //Nothing to do return; } - + if (inputColVector.noNulls) { outV.noNulls = true; if (inputColVector.isRepeating) { outV.isRepeating = true; - resultLen[0] = UTF8StringLength(vector[0], start[0], length[0]); + resultLen[0] = utf8StringLength(vector[0], start[0], length[0]); } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - resultLen[i] = UTF8StringLength(vector[i], start[i], length[i]); + resultLen[i] = utf8StringLength(vector[i], start[i], length[i]); } outV.isRepeating = false; } else { for(int i = 0; i != n; i++) { - resultLen[i] = UTF8StringLength(vector[i], start[i], length[i]); + resultLen[i] = utf8StringLength(vector[i], start[i], length[i]); } outV.isRepeating = false; } } else { - + /* * Handle case with nulls. Don't do function if the value is null, to save time, * because calling the function can be expensive. 
@@ -82,13 +85,13 @@ public void evaluate(VectorizedRowBatch batch) { outV.isRepeating = true; outV.isNull[0] = inputColVector.isNull[0]; if (!inputColVector.isNull[0]) { - resultLen[0] = UTF8StringLength(vector[0], start[0], length[0]); + resultLen[0] = utf8StringLength(vector[0], start[0], length[0]); } } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; if (!inputColVector.isNull[i]) { - resultLen[i] = UTF8StringLength(vector[i], start[i], length[i]); + resultLen[i] = utf8StringLength(vector[i], start[i], length[i]); } outV.isNull[i] = inputColVector.isNull[i]; } @@ -96,7 +99,7 @@ public void evaluate(VectorizedRowBatch batch) { } else { for(int i = 0; i != n; i++) { if (!inputColVector.isNull[i]) { - resultLen[i] = UTF8StringLength(vector[i], start[i], length[i]); + resultLen[i] = utf8StringLength(vector[i], start[i], length[i]); } outV.isNull[i] = inputColVector.isNull[i]; } @@ -104,22 +107,21 @@ public void evaluate(VectorizedRowBatch batch) { } } } - - /* + + /* * Return length in characters of UTF8 string in byte array * beginning at start that is len bytes long. */ - static long UTF8StringLength(byte[] s, int start, int len) - { + static long utf8StringLength(byte[] s, int start, int len) { long resultLength = 0; for (int i = start; i < start + len; i++) { - - /* Byte bit patterns of the form 10xxxxxx are continuation - * bytes. All other bit patterns are the first byte of + + /* Byte bit patterns of the form 10xxxxxx are continuation + * bytes. All other bit patterns are the first byte of * a character. 
*/ if ((s[i] & 0xc0) != 0x80) { - resultLength++; + resultLength++; } } return resultLength; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLower.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLower.java index 19e14f3..a93137c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLower.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLower.java @@ -15,11 +15,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package org.apache.hadoop.hive.ql.exec.vector.expressions; -import org.apache.hadoop.hive.ql.udf.UDFLower; import org.apache.hadoop.hive.ql.udf.IUDFUnaryString; +import org.apache.hadoop.hive.ql.udf.UDFLower; +/** + * Expression to convert a string to lower case. + * Extends {@link StringUnaryUDF}. + */ public class StringLower extends StringUnaryUDF { public StringLower(int colNum, int outputColumn) { super(colNum, outputColumn, (IUDFUnaryString) new UDFLower()); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java index 37610f1..dbf0196 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java @@ -1,3 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.io.UnsupportedEncodingException; @@ -6,19 +24,21 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; /** - * This class provides the implementation of vectorized substring, with a single start index parameter. - * If the start index is invalid (outside of the string boundaries) then an empty string will be in the output. + * This class provides the implementation of vectorized substring, with a single start index + * parameter. If the start index is invalid (outside of the string boundaries) then an empty + * string will be in the output. */ public class StringSubstrColStart extends VectorExpression { private final int startIdx; private final int colNum; private final int outputColumn; - private static byte[] EMPTY_STRING; + private static byte[] EMPTYSTRING; - // Populating the Empty string bytes. Putting it as static since it should be immutable and can be shared + // Populating the Empty string bytes. Putting it as static since it should be immutable and can + // be shared. static { try { - EMPTY_STRING = "".getBytes("UTF-8"); + EMPTYSTRING = "".getBytes("UTF-8"); } catch(UnsupportedEncodingException e) { e.printStackTrace(); } @@ -31,8 +51,8 @@ public StringSubstrColStart(int colNum, int startIdx, int outputColumn) { } /** - * Given the substring start index param it finds the starting offset of the passed in utf8 string byte array - * that matches the index. 
+ * Given the substring start index param it finds the starting offset of the passed in utf8 + * string byte array that matches the index. * @param utf8String byte array that holds the utf8 string * @param start start offset of the byte array the string starts at * @param len length of the bytes the string holds in the byte array @@ -94,7 +114,7 @@ public void evaluate(VectorizedRowBatch batch) { if (!inV.noNulls && inV.isNull[0]) { outV.isNull[0] = true; outV.noNulls = false; - outV.setRef(0, EMPTY_STRING, 0, EMPTY_STRING.length); + outV.setRef(0, EMPTYSTRING, 0, EMPTYSTRING.length); return; } else { outV.noNulls = true; @@ -102,7 +122,7 @@ public void evaluate(VectorizedRowBatch batch) { if (offset != -1) { outV.setRef(0, vector[0], offset, len[0] - offset); } else { - outV.setRef(0, EMPTY_STRING, 0, EMPTY_STRING.length); + outV.setRef(0, EMPTYSTRING, 0, EMPTYSTRING.length); } } } else { @@ -113,12 +133,13 @@ public void evaluate(VectorizedRowBatch batch) { for (int i = 0; i != n; ++i) { int selected = sel[i]; if (!inV.isNull[selected]) { - int offset = getSubstrStartOffset(vector[selected], start[selected], len[selected], startIdx); + int offset = getSubstrStartOffset(vector[selected], start[selected], len[selected], + startIdx); outV.isNull[selected] = false; if (offset != -1) { outV.setRef(selected, vector[selected], offset, len[selected] - offset); } else { - outV.setRef(selected, EMPTY_STRING, 0, EMPTY_STRING.length); + outV.setRef(selected, EMPTYSTRING, 0, EMPTYSTRING.length); } } else { outV.isNull[selected] = true; @@ -128,11 +149,12 @@ public void evaluate(VectorizedRowBatch batch) { outV.noNulls = true; for (int i = 0; i != n; ++i) { int selected = sel[i]; - int offset = getSubstrStartOffset(vector[selected], start[selected], len[selected], startIdx); + int offset = getSubstrStartOffset(vector[selected], start[selected], len[selected], + startIdx); if (offset != -1) { outV.setRef(selected, vector[selected], offset, len[selected] - offset); } else { - 
outV.setRef(selected, EMPTY_STRING, 0, EMPTY_STRING.length); + outV.setRef(selected, EMPTYSTRING, 0, EMPTYSTRING.length); } } } @@ -146,7 +168,7 @@ public void evaluate(VectorizedRowBatch batch) { if (offset != -1) { outV.setRef(i, vector[i], offset, len[i] - offset); } else { - outV.setRef(i, EMPTY_STRING, 0, EMPTY_STRING.length); + outV.setRef(i, EMPTYSTRING, 0, EMPTYSTRING.length); } } } @@ -157,7 +179,7 @@ public void evaluate(VectorizedRowBatch batch) { if (offset != -1) { outV.setRef(i, vector[i], offset, len[i] - offset); } else { - outV.setRef(i, EMPTY_STRING, 0, EMPTY_STRING.length); + outV.setRef(i, EMPTYSTRING, 0, EMPTYSTRING.length); } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java index 5864375..c48355e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java @@ -1,3 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.io.UnsupportedEncodingException; @@ -6,9 +24,11 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; /** - * This class provides the implementation of vectorized substring, with a start index and length parameters. - * If the start index is invalid (outside of the string boundaries) then an empty string will be in the output. - * If the length provided is longer then the string boundary, then it will replace it with the ending index. + * This class provides the implementation of vectorized substring, with a start index and length + * parameters. If the start index is invalid (outside of the string boundaries) then an empty + * string will be in the output. + * If the length provided is longer than the string boundary, then it will replace it with the + * ending index. */ public class StringSubstrColStartLen extends VectorExpression { private final int startIdx; @@ -16,12 +36,13 @@ private final int length; private final int outputColumn; private final int[] offsetArray; - private static byte[] EMPTY_STRING; + private static byte[] EMPTYSTRING; - // Populating the Empty string bytes. Putting it as static since it should be immutable and can be shared + // Populating the Empty string bytes. Putting it as static since it should be immutable and can be + // shared static { try { - EMPTY_STRING = "".getBytes("UTF-8"); + EMPTYSTRING = "".getBytes("UTF-8"); } catch(UnsupportedEncodingException e) { e.printStackTrace(); } @@ -45,7 +66,8 @@ public StringSubstrColStartLen(int colNum, int startIdx, int length, int outputC * @param substrLen the length of the substring * @param offsetArray the array that indexes are populated to. Assume its length >= 2. 
*/ - static void populateSubstrOffsets(byte[] utf8String, int start, int len, int substrStart, int substrLength, int[] offsetArray) { + static void populateSubstrOffsets(byte[] utf8String, int start, int len, int substrStart, + int substrLength, int[] offsetArray) { int curIdx = -1; offsetArray[0] = -1; offsetArray[1] = -1; @@ -109,7 +131,7 @@ public void evaluate(VectorizedRowBatch batch) { if (!inV.noNulls && inV.isNull[0]) { outV.isNull[0] = true; outV.noNulls = false; - outV.setRef(0, EMPTY_STRING, 0, EMPTY_STRING.length); + outV.setRef(0, EMPTYSTRING, 0, EMPTYSTRING.length); return; } else { outV.noNulls = true; @@ -117,7 +139,7 @@ public void evaluate(VectorizedRowBatch batch) { if (offsetArray[0] != -1) { outV.setRef(0, vector[0], offsetArray[0], offsetArray[1]); } else { - outV.setRef(0, EMPTY_STRING, 0, EMPTY_STRING.length); + outV.setRef(0, EMPTYSTRING, 0, EMPTYSTRING.length); } } } else { @@ -129,11 +151,12 @@ public void evaluate(VectorizedRowBatch batch) { int selected = sel[i]; if (!inV.isNull[selected]) { outV.isNull[selected] = false; - populateSubstrOffsets(vector[selected], start[selected], len[selected], startIdx, length, offsetArray); + populateSubstrOffsets(vector[selected], start[selected], len[selected], startIdx, + length, offsetArray); if (offsetArray[0] != -1) { outV.setRef(selected, vector[selected], offsetArray[0], offsetArray[1]); } else { - outV.setRef(selected, EMPTY_STRING, 0, EMPTY_STRING.length); + outV.setRef(selected, EMPTYSTRING, 0, EMPTYSTRING.length); } } else { outV.isNull[selected] = true; @@ -144,11 +167,12 @@ public void evaluate(VectorizedRowBatch batch) { for (int i = 0; i != n; ++i) { int selected = sel[i]; outV.isNull[selected] = false; - populateSubstrOffsets(vector[selected], start[selected], len[selected], startIdx, length, offsetArray); + populateSubstrOffsets(vector[selected], start[selected], len[selected], startIdx, + length, offsetArray); if (offsetArray[0] != -1) { outV.setRef(selected, vector[selected], 
offsetArray[0], offsetArray[1]); } else { - outV.setRef(selected, EMPTY_STRING, 0, EMPTY_STRING.length); + outV.setRef(selected, EMPTYSTRING, 0, EMPTYSTRING.length); } } } @@ -162,7 +186,7 @@ public void evaluate(VectorizedRowBatch batch) { if (offsetArray[0] != -1) { outV.setRef(i, vector[i], offsetArray[0], offsetArray[1]); } else { - outV.setRef(i, EMPTY_STRING, 0, EMPTY_STRING.length); + outV.setRef(i, EMPTYSTRING, 0, EMPTYSTRING.length); } } } @@ -174,7 +198,7 @@ public void evaluate(VectorizedRowBatch batch) { if (offsetArray[0] != -1) { outV.setRef(i, vector[i], offsetArray[0], offsetArray[1]); } else { - outV.setRef(i, EMPTY_STRING, 0, EMPTY_STRING.length); + outV.setRef(i, EMPTYSTRING, 0, EMPTYSTRING.length); } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java index 5b66d86..23b909a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java @@ -15,21 +15,26 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package org.apache.hadoop.hive.ql.exec.vector.expressions; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.io.Text; import org.apache.hadoop.hive.ql.udf.IUDFUnaryString; +import org.apache.hadoop.io.Text; +/** + * Expression for vectorized evaluation of unary UDFs on strings. + * An object of {@link IUDFUnaryString} is applied to every element of + * the vector. 
+ */ public class StringUnaryUDF extends VectorExpression { - - int colNum; - int outputColumn; - IUDFUnaryString func; - Text s; - - StringUnaryUDF (int colNum, int outputColumn, IUDFUnaryString func) { + private final int colNum; + private final int outputColumn; + private final IUDFUnaryString func; + private final Text s; + + StringUnaryUDF(int colNum, int outputColumn, IUDFUnaryString func) { this.colNum = colNum; this.outputColumn = outputColumn; this.func = func; @@ -38,56 +43,55 @@ @Override public void evaluate(VectorizedRowBatch batch) { - + if (childExpressions != null) { super.evaluateChildren(batch); } - + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; int[] sel = batch.selected; int n = batch.size; byte[][] vector = inputColVector.vector; - int start[] = inputColVector.start; - int length[] = inputColVector.length; + int [] start = inputColVector.start; + int [] length = inputColVector.length; BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn]; - byte[][] outputVector = outV.vector; Text t; - + if (n == 0) { //Nothing to do return; } - + // Design Note: In the future, if this function can be implemented // directly to translate input to output without creating new // objects, performance can probably be improved significantly. - // It's implemented in the simplest way now, just calling the + // It's implemented in the simplest way now, just calling the // existing built-in function. 
if (inputColVector.noNulls) { outV.noNulls = true; if (inputColVector.isRepeating) { outV.isRepeating = true; - s.set(vector[0], start[0], length[0]); + s.set(vector[0], start[0], length[0]); t = func.evaluate(s); outV.setRef(0, t.getBytes(), 0, t.getLength()); } else if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - s.set(vector[i], start[i], length[i]); + s.set(vector[i], start[i], length[i]); t = func.evaluate(s); outV.setRef(i, t.getBytes(), 0, t.getLength()); } outV.isRepeating = false; } else { for(int i = 0; i != n; i++) { - s.set(vector[i], start[i], length[i]); + s.set(vector[i], start[i], length[i]); t = func.evaluate(s); outV.setRef(i, t.getBytes(), 0, t.getLength()); } outV.isRepeating = false; } - } else { + } else { // Handle case with nulls. Don't do function if the value is null, to save time, // because calling the function can be expensive. outV.noNulls = false; @@ -95,7 +99,7 @@ public void evaluate(VectorizedRowBatch batch) { outV.isRepeating = true; outV.isNull[0] = inputColVector.isNull[0]; if (!inputColVector.isNull[0]) { - s.set(vector[0], start[0], length[0]); + s.set(vector[0], start[0], length[0]); t = func.evaluate(s); outV.setRef(0, t.getBytes(), 0, t.getLength()); } @@ -103,7 +107,7 @@ public void evaluate(VectorizedRowBatch batch) { for(int j=0; j != n; j++) { int i = sel[j]; if (!inputColVector.isNull[i]) { - s.set(vector[i], start[i], length[i]); + s.set(vector[i], start[i], length[i]); t = func.evaluate(s); outV.setRef(i, t.getBytes(), 0, t.getLength()); } @@ -113,7 +117,7 @@ public void evaluate(VectorizedRowBatch batch) { } else { for(int i = 0; i != n; i++) { if (!inputColVector.isNull[i]) { - s.set(vector[i], start[i], length[i]); + s.set(vector[i], start[i], length[i]); t = func.evaluate(s); outV.setRef(i, t.getBytes(), 0, t.getLength()); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUpper.java 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUpper.java index 126d183..c5e748b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUpper.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUpper.java @@ -18,9 +18,13 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; -import org.apache.hadoop.hive.ql.udf.UDFUpper; import org.apache.hadoop.hive.ql.udf.IUDFUnaryString; +import org.apache.hadoop.hive.ql.udf.UDFUpper; +/** + * Expression to convert a string to upper case. + * Extends {@link StringUnaryUDF}. + */ public class StringUpper extends StringUnaryUDF { public StringUpper(int colNum, int outputColumn) { super(colNum, outputColumn, (IUDFUnaryString) new UDFUpper()); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDayOfMonthLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDayOfMonthLong.java index 60ab2b1..08e6d24 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDayOfMonthLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDayOfMonthLong.java @@ -20,11 +20,14 @@ import java.util.Calendar; - +/** + * Expression to get day of month. 
+ * Extends {@link VectorUDFTimestampFieldLong} + */ public final class VectorUDFDayOfMonthLong extends VectorUDFTimestampFieldLong { public VectorUDFDayOfMonthLong(int colNum, int outputColumn) { - super(Calendar.DAY_OF_MONTH, colNum,outputColumn); + super(Calendar.DAY_OF_MONTH, colNum, outputColumn); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFHourLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFHourLong.java index 9b05476..0107510 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFHourLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFHourLong.java @@ -20,11 +20,14 @@ import java.util.Calendar; - +/** + * Returns hour of day. + * Extends {@link VectorUDFTimestampFieldLong} + */ public final class VectorUDFHourLong extends VectorUDFTimestampFieldLong { public VectorUDFHourLong(int colNum, int outputColumn) { - super(Calendar.HOUR_OF_DAY, colNum,outputColumn); + super(Calendar.HOUR_OF_DAY, colNum, outputColumn); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMinuteLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMinuteLong.java index 8589c3c..17ea01b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMinuteLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMinuteLong.java @@ -20,11 +20,14 @@ import java.util.Calendar; - +/** + * Returns minute value. 
+ * Extends {@link VectorUDFTimestampFieldLong} + */ public final class VectorUDFMinuteLong extends VectorUDFTimestampFieldLong { public VectorUDFMinuteLong(int colNum, int outputColumn) { - super(Calendar.MINUTE, colNum,outputColumn); + super(Calendar.MINUTE, colNum, outputColumn); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthLong.java index 5057060..e9a6115 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthLong.java @@ -20,7 +20,10 @@ import java.util.Calendar; - +/** + * Returns month value. + * Extends {@link VectorUDFTimestampFieldLong} + */ public final class VectorUDFMonthLong extends VectorUDFTimestampFieldLong { public VectorUDFMonthLong(int colNum, int outputColumn) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFSecondLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFSecondLong.java index b11f1f0..4a85caf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFSecondLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFSecondLong.java @@ -20,11 +20,14 @@ import java.util.Calendar; - +/** + * Expression to get seconds. 
+ * Extends {@link VectorUDFTimestampFieldLong} + */ public final class VectorUDFSecondLong extends VectorUDFTimestampFieldLong { public VectorUDFSecondLong(int colNum, int outputColumn) { - super(Calendar.SECOND, colNum,outputColumn); + super(Calendar.SECOND, colNum, outputColumn); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldLong.java index 36ad0ad..26df4db 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldLong.java @@ -24,8 +24,9 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; - - +/** + * Abstract class to return various fields from a Timestamp. + */ public abstract class VectorUDFTimestampFieldLong extends VectorExpression { protected final int colNum; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampLong.java index 95e2c6d..9281ae7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampLong.java @@ -18,8 +18,10 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; - - +/** + * Return Unix Timestamp. 
+ * Extends {@link VectorUDFTimestampFieldLong} + */ public final class VectorUDFUnixTimeStampLong extends VectorUDFTimestampFieldLong { @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFWeekOfYearLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFWeekOfYearLong.java index a60a086..614acea 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFWeekOfYearLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFWeekOfYearLong.java @@ -20,7 +20,10 @@ import java.util.Calendar; - +/** + * Expression to get week of year. + * Extends {@link VectorUDFTimestampFieldLong} + */ public final class VectorUDFWeekOfYearLong extends VectorUDFTimestampFieldLong { public VectorUDFWeekOfYearLong(int colNum, int outputColumn) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearLong.java index 852008d..885d193 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearLong.java @@ -21,7 +21,10 @@ import java.util.Arrays; import java.util.Calendar; - +/** + * Expression to get year as a long. 
+ * Extends {@link VectorUDFTimestampFieldLong} + */ public final class VectorUDFYearLong extends VectorUDFTimestampFieldLong { /* year boundaries in nanoseconds */ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java index bee2a63..a4c1999 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java @@ -1,3 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.hadoop.hive.ql.exec.vector.expressions.templates; import java.io.BufferedReader; @@ -7,9 +25,12 @@ import java.io.FileWriter; import java.io.IOException; +/** + * This class generates java classes from the templates. 
+ */ public class CodeGen { - static String [][] templateExpansions = + private static String [][] templateExpansions = { {"ColumnArithmeticScalar", "Add", "long", "long", "+"}, {"ColumnArithmeticScalar", "Subtract", "long", "long", "-"}, @@ -195,14 +216,20 @@ {"ColumnUnaryMinus", "long"}, {"ColumnUnaryMinus", "double"}, - //template, , , , , - {"VectorUDAFMinMax", "VectorUDAFMinLong", "long", "<", "min", "_FUNC_(expr) - Returns the minimum value of expr (vectorized, type: long)"}, - {"VectorUDAFMinMax", "VectorUDAFMinDouble", "double", "<", "min", "_FUNC_(expr) - Returns the minimum value of expr (vectorized, type: double)"}, - {"VectorUDAFMinMax", "VectorUDAFMaxLong", "long", ">", "max", "_FUNC_(expr) - Returns the maximum value of expr (vectorized, type: long)"}, - {"VectorUDAFMinMax", "VectorUDAFMaxDouble", "double", ">", "max", "_FUNC_(expr) - Returns the maximum value of expr (vectorized, type: double)"}, - - {"VectorUDAFMinMaxString", "VectorUDAFMinString", "<", "min", "_FUNC_(expr) - Returns the minimum value of expr (vectorized, type: string)"}, - {"VectorUDAFMinMaxString", "VectorUDAFMaxString", ">", "max", "_FUNC_(expr) - Returns the minimum value of expr (vectorized, type: string)"}, + // template, , , , , + {"VectorUDAFMinMax", "VectorUDAFMinLong", "long", "<", "min", + "_FUNC_(expr) - Returns the minimum value of expr (vectorized, type: long)"}, + {"VectorUDAFMinMax", "VectorUDAFMinDouble", "double", "<", "min", + "_FUNC_(expr) - Returns the minimum value of expr (vectorized, type: double)"}, + {"VectorUDAFMinMax", "VectorUDAFMaxLong", "long", ">", "max", + "_FUNC_(expr) - Returns the maximum value of expr (vectorized, type: long)"}, + {"VectorUDAFMinMax", "VectorUDAFMaxDouble", "double", ">", "max", + "_FUNC_(expr) - Returns the maximum value of expr (vectorized, type: double)"}, + + {"VectorUDAFMinMaxString", "VectorUDAFMinString", "<", "min", + "_FUNC_(expr) - Returns the minimum value of expr (vectorized, type: string)"}, + 
{"VectorUDAFMinMaxString", "VectorUDAFMaxString", ">", "max", + "_FUNC_(expr) - Returns the maximum value of expr (vectorized, type: string)"}, //template, , {"VectorUDAFSum", "VectorUDAFSumLong", "long"}, @@ -210,15 +237,32 @@ {"VectorUDAFAvg", "VectorUDAFAvgLong", "long"}, {"VectorUDAFAvg", "VectorUDAFAvgDouble", "double"}, - //template, , , , , - {"VectorUDAFVar", "VectorUDAFVarPopLong", "long", "myagg.variance / myagg.count", "variance, var_pop", "_FUNC_(x) - Returns the variance of a set of numbers (vectorized, long)"}, - {"VectorUDAFVar", "VectorUDAFVarPopDouble", "double", "myagg.variance / myagg.count", "variance, var_pop", "_FUNC_(x) - Returns the variance of a set of numbers (vectorized, double)"}, - {"VectorUDAFVar", "VectorUDAFVarSampLong", "long", "myagg.variance / (myagg.count-1.0)", "var_samp", "_FUNC_(x) - Returns the sample variance of a set of numbers (vectorized, long)"}, - {"VectorUDAFVar", "VectorUDAFVarSampDouble", "double", "myagg.variance / (myagg.count-1.0)", "var_samp", "_FUNC_(x) - Returns the sample variance of a set of numbers (vectorized, double)"}, - {"VectorUDAFVar", "VectorUDAFStdPopLong", "long", "Math.sqrt(myagg.variance / (myagg.count))", "std,stddev,stddev_pop", "_FUNC_(x) - Returns the standard deviation of a set of numbers (vectorized, long)"}, - {"VectorUDAFVar", "VectorUDAFStdPopDouble", "double", "Math.sqrt(myagg.variance / (myagg.count))", "std,stddev,stddev_pop", "_FUNC_(x) - Returns the standard deviation of a set of numbers (vectorized, double)"}, - {"VectorUDAFVar", "VectorUDAFStdSampLong", "long", "Math.sqrt(myagg.variance / (myagg.count-1.0))", "stddev_samp", "_FUNC_(x) - Returns the sample standard deviation of a set of numbers (vectorized, long)"}, - {"VectorUDAFVar", "VectorUDAFStdSampDouble", "double", "Math.sqrt(myagg.variance / (myagg.count-1.0))", "stddev_samp", "_FUNC_(x) - Returns the sample standard deviation of a set of numbers (vectorized, double)"}, + // template, , , , , + // + {"VectorUDAFVar", 
"VectorUDAFVarPopLong", "long", "myagg.variance / myagg.count", + "variance, var_pop", + "_FUNC_(x) - Returns the variance of a set of numbers (vectorized, long)"}, + {"VectorUDAFVar", "VectorUDAFVarPopDouble", "double", "myagg.variance / myagg.count", + "variance, var_pop", + "_FUNC_(x) - Returns the variance of a set of numbers (vectorized, double)"}, + {"VectorUDAFVar", "VectorUDAFVarSampLong", "long", "myagg.variance / (myagg.count-1.0)", + "var_samp", + "_FUNC_(x) - Returns the sample variance of a set of numbers (vectorized, long)"}, + {"VectorUDAFVar", "VectorUDAFVarSampDouble", "double", "myagg.variance / (myagg.count-1.0)", + "var_samp", + "_FUNC_(x) - Returns the sample variance of a set of numbers (vectorized, double)"}, + {"VectorUDAFVar", "VectorUDAFStdPopLong", "long", + "Math.sqrt(myagg.variance / (myagg.count))", "std,stddev,stddev_pop", + "_FUNC_(x) - Returns the standard deviation of a set of numbers (vectorized, long)"}, + {"VectorUDAFVar", "VectorUDAFStdPopDouble", "double", + "Math.sqrt(myagg.variance / (myagg.count))", "std,stddev,stddev_pop", + "_FUNC_(x) - Returns the standard deviation of a set of numbers (vectorized, double)"}, + {"VectorUDAFVar", "VectorUDAFStdSampLong", "long", + "Math.sqrt(myagg.variance / (myagg.count-1.0))", "stddev_samp", + "_FUNC_(x) - Returns the sample standard deviation of a set of numbers (vectorized, long)"}, + {"VectorUDAFVar", "VectorUDAFStdSampDouble", "double", + "Math.sqrt(myagg.variance / (myagg.count-1.0))", "stddev_samp", + "_FUNC_(x) - Returns the sample standard deviation of a set of numbers (vectorized, double)"}, }; @@ -239,13 +283,13 @@ public CodeGen() { templateDirectory = System.getProperty("user.dir"); File f = new File(templateDirectory); outputDirectory = joinPath(f.getParent(), "gen"); - testCodeGen = new TestCodeGen(joinPath(f.getParent(), "test"),templateDirectory); + testCodeGen = new TestCodeGen(joinPath(f.getParent(), "test"), templateDirectory); } public CodeGen(String 
templateDirectory, String outputDirectory, String testOutputDirectory) { this.templateDirectory = templateDirectory; this.outputDirectory = outputDirectory; - testCodeGen = new TestCodeGen(testOutputDirectory,templateDirectory); + testCodeGen = new TestCodeGen(testOutputDirectory, templateDirectory); } /** @@ -418,16 +462,17 @@ private void generateFilterStringScalarCompareColumn(String[] tdesc) throws IOEx private void generateFilterStringColumnCompareScalar(String[] tdesc) throws IOException { String operatorName = tdesc[1]; String className = "FilterStringCol" + operatorName + "StringScalar"; - generateFilterStringColumnCompareScalar(tdesc,className); + generateFilterStringColumnCompareScalar(tdesc, className); } private void generateFilterStringColumnCompareColumn(String[] tdesc) throws IOException { String operatorName = tdesc[1]; String className = "FilterStringCol" + operatorName + "StringColumn"; - generateFilterStringColumnCompareScalar(tdesc,className); + generateFilterStringColumnCompareScalar(tdesc, className); } - private void generateFilterStringColumnCompareScalar(String[] tdesc, String className) throws IOException { + private void generateFilterStringColumnCompareScalar(String[] tdesc, String className) + throws IOException { String operatorSymbol = tdesc[2]; String outputFile = joinPath(this.outputDirectory, className + ".java"); // Read the template into a string; @@ -569,16 +614,14 @@ private void generateColumnBinaryOperatorScalar(String[] tdesc, String returnTyp templateString = templateString.replaceAll("", returnType); writeFile(outputFile, templateString); - if(returnType==null) - { + if(returnType==null) { testCodeGen.addColumnScalarFilterTestCases( true, className, inputColumnVectorType, operandType2, operatorSymbol); - }else - { + } else { testCodeGen.addColumnScalarOperationTestCases( true, className, @@ -586,7 +629,6 @@ private void generateColumnBinaryOperatorScalar(String[] tdesc, String returnTyp outputColumnVectorType, 
operandType2); } - } private void generateScalarBinaryOperatorColumn(String[] tdesc, String returnType, @@ -610,16 +652,14 @@ private void generateScalarBinaryOperatorColumn(String[] tdesc, String returnTyp templateString = templateString.replaceAll("", returnType); writeFile(outputFile, templateString); - if(returnType==null) - { + if(returnType==null) { testCodeGen.addColumnScalarFilterTestCases( false, className, inputColumnVectorType, operandType1, operatorSymbol); - }else - { + } else { testCodeGen.addColumnScalarOperationTestCases( false, className, diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/IUDFUnaryString.java ql/src/java/org/apache/hadoop/hive/ql/udf/IUDFUnaryString.java index 017a89e..360843d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/IUDFUnaryString.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/IUDFUnaryString.java @@ -1,3 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.io.Text; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java index 57ecb68..8881e0f 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java @@ -24,23 +24,24 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColEqualDoubleScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColGreaterLongColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColGreaterLongScalar; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.junit.Test; +/** + * Test cases for vectorized filter operator. + */ public class TestVectorFilterOperator { - - /* + + /** * Fundamental logic and performance tests for vector filters belong here. 
- * + * * For tests about filters to cover specific operator and data type combinations, * see also the other filter tests under org.apache.hadoop.hive.ql.exec.vector.expressions */ - public static class FakeDataReader { - int size; - VectorizedRowBatch vrg; - int currentSize = 0; + private final int size; + private final VectorizedRowBatch vrg; + private int currentSize = 0; private final int numCols; private final int len = 1024; @@ -51,9 +52,7 @@ public FakeDataReader(int size, int numCols) { for (int i = 0; i < numCols; i++) { try { Thread.sleep(2); - } catch (InterruptedException e) { - - } + } catch (InterruptedException ignore) {} vrg.cols[i] = getLongVector(len); } } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java index 65657ea..d1d2ea9 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java @@ -1665,7 +1665,8 @@ public void testAggregateLongKeyAggregate ( HashMap expected) throws HiveException { @SuppressWarnings("unchecked") - FakeVectorRowBatchFromLongIterables fdr = new FakeVectorRowBatchFromLongIterables(batchSize, list, values); + FakeVectorRowBatchFromLongIterables fdr = new FakeVectorRowBatchFromLongIterables(batchSize, + list, values); testAggregateLongKeyIterable (aggregateName, fdr, expected); } @@ -1701,7 +1702,8 @@ public void testAggregateLongAggregate ( Object expected) throws HiveException { @SuppressWarnings("unchecked") - FakeVectorRowBatchFromLongIterables fdr = new FakeVectorRowBatchFromLongIterables(batchSize, values); + FakeVectorRowBatchFromLongIterables fdr = new FakeVectorRowBatchFromLongIterables(batchSize, + values); testAggregateLongIterable (aggregateName, fdr, expected); } @@ -1711,7 +1713,8 @@ public void testAggregateCountStar ( Object expected) throws HiveException { 
@SuppressWarnings("unchecked") - FakeVectorRowBatchFromLongIterables fdr = new FakeVectorRowBatchFromLongIterables(batchSize, values); + FakeVectorRowBatchFromLongIterables fdr = new FakeVectorRowBatchFromLongIterables(batchSize, + values); testAggregateCountStarIterable (fdr, expected); } @@ -1904,7 +1907,8 @@ public void testAggregateStringIterable ( mapColumnNames.put("A", 0); VectorizationContext ctx = new VectorizationContext(mapColumnNames, 1); - GroupByDesc desc = buildGroupByDescType (ctx, aggregateName, "A", TypeInfoFactory.stringTypeInfo); + GroupByDesc desc = buildGroupByDescType(ctx, aggregateName, "A", + TypeInfoFactory.stringTypeInfo); VectorGroupByOperator vgo = new VectorGroupByOperator(ctx, desc); @@ -1934,7 +1938,8 @@ public void testAggregateDoubleIterable ( mapColumnNames.put("A", 0); VectorizationContext ctx = new VectorizationContext(mapColumnNames, 1); - GroupByDesc desc = buildGroupByDescType (ctx, aggregateName, "A", TypeInfoFactory.doubleTypeInfo); + GroupByDesc desc = buildGroupByDescType (ctx, aggregateName, "A", + TypeInfoFactory.doubleTypeInfo); VectorGroupByOperator vgo = new VectorGroupByOperator(ctx, desc); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java index dd2f839..cd9e836 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java @@ -39,6 +39,9 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.junit.Test; +/** + * Unit tests for vectorized select operator. 
+ */ public class TestVectorSelectOperator { static class ValidatorVectorSelectOperator extends VectorSelectOperator { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java index 7de0113..189799e 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java @@ -1,3 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.hadoop.hive.ql.exec.vector; import static org.junit.Assert.assertEquals; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java index 9a56c52..efbc758 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java @@ -59,7 +59,7 @@ import org.junit.Test; /** - * Class that tests the functionality of VectorizedRowBatchCtx + * Class that tests the functionality of VectorizedRowBatchCtx. 
*/ public class TestVectorizedRowBatchCtx { @@ -81,7 +81,7 @@ public void openFileSystem() throws Exception { fs.delete(testFilePath, false); } - private void InitSerde() { + private void initSerde() { tbl = new Properties(); // Set the configuration parameters @@ -325,8 +325,7 @@ void ValidateRowBatch(VectorizedRowBatch batch) throws IOException, SerDeExcepti @Test public void TestCtx() throws Exception { - - InitSerde(); + initSerde(); WriteRCFile(this.fs, this.testFilePath, this.conf); VectorizedRowBatch batch = GetRowBatch(); ValidateRowBatch(batch); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestConstantVectorExpression.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestConstantVectorExpression.java index c656ce4..4321545 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestConstantVectorExpression.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestConstantVectorExpression.java @@ -30,6 +30,9 @@ import org.apache.hadoop.hive.ql.exec.vector.util.VectorizedRowGroupGenUtil; import org.junit.Test; +/** + * Test vector expressions with constants. + */ public class TestConstantVectorExpression { @Test diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestUnaryMinus.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestUnaryMinus.java index 0cf3bf8..289a508 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestUnaryMinus.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestUnaryMinus.java @@ -26,12 +26,15 @@ import org.apache.hadoop.hive.ql.exec.vector.util.VectorizedRowGroupGenUtil; import org.junit.Test; +/** + * Unit tests for unary minus. 
+ */ public class TestUnaryMinus { @Test public void testUnaryMinus() { VectorizedRowBatch vrg = VectorizedRowGroupGenUtil.getVectorizedRowBatch(1024, 2, 23); - LongColUnaryMinus expr = new LongColUnaryMinus(0,1); + LongColUnaryMinus expr = new LongColUnaryMinus(0, 1); expr.evaluate(vrg); //verify long[] inVector = ((LongColumnVector) vrg.cols[0]).vector; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java index a5aadac..9de8461 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java @@ -23,22 +23,23 @@ import static org.junit.Assert.assertTrue; import junit.framework.Assert; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddLongColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddLongScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColDivideLongColumn; import org.apache.hadoop.hive.ql.exec.vector.util.VectorizedRowGroupGenUtil; import org.junit.Test; +/** + * Unit tests for vector arithmetic expressions. 
+ */ public class TestVectorArithmeticExpressions { @Test public void testLongColAddLongScalarNoNulls() { - VectorizedRowBatch vrg = getVectorizedRowBatchSingleLongVector - (VectorizedRowBatch.DEFAULT_SIZE); + VectorizedRowBatch vrg = getVectorizedRowBatchSingleLongVector(VectorizedRowBatch.DEFAULT_SIZE); LongColAddLongScalar expr = new LongColAddLongScalar(0, 23, 1); expr.evaluate(vrg); //verify @@ -60,7 +61,7 @@ private VectorizedRowBatch getVectorizedRowBatchSingleLongVector(int size) { vrg.size = size; return vrg; } - + public static VectorizedRowBatch getVectorizedRowBatch2LongInDoubleOut() { VectorizedRowBatch batch = new VectorizedRowBatch(3); LongColumnVector lcv, lcv2; @@ -73,7 +74,7 @@ public static VectorizedRowBatch getVectorizedRowBatch2LongInDoubleOut() { batch.cols[1] = lcv2; for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) { lcv2.vector[i] = i * 37; - } + } batch.cols[2] = new DoubleColumnVector(); batch.size = VectorizedRowBatch.DEFAULT_SIZE; return batch; @@ -81,14 +82,14 @@ public static VectorizedRowBatch getVectorizedRowBatch2LongInDoubleOut() { @Test public void testLongColAddLongScalarWithNulls() { - VectorizedRowBatch batch = getVectorizedRowBatchSingleLongVector - (VectorizedRowBatch.DEFAULT_SIZE); + VectorizedRowBatch batch = getVectorizedRowBatchSingleLongVector( + VectorizedRowBatch.DEFAULT_SIZE); LongColumnVector lcv = (LongColumnVector) batch.cols[0]; LongColumnVector lcvOut = (LongColumnVector) batch.cols[1]; TestVectorizedRowBatch.addRandomNulls(lcv); LongColAddLongScalar expr = new LongColAddLongScalar(0, 23, 1); expr.evaluate(batch); - + // verify for (int i=0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) { if (!lcv.isNull[i]) { @@ -109,8 +110,7 @@ public void testLongColAddLongScalarWithRepeating() { LongColAddLongScalar expr; // Case 1: is repeating, no nulls - batch = getVectorizedRowBatchSingleLongVector - (VectorizedRowBatch.DEFAULT_SIZE); + batch = getVectorizedRowBatchSingleLongVector(VectorizedRowBatch.DEFAULT_SIZE); 
in = (LongColumnVector) batch.cols[0]; in.isRepeating = true; out = (LongColumnVector) batch.cols[1]; @@ -123,8 +123,7 @@ public void testLongColAddLongScalarWithRepeating() { Assert.assertEquals(out.vector[0], 0 * 37 + 23); // Case 2: is repeating, has nulls - batch = getVectorizedRowBatchSingleLongVector - (VectorizedRowBatch.DEFAULT_SIZE); + batch = getVectorizedRowBatchSingleLongVector(VectorizedRowBatch.DEFAULT_SIZE); in = (LongColumnVector) batch.cols[0]; in.isRepeating = true; in.noNulls = false; @@ -142,9 +141,9 @@ public void testLongColAddLongScalarWithRepeating() { Assert.assertEquals(true, out.isNull[0]); verifyLongNullDataVectorEntries(out, batch.selected, batch.selectedInUse, batch.size); } - - /* Make sure all the NULL entries in this long column output vector have their data vector - * element set to the correct value, as per the specification, to prevent later arithmetic + + /* Make sure all the NULL entries in this long column output vector have their data vector + * element set to the correct value, as per the specification, to prevent later arithmetic * errors (e.g. zero-divide). 
*/ public static void verifyLongNullDataVectorEntries( @@ -155,8 +154,7 @@ public static void verifyLongNullDataVectorEntries( if (v.isNull[0]) { assertEquals(LongColumnVector.NULL_VALUE, v.vector[0]); } - } - else if (selectedInUse) { + } else if (selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; if (v.isNull[i]) { @@ -167,7 +165,7 @@ else if (selectedInUse) { for (int i = 0; i != n; i++) { if (v.isNull[i]) { assertEquals(LongColumnVector.NULL_VALUE, v.vector[i]); - } + } } } } @@ -217,7 +215,7 @@ public void testLongColAddLongColumn() { LongColAddLongColumn expr2 = new LongColAddLongColumn(3, 4, 5); expr2.evaluate(vrg); for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) { - assertEquals(seed * ( 4 + 5*(i+1)), lcv5.vector[i]); + assertEquals(seed * (4 + 5*(i+1)), lcv5.vector[i]); } // Repeating with other as nullable @@ -237,48 +235,48 @@ public void testLongColAddLongColumn() { assertTrue(lcv5.isRepeating); assertTrue(lcv5.isNull[0]); verifyLongNullDataVectorEntries(lcv5, vrg.selected, vrg.selectedInUse, vrg.size); - + // Neither input has nulls. Verify that this propagates to output. vrg.selectedInUse = false; lcv0.noNulls = true; lcv1.noNulls = true; lcv0.isRepeating = false; - lcv1.isRepeating = false; + lcv1.isRepeating = false; lcv2.noNulls = false; // set output noNulls to true to make sure it gets over-written lcv2.isRepeating = true; // similarly with isRepeating expr.evaluate(vrg); assertTrue(lcv2.noNulls); assertFalse(lcv2.isRepeating); } - + @Test public void testLongColDivideLongColumn() { - + /* Testing for equality of doubles after a math operation is * not always reliable so use this as a tolerance. 
*/ - final double eps = 1e-7d; + final double eps = 1e-7d; VectorizedRowBatch batch = getVectorizedRowBatch2LongInDoubleOut(); LongColDivideLongColumn expr = new LongColDivideLongColumn(0, 1, 2); batch.cols[0].isNull[1] = true; batch.cols[0].noNulls = false; batch.cols[1].noNulls = false; DoubleColumnVector out = (DoubleColumnVector) batch.cols[2]; - + // Set so we can verify they are reset by operation out.noNulls = true; out.isRepeating = true; - + expr.evaluate(batch); - + // 0/0 for entry 0 should work but generate NaN assertTrue(Double.isNaN(out.vector[0])); - + // verify NULL output in entry 1 is correct assertTrue(out.isNull[1]); assertTrue(Double.isNaN(out.vector[1])); - // check entries beyond first 2 + // check entries beyond first 2 for (int i = 2; i != batch.size; i++) { assertTrue(out.vector[i] > 1.0d - eps && out.vector[i] < 1.0d + eps); } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorExpressionWriters.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorExpressionWriters.java index 12a9e2e..1899c3b 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorExpressionWriters.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorExpressionWriters.java @@ -46,6 +46,9 @@ import org.apache.hadoop.io.Writable; import org.junit.Test; +/** + * Unit tests for vector expression writers. 
+ */ public class TestVectorExpressionWriters { private final int vectorSize = 5; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java index f15bdc0..8321772 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java @@ -35,6 +35,9 @@ import org.junit.Assert; import org.junit.Test; +/** + * Unit tests for filter expressions. + */ public class TestVectorFilterExpressions { @Test @@ -76,8 +79,7 @@ public void testFilterLongColEqualLongColumn() { } @Test - public void testColOpScalarNumericFilterNullAndRepeatingLogic() - { + public void testColOpScalarNumericFilterNullAndRepeatingLogic() { // No nulls, not repeating FilterLongColGreaterLongScalar f = new FilterLongColGreaterLongScalar(0, 1); VectorizedRowBatch batch = this.getSimpleLongBatch(); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java index 2426588..d7b5410 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java @@ -116,7 +116,7 @@ private VectorizedRowBatch getBatchThreeBooleanCols() { i = 5; v0.vector[i] = 0; v0.isNull[i] = true; v1.vector[i] = 1; v1.isNull[i] = false; // NULL 1 i = 6; v0.vector[i] = 0; v0.isNull[i] = false; v1.vector[i] = 0; v1.isNull[i] = true; // 0 NULL i = 7; v0.vector[i] = 1; v0.isNull[i] = false; v1.vector[i] = 1; v1.isNull[i] = true; // 1 NULL - i = 8; v0.vector[i] = 1; v0.isNull[i] = true; v1.vector[i] = 1; v1.isNull[i] = true; // NULL NULL + i = 8; v0.vector[i] = 1; v0.isNull[i] = true; 
v1.vector[i] = 1; v1.isNull[i] = true; // NULL NULL v0.noNulls = false; v1.noNulls = false; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java index 676199d..fa4da40 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java @@ -22,14 +22,12 @@ import static org.junit.Assert.assertTrue; import junit.framework.Assert; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongScalarSubtractLongColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongScalarModuloLongColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColDivideLongScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.LongScalarDivideLongColumn; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongScalarModuloLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongScalarSubtractLongColumn; import org.junit.Test; /** @@ -37,12 +35,12 @@ * the left and a column vector on the right. */ public class TestVectorScalarColArithmetic { - + /* Testing for equality of doubles after a math operation is * not always reliable so use this as a tolerance. 
*/ - private final static double eps = 1e-7d; - + private final double EPS = 1e-7d; + private VectorizedRowBatch getVectorizedRowBatchSingleLongVector(int size) { VectorizedRowBatch batch = new VectorizedRowBatch(2, size); LongColumnVector lcv = new LongColumnVector(size); @@ -67,13 +65,13 @@ private VectorizedRowBatch getBatchSingleLongVectorPositiveNonZero() { batch.size = size; return batch; } - + @Test public void testLongScalarModuloLongColNoNulls() { VectorizedRowBatch batch = getBatchSingleLongVectorPositiveNonZero(); LongScalarModuloLongColumn expr = new LongScalarModuloLongColumn(100, 0, 1); expr.evaluate(batch); - + // verify for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) { Assert.assertEquals(100 % ((i + 1) * 37), ((LongColumnVector) batch.cols[1]).vector[i]); @@ -81,14 +79,14 @@ public void testLongScalarModuloLongColNoNulls() { Assert.assertTrue(((LongColumnVector)batch.cols[1]).noNulls); Assert.assertFalse(((LongColumnVector)batch.cols[1]).isRepeating); } - + @Test public void testLongScalarSubtractLongColNoNulls() { VectorizedRowBatch batch = getVectorizedRowBatchSingleLongVector( VectorizedRowBatch.DEFAULT_SIZE); LongScalarSubtractLongColumn expr = new LongScalarSubtractLongColumn(100, 0, 1); expr.evaluate(batch); - + //verify for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) { Assert.assertEquals(100 - i * 37, ((LongColumnVector) batch.cols[1]).vector[i]); @@ -105,7 +103,7 @@ public void testLongScalarSubtractLongColWithNulls() { TestVectorizedRowBatch.addRandomNulls(lcv); LongScalarSubtractLongColumn expr = new LongScalarSubtractLongColumn(100, 0, 1); expr.evaluate(batch); - + //verify for (int i=0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) { if (!lcv.isNull[i]) { @@ -117,7 +115,7 @@ public void testLongScalarSubtractLongColWithNulls() { Assert.assertFalse(((LongColumnVector)batch.cols[1]).noNulls); Assert.assertFalse(((LongColumnVector)batch.cols[1]).isRepeating); TestVectorArithmeticExpressions.verifyLongNullDataVectorEntries( 
- (LongColumnVector) batch.cols[1], batch.selected, batch.selectedInUse, batch.size); + (LongColumnVector) batch.cols[1], batch.selected, batch.selectedInUse, batch.size); } @Test @@ -135,7 +133,7 @@ public void testLongScalarSubtractLongColWithRepeating() { out.isRepeating = false; expr = new LongScalarSubtractLongColumn(100, 0, 1); expr.evaluate(batch); - + // verify Assert.assertTrue(out.isRepeating); Assert.assertTrue(out.noNulls); @@ -155,46 +153,46 @@ public void testLongScalarSubtractLongColWithRepeating() { out.noNulls = true; expr = new LongScalarSubtractLongColumn(100, 0, 1); expr.evaluate(batch); - + // verify Assert.assertTrue(out.isRepeating); Assert.assertFalse(out.noNulls); Assert.assertEquals(true, out.isNull[0]); TestVectorArithmeticExpressions.verifyLongNullDataVectorEntries( - out, batch.selected, batch.selectedInUse, batch.size); + out, batch.selected, batch.selectedInUse, batch.size); } - - private static boolean equalsWithinTolerance(double a, double b) { - return Math.abs(a - b) < eps; + + private boolean equalsWithinTolerance(double a, double b) { + return Math.abs(a - b) < EPS; } - + @Test public void testLongScalarDivide() { - VectorizedRowBatch batch = + VectorizedRowBatch batch = TestVectorArithmeticExpressions.getVectorizedRowBatch2LongInDoubleOut(); LongColDivideLongScalar expr = new LongColDivideLongScalar(0, 100, 2); batch.cols[0].isNull[0] = true; batch.cols[0].noNulls = false; DoubleColumnVector out = (DoubleColumnVector) batch.cols[2]; out.noNulls = true; // set now so we can verify it changed - out.isRepeating = true; + out.isRepeating = true; expr.evaluate(batch); - + // verify NULL output in entry 0 is correct assertTrue(out.isNull[0]); assertTrue(Double.isNaN(out.vector[0])); - // check entries beyond first one + // check entries beyond first one for (int i = 1; i != batch.size; i++) { assertTrue(equalsWithinTolerance((i * 37) / 100d, out.vector[i])); } assertFalse(out.noNulls); assertFalse(out.isRepeating); } - - @Test + + 
@Test public void testScalarLongDivide() { - VectorizedRowBatch batch = + VectorizedRowBatch batch = TestVectorArithmeticExpressions.getVectorizedRowBatch2LongInDoubleOut(); LongScalarDivideLongColumn expr = new LongScalarDivideLongColumn(100, 0, 2); batch.cols[0].isNull[1] = true; @@ -203,15 +201,15 @@ public void testScalarLongDivide() { out.noNulls = true; // set now so we can verify it changed out.isRepeating = true; expr.evaluate(batch); - + // verify zero-divide result for position 0 assertTrue(Double.isInfinite(out.vector[0])); - + // verify NULL output in entry 1 is correct assertTrue(out.isNull[1]); assertTrue(Double.isNaN(out.vector[1])); - // check entries beyond 2nd one + // check entries beyond 2nd one for (int i = 2; i != batch.size; i++) { assertTrue(equalsWithinTolerance(100d / (i * 37), out.vector[i])); } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExpressions.java index e8c7404..a088069 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExpressions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExpressions.java @@ -21,6 +21,7 @@ import java.sql.Timestamp; import java.util.ArrayList; import java.util.Calendar; +import java.util.List; import java.util.Random; import junit.framework.Assert; @@ -42,6 +43,9 @@ import org.apache.hadoop.io.LongWritable; import org.junit.Test; +/** + * Unit tests for timestamp expressions. 
+ */ public class TestVectorTimestampExpressions { /* copied over from VectorUDFTimestampFieldLong */ @@ -63,7 +67,7 @@ private TimestampWritable toTimestampWritable(long nanos) { } private long[] getAllBoundaries() { - ArrayList boundaries = new ArrayList(1); + List boundaries = new ArrayList(1); Calendar c = Calendar.getInstance(); c.setTimeInMillis(0); // c.set doesn't reset millis for (int year = 1902; year <= 2038; year++) { @@ -126,7 +130,7 @@ private void verifyUDFYearLong(VectorizedRowBatch batch) { Assert.assertEquals(batch.cols[in].noNulls, batch.cols[out].noNulls); for (int i = 0; i < batch.size; i++) { - if (batch.cols[in].noNulls || batch.cols[in].isNull[i] == false) { + if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) { if (!batch.cols[in].noNulls) { Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); } @@ -190,7 +194,7 @@ private void verifyUDFDayOfMonthLong(VectorizedRowBatch batch) { Assert.assertEquals(batch.cols[in].noNulls, batch.cols[out].noNulls); for (int i = 0; i < batch.size; i++) { - if (batch.cols[in].noNulls || batch.cols[in].isNull[i] == false) { + if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) { if (!batch.cols[in].noNulls) { Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); } @@ -252,7 +256,7 @@ private void verifyUDFHourLong(VectorizedRowBatch batch) { Assert.assertEquals(batch.cols[in].noNulls, batch.cols[out].noNulls); for (int i = 0; i < batch.size; i++) { - if (batch.cols[in].noNulls || batch.cols[in].isNull[i] == false) { + if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) { if (!batch.cols[in].noNulls) { Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); } @@ -314,7 +318,7 @@ private void verifyUDFMinuteLong(VectorizedRowBatch batch) { Assert.assertEquals(batch.cols[in].noNulls, batch.cols[out].noNulls); for (int i = 0; i < batch.size; i++) { - if (batch.cols[in].noNulls || batch.cols[in].isNull[i] == false) { + if 
(batch.cols[in].noNulls || !batch.cols[in].isNull[i]) { if (!batch.cols[in].noNulls) { Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); } @@ -376,7 +380,7 @@ private void verifyUDFMonthLong(VectorizedRowBatch batch) { Assert.assertEquals(batch.cols[in].noNulls, batch.cols[out].noNulls); for (int i = 0; i < batch.size; i++) { - if (batch.cols[in].noNulls || batch.cols[in].isNull[i] == false) { + if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) { if (!batch.cols[in].noNulls) { Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); } @@ -438,7 +442,7 @@ private void verifyUDFSecondLong(VectorizedRowBatch batch) { Assert.assertEquals(batch.cols[in].noNulls, batch.cols[out].noNulls); for (int i = 0; i < batch.size; i++) { - if (batch.cols[in].noNulls || batch.cols[in].isNull[i] == false) { + if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) { if (!batch.cols[in].noNulls) { Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); } @@ -489,7 +493,8 @@ private void compareToUDFUnixTimeStampLong(long t, long y) { TimestampWritable tsw = toTimestampWritable(t); LongWritable res = udf.evaluate(tsw); if(res.get() != y) { - System.out.printf("%d vs %d for %d, %d\n", res.get(), y, t, tsw.getTimestamp().getTime()/1000); + System.out.printf("%d vs %d for %d, %d\n", res.get(), y, t, + tsw.getTimestamp().getTime()/1000); } Assert.assertEquals(res.get(), y); @@ -504,7 +509,7 @@ private void verifyUDFUnixTimeStampLong(VectorizedRowBatch batch) { Assert.assertEquals(batch.cols[in].noNulls, batch.cols[out].noNulls); for (int i = 0; i < batch.size; i++) { - if (batch.cols[in].noNulls || batch.cols[in].isNull[i] == false) { + if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) { if (!batch.cols[in].noNulls) { Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); } @@ -566,7 +571,7 @@ private void verifyUDFWeekOfYearLong(VectorizedRowBatch batch) { 
Assert.assertEquals(batch.cols[in].noNulls, batch.cols[out].noNulls); for (int i = 0; i < batch.size; i++) { - if (batch.cols[in].noNulls || batch.cols[in].isNull[i] == false) { + if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) { if (!batch.cols[in].noNulls) { Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/FakeVectorRowBatchFromObjectIterables.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/FakeVectorRowBatchFromObjectIterables.java index 579f931..c8eaea1 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/FakeVectorRowBatchFromObjectIterables.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/FakeVectorRowBatchFromObjectIterables.java @@ -41,7 +41,7 @@ private final VectorizedRowBatch batch; private boolean eof; private final int batchSize; - + public String[] getTypes() { return this.types; } @@ -49,9 +49,8 @@ /** * Helper interface for assigning values to primitive vector column types. 
*/ - private static interface ColumnVectorAssign - { - public void assign( + private static interface ColumnVectorAssign { + void assign( ColumnVector columnVector, int row, Object value); @@ -109,7 +108,7 @@ public void assign( lcv.vector[row] = TimestampUtils.getTimeNanoSec(t); } }; - + } else if (types[i].equalsIgnoreCase("string")) { batch.cols[i] = new BytesColumnVector(batchSize); columnAssign[i] = new ColumnVectorAssign() { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/VectorizedRowGroupGenUtil.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/VectorizedRowGroupGenUtil.java index 9aad077..238d40f 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/VectorizedRowGroupGenUtil.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/VectorizedRowGroupGenUtil.java @@ -24,7 +24,6 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; - public class VectorizedRowGroupGenUtil { private static final long LONG_VECTOR_NULL_VALUE = 1; @@ -73,9 +72,8 @@ public static LongColumnVector generateLongColumnVector( return lcv; } - public static DoubleColumnVector generateDoubleColumnVector( - boolean nulls, boolean repeating, int size, Random rand) - { + public static DoubleColumnVector generateDoubleColumnVector(boolean nulls, + boolean repeating, int size, Random rand) { DoubleColumnVector dcv = new DoubleColumnVector(size); dcv.noNulls = !nulls;