diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareTruncStringScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareTruncStringScalar.txt index 7e090b3..0c67902 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareTruncStringScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareTruncStringScalar.txt @@ -20,8 +20,6 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.; -import org.apache.hadoop.hive.common.type.; - import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; @@ -33,8 +31,8 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; */ public class extends { - public (int colNum, value) { - super(colNum, value.); + public (int colNum, byte[] value) { + super(colNum, value); } public () { diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt index 416f9e8..831e27f 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt @@ -18,10 +18,6 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; -import java.nio.charset.StandardCharsets; - -import org.apache.hadoop.hive.common.type.; - import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; @@ -42,11 +38,11 @@ public class extends VectorExpression { private byte[] left; private byte[] right; - public (int colNum, left, right) { + public (int colNum, byte[] left, byte[] right) { super(); this.colNum = colNum; - this.left = left.; - this.right = right.; + this.left = left; + this.right = right; } public () { diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringScalarCompareStringGroupColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringScalarCompareStringGroupColumn.txt index fc6a1df..31c443c 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringScalarCompareStringGroupColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringScalarCompareStringGroupColumn.txt @@ -20,8 +20,6 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.; -import org.apache.hadoop.hive.common.type.; - import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; @@ -34,8 +32,8 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; */ public class extends { - public ( value, int colNum) { - super(value., colNum); + public (byte[] value, int colNum) { + super(value, colNum); } public () { diff --git ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareTruncStringScalar.txt ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareTruncStringScalar.txt index 3f75dc6..eccbee2 100644 --- 
ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareTruncStringScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareTruncStringScalar.txt @@ -20,10 +20,6 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.; -import java.nio.charset.StandardCharsets; - -import org.apache.hadoop.hive.common.type.; - import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; @@ -36,8 +32,8 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; */ public class extends { - public (int colNum, value, int outputColumnNum) { - super(colNum, value., outputColumnNum); + public (int colNum, byte[] value, int outputColumnNum) { + super(colNum, value, outputColumnNum); } public () { diff --git ql/src/gen/vectorization/ExpressionTemplates/TruncStringScalarCompareStringGroupColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TruncStringScalarCompareStringGroupColumn.txt index 9768441..3d2d280 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TruncStringScalarCompareStringGroupColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TruncStringScalarCompareStringGroupColumn.txt @@ -20,10 +20,6 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.; -import java.nio.charset.StandardCharsets; - -import org.apache.hadoop.hive.common.type.; - import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; @@ -39,8 +35,8 @@ public class extends { private static final long serialVersionUID = 1L; - public ( value, int colNum, int outputColumnNum) { - super(colNum, value., outputColumnNum); + public (byte[] value, int colNum, int outputColumnNum) { + super(colNum, value, outputColumnNum); } public () { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 45ceffc..9ddb136 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector; import java.lang.reflect.Constructor; +import java.nio.charset.StandardCharsets; import java.sql.Date; import java.sql.Timestamp; import java.util.ArrayList; @@ -3459,20 +3460,20 @@ public static boolean isDecimalFamily(String colType) { private Object getScalarValue(ExprNodeConstantDesc constDesc) throws HiveException { - if (constDesc.getTypeString().equalsIgnoreCase("String")) { - try { - byte[] bytes = ((String) constDesc.getValue()).getBytes("UTF-8"); - return bytes; - } catch (Exception ex) { - throw new HiveException(ex); - } - } else if (constDesc.getTypeString().equalsIgnoreCase("boolean")) { + String typeString = constDesc.getTypeString(); + if (typeString.equalsIgnoreCase("String")) { + return ((String) constDesc.getValue()).getBytes(StandardCharsets.UTF_8); + } else if (charTypePattern.matcher(typeString).matches()) { + return ((HiveChar) constDesc.getValue()).getStrippedValue().getBytes(StandardCharsets.UTF_8); + } else if (varcharTypePattern.matcher(typeString).matches()) { + return 
((HiveVarchar) constDesc.getValue()).getValue().getBytes(StandardCharsets.UTF_8); + } else if (typeString.equalsIgnoreCase("boolean")) { if (constDesc.getValue().equals(Boolean.valueOf(true))) { return 1; } else { return 0; } - } else if (decimalTypePattern.matcher(constDesc.getTypeString()).matches()) { + } else if (decimalTypePattern.matcher(typeString).matches()) { return constDesc.getValue(); } else { return constDesc.getValue(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToDecimal.java index b6a7a26..eb925af 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToDecimal.java @@ -54,7 +54,7 @@ protected void func(DecimalColumnVector outV, DoubleColumnVector inV, int i) { b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) .setNumArguments(1) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.FLOAT) + VectorExpressionDescriptor.ArgumentType.DOUBLE) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN); return b.build(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CharScalarConcatStringGroupCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CharScalarConcatStringGroupCol.java index 4fb7934..712b8de 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CharScalarConcatStringGroupCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CharScalarConcatStringGroupCol.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; -import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; /** @@ -28,8 +27,8 @@ public class CharScalarConcatStringGroupCol extends StringScalarConcatStringGroupCol { private static final long serialVersionUID = 1L; - public CharScalarConcatStringGroupCol(HiveChar value, int colNum, int outputColumnNum) { - super(value.getStrippedValue().getBytes(), colNum, outputColumnNum); + public CharScalarConcatStringGroupCol(byte[] value, int colNum, int outputColumnNum) { + super(value, colNum, outputColumnNum); } public CharScalarConcatStringGroupCol() { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCharScalarStringGroupColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCharScalarStringGroupColumn.java index 1caf66a..25ac964 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCharScalarStringGroupColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCharScalarStringGroupColumn.java @@ -18,8 +18,6 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; -import org.apache.hadoop.hive.common.type.HiveChar; - import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; /** @@ -32,8 +30,9 @@ private static final long serialVersionUID = 1L; - public IfExprCharScalarStringGroupColumn(int arg1Column, HiveChar arg2Scalar, int arg3Column, int outputColumnNum) { - super(arg1Column, arg2Scalar.getValue().getBytes(), arg3Column, outputColumnNum); + public IfExprCharScalarStringGroupColumn(int arg1Column, byte[] arg2Scalar, int arg3Column, + int outputColumnNum) { + super(arg1Column, arg2Scalar, arg3Column, outputColumnNum); } public IfExprCharScalarStringGroupColumn() { diff --git 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCharScalarStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCharScalarStringScalar.java index 80bff2e..0b0c532 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCharScalarStringScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCharScalarStringScalar.java @@ -18,8 +18,6 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; -import org.apache.hadoop.hive.common.type.HiveChar; - import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; /** @@ -33,8 +31,8 @@ private static final long serialVersionUID = 1L; public IfExprCharScalarStringScalar( - int arg1Column, HiveChar arg2Scalar, byte[] arg3Scalar, int outputColumnNum) { - super(arg1Column, arg2Scalar.getValue().getBytes(), arg3Scalar, outputColumnNum); + int arg1Column, byte[] arg2Scalar, byte[] arg3Scalar, int outputColumnNum) { + super(arg1Column, arg2Scalar, arg3Scalar, outputColumnNum); } public IfExprCharScalarStringScalar() { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnCharScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnCharScalar.java index fbdb097..8b4257b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnCharScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnCharScalar.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; -import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; /** @@ -31,8 +30,8 @@ private static final long serialVersionUID = 1L; - public IfExprStringGroupColumnCharScalar(int arg1Column, int arg2Column, HiveChar arg3Scalar, int outputColumnNum) { - super(arg1Column, arg2Column, arg3Scalar.getValue().getBytes(), outputColumnNum); + public IfExprStringGroupColumnCharScalar(int arg1Column, int arg2Column, byte[] arg3Scalar, int outputColumnNum) { + super(arg1Column, arg2Column, arg3Scalar, outputColumnNum); } public IfExprStringGroupColumnCharScalar() { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnVarCharScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnVarCharScalar.java index 04a42de..641b22f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnVarCharScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnVarCharScalar.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; -import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; /** @@ -31,8 +30,9 @@ private static final long serialVersionUID = 1L; - public IfExprStringGroupColumnVarCharScalar(int arg1Column, int arg2Column, HiveVarchar arg3Scalar, int outputColumnNum) { - super(arg1Column, arg2Column, arg3Scalar.getValue().getBytes(), outputColumnNum); + public IfExprStringGroupColumnVarCharScalar(int arg1Column, int arg2Column, byte[] arg3Scalar, + int outputColumnNum) { + super(arg1Column, arg2Column, arg3Scalar, outputColumnNum); } public IfExprStringGroupColumnVarCharScalar() { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarCharScalar.java 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarCharScalar.java index 9e140a6..2a6946d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarCharScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarCharScalar.java @@ -18,8 +18,6 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; -import org.apache.hadoop.hive.common.type.HiveChar; - import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; /** @@ -33,8 +31,8 @@ private static final long serialVersionUID = 1L; public IfExprStringScalarCharScalar( - int arg1Column, byte[] arg2Scalar, HiveChar arg3Scalar, int outputColumnNum) { - super(arg1Column, arg2Scalar, arg3Scalar.getValue().getBytes(), outputColumnNum); + int arg1Column, byte[] arg2Scalar, byte[] arg3Scalar, int outputColumnNum) { + super(arg1Column, arg2Scalar, arg3Scalar, outputColumnNum); } public IfExprStringScalarCharScalar() { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarVarCharScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarVarCharScalar.java index ae8544e..d70e161 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarVarCharScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarVarCharScalar.java @@ -18,8 +18,6 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; -import org.apache.hadoop.hive.common.type.HiveVarchar; - import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; /** @@ -33,8 +31,8 @@ private static final long serialVersionUID = 1L; public IfExprStringScalarVarCharScalar( - int arg1Column, byte[] arg2Scalar, HiveVarchar arg3Scalar, int outputColumnNum) { - super(arg1Column, arg2Scalar, arg3Scalar.getValue().getBytes(), outputColumnNum); + int arg1Column, byte[] arg2Scalar, byte[] arg3Scalar, int outputColumnNum) { + super(arg1Column, arg2Scalar, arg3Scalar, outputColumnNum); } public IfExprStringScalarVarCharScalar() { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprVarCharScalarStringGroupColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprVarCharScalarStringGroupColumn.java index f31ff7a..0cdd694 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprVarCharScalarStringGroupColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprVarCharScalarStringGroupColumn.java @@ -18,8 +18,6 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; -import org.apache.hadoop.hive.common.type.HiveVarchar; - import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; /** @@ -32,8 +30,9 @@ private static final long serialVersionUID = 1L; - public IfExprVarCharScalarStringGroupColumn(int arg1Column, HiveVarchar arg2Scalar, int arg3Column, int outputColumnNum) { - super(arg1Column, arg2Scalar.getValue().getBytes(), arg3Column, outputColumnNum); + public IfExprVarCharScalarStringGroupColumn(int arg1Column, byte[] arg2Scalar, int arg3Column, + int outputColumnNum) { + super(arg1Column, arg2Scalar, arg3Column, outputColumnNum); } public IfExprVarCharScalarStringGroupColumn() { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprVarCharScalarStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprVarCharScalarStringScalar.java index fd93dea..6ab1ad7 100644 --- 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprVarCharScalarStringScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprVarCharScalarStringScalar.java @@ -18,8 +18,6 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; -import org.apache.hadoop.hive.common.type.HiveVarchar; - import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; /** @@ -33,8 +31,8 @@ private static final long serialVersionUID = 1L; public IfExprVarCharScalarStringScalar( - int arg1Column, HiveVarchar arg2Scalar, byte[] arg3Scalar, int outputColumnNum) { - super(arg1Column, arg2Scalar.getValue().getBytes(), arg3Scalar, outputColumnNum); + int arg1Column, byte[] arg2Scalar, byte[] arg3Scalar, int outputColumnNum) { + super(arg1Column, arg2Scalar, arg3Scalar, outputColumnNum); } public IfExprVarCharScalarStringScalar() { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatCharScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatCharScalar.java index 231bf9d..bbebe6c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatCharScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatCharScalar.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; -import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; /** @@ -28,8 +27,8 @@ public class StringGroupColConcatCharScalar extends StringGroupColConcatStringScalar { private static final long serialVersionUID = 1L; - public StringGroupColConcatCharScalar(int colNum, HiveChar value, int outputColumnNum) { - super(colNum, value.getStrippedValue().getBytes(), outputColumnNum); + public StringGroupColConcatCharScalar(int colNum, byte[] value, int outputColumnNum) { + super(colNum, value, outputColumnNum); } public StringGroupColConcatCharScalar() { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java index fa23d89..0c83494 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java @@ -104,6 +104,21 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException { // Repeating property will not change. outV.isRepeating = true; outputVector[0] = evaluateDate(inputColVector1, 0, vector2[0]); + } else if (inputColVector1.isRepeating) { + evaluateRepeatedDate(inputColVector1, vector2, outV, + batch.selectedInUse, batch.selected, n); + } else if (inputColVector2.isRepeating) { + final long repeatedNumDays = vector2[0]; + if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = evaluateDate(inputColVector1, i, repeatedNumDays); + } + } else { + for (int i = 0; i != n; i++) { + outputVector[i] = evaluateDate(inputColVector1, i, repeatedNumDays); + } + } } else if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; @@ -123,6 +138,21 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException { // Repeating property will not change. 
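The constructor changes above (CharScalarConcatStringGroupCol, the IfExpr*Char/VarChar* classes, StringGroupColConcatCharScalar, and the generated TruncString templates) all follow the same pattern: the expression no longer converts a HiveChar/HiveVarchar itself, it simply receives byte[], and the conversion happens once in VectorizationContext.getScalarValue. A minimal sketch of that conversion, assuming the stock HiveChar.getStrippedValue() and HiveVarchar.getValue() accessors:

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveVarchar;

final class StringFamilyScalarSketch {
  // Turn a string-family constant into the UTF-8 bytes the vector expressions compare against.
  static byte[] toBytes(Object constant) {
    if (constant instanceof HiveChar) {
      // CHAR is blank-padded; comparisons use the stripped value.
      return ((HiveChar) constant).getStrippedValue().getBytes(StandardCharsets.UTF_8);
    }
    if (constant instanceof HiveVarchar) {
      return ((HiveVarchar) constant).getValue().getBytes(StandardCharsets.UTF_8);
    }
    if (constant instanceof String) {
      return ((String) constant).getBytes(StandardCharsets.UTF_8);
    }
    throw new IllegalArgumentException("Not a string-family constant: " + constant);
  }
}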
outV.isRepeating = true; outputVector[0] = evaluateTimestamp(inputColVector1, 0, vector2[0]); + } else if (inputColVector1.isRepeating) { + evaluateRepeatedTimestamp(inputColVector1, vector2, outV, + batch.selectedInUse, batch.selected, n); + } else if (inputColVector2.isRepeating) { + final long repeatedNumDays = vector2[0]; + if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = evaluateTimestamp(inputColVector1, i, repeatedNumDays); + } + } else { + for (int i = 0; i != n; i++) { + outputVector[i] = evaluateTimestamp(inputColVector1, i, repeatedNumDays); + } + } } else if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; @@ -144,6 +174,21 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException { // Repeating property will not change. outV.isRepeating = true; evaluateString((BytesColumnVector) inputColVector1, outV, 0, vector2[0]); + } else if (inputColVector1.isRepeating) { + evaluateRepeatedString((BytesColumnVector) inputColVector1, vector2, outV, + batch.selectedInUse, batch.selected, n); + } else if (inputColVector2.isRepeating) { + final long repeatedNumDays = vector2[0]; + if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + evaluateString((BytesColumnVector) inputColVector1, outV, i, repeatedNumDays); + } + } else { + for (int i = 0; i != n; i++) { + evaluateString((BytesColumnVector) inputColVector1, outV, i, repeatedNumDays); + } + } } else if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; @@ -160,6 +205,33 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException { } } + protected void evaluateRepeatedCommon(long days, long[] vector2, LongColumnVector outputVector, + boolean selectedInUse, int[] selected, int n) { + if (isPositive) { + if (selectedInUse) { + for (int j = 0; j != n; j++) { + int i = selected[j]; + outputVector.vector[i] = days + vector2[i]; + } + } else { + for (int i = 0; i != n; i++) { + outputVector.vector[i] = days + vector2[i]; + } + } + } else { + if (selectedInUse) { + for (int j = 0; j != n; j++) { + int i = selected[j]; + outputVector.vector[i] = days - vector2[i]; + } + } else { + for (int i = 0; i != n; i++) { + outputVector.vector[i] = days - vector2[i]; + } + } + } + } + protected long evaluateDate(ColumnVector columnVector, int index, long numDays) { LongColumnVector lcv = (LongColumnVector) columnVector; long days = lcv.vector[index]; @@ -171,6 +243,21 @@ protected long evaluateDate(ColumnVector columnVector, int index, long numDays) return days; } + protected void evaluateRepeatedDate(ColumnVector columnVector, + long[] vector2, LongColumnVector outputVector, + boolean selectedInUse, int[] selected, int n) { + if (columnVector.isNull[0]) { + outputVector.noNulls = false; + outputVector.isNull[0] = true; + outputVector.isRepeating = true; + return; + } + LongColumnVector lcv = (LongColumnVector) columnVector; + long days = lcv.vector[0]; + + evaluateRepeatedCommon(days, vector2, outputVector, selectedInUse, selected, n); + } + protected long evaluateTimestamp(ColumnVector columnVector, int index, long numDays) { TimestampColumnVector tcv = (TimestampColumnVector) columnVector; // Convert to date value (in days) @@ -183,6 +270,22 @@ protected long evaluateTimestamp(ColumnVector columnVector, int index, long numD return days; } + protected void evaluateRepeatedTimestamp(ColumnVector columnVector, + long[] vector2, LongColumnVector outputVector, + boolean selectedInUse, int[] selected, int n) { + if 
(columnVector.isNull[0]) { + outputVector.noNulls = false; + outputVector.isNull[0] = true; + outputVector.isRepeating = true; + return; + } + TimestampColumnVector tcv = (TimestampColumnVector) columnVector; + // Convert to date value (in days) + long days = DateWritable.millisToDays(tcv.getTime(0)); + + evaluateRepeatedCommon(days, vector2, outputVector, selectedInUse, selected, n); + } + protected void evaluateString(BytesColumnVector inputColumnVector1, LongColumnVector outputVector, int index, long numDays) { if (inputColumnVector1.isNull[index]) { outputVector.noNulls = false; @@ -205,6 +308,29 @@ protected void evaluateString(BytesColumnVector inputColumnVector1, LongColumnVe } } + protected void evaluateRepeatedString(BytesColumnVector inputColumnVector1, + long[] vector2, LongColumnVector outputVector, + boolean selectedInUse, int[] selected, int n) { + if (inputColumnVector1.isNull[0]) { + outputVector.noNulls = false; + outputVector.isNull[0] = true; + outputVector.isRepeating = true; + return; + } + text.set( + inputColumnVector1.vector[0], inputColumnVector1.start[0], inputColumnVector1.length[0]); + boolean parsed = dateParser.parseDate(text.toString(), date); + if (!parsed) { + outputVector.noNulls = false; + outputVector.isNull[0] = true; + outputVector.isRepeating = true; + return; + } + long days = DateWritable.millisToDays(date.getTime()); + + evaluateRepeatedCommon(days, vector2, outputVector, selectedInUse, selected, n); + } + @Override public String vectorExpressionParameters() { return getColumnParamString(0, colNum1) + ", " + getColumnParamString(1, colNum2); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java index 0d418fd..c861321 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java @@ -38,6 +38,7 @@ private final int colNum; + private Object object; private long longValue = 0; private Timestamp timestampValue = null; private byte[] stringValue = null; @@ -61,6 +62,7 @@ public VectorUDFDateAddScalarCol(Object object, int colNum, int outputColumnNum) super(outputColumnNum); this.colNum = colNum; + this.object = object; if (object instanceof Long) { this.longValue = (Long) object; } else if (object instanceof Timestamp) { @@ -241,7 +243,19 @@ public void setPositive(boolean isPositive) { @Override public String vectorExpressionParameters() { - return "val " + stringValue + ", " + getColumnParamString(0, colNum); + String value; + if (object instanceof Long) { + Date tempDate = new Date(0); + tempDate.setTime(DateWritable.daysToMillis((int) longValue)); + value = tempDate.toString(); + } else if (object instanceof Timestamp) { + value = this.timestampValue.toString(); + } else if (object instanceof byte []) { + value = new String(this.stringValue, StandardCharsets.UTF_8); + } else { + value = "unknown"; + } + return "val " + value + ", " + getColumnParamString(0, colNum); } public VectorExpressionDescriptor.Descriptor getDescriptor() { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomBatchSource.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomBatchSource.java index 8de247c..4c2f872 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomBatchSource.java +++ 
ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomBatchSource.java @@ -19,9 +19,12 @@ package org.apache.hadoop.hive.ql.exec.vector; import java.util.ArrayList; +import java.util.Arrays; import java.util.BitSet; import java.util.List; import java.util.Random; +import java.util.Set; +import java.util.TreeSet; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -83,18 +86,53 @@ private VectorRandomBatchSource( final int batchSize; final BitSet bitSet; + final int[] selected; - private VectorBatchPattern(int batchSize, BitSet bitSet) { + private VectorBatchPattern(Random random, int batchSize, + BitSet bitSet, boolean asSelected) { this.batchSize = batchSize; this.bitSet = bitSet; + if (asSelected) { + selected = randomSelection(random, batchSize); + } else { + selected = null; + } + } + + private int[] randomSelection(Random random, int batchSize) { + + // Random batchSize unique ordered integers of 1024 (VectorizedRowBatch.DEFAULT_SIZE) indices. + // This could be smarter... + Set selectedSet = new TreeSet(); + int currentCount = 0; + while (true) { + final int candidateIndex = random.nextInt(VectorizedRowBatch.DEFAULT_SIZE); + if (!selectedSet.contains(candidateIndex)) { + selectedSet.add(candidateIndex); + if (++currentCount == batchSize) { + Integer[] integerArray = selectedSet.toArray(new Integer[0]); + int[] result = new int[batchSize]; + for (int i = 0; i < batchSize; i++) { + result[i] = integerArray[i]; + } + return result; + } + } + } } public static VectorBatchPattern createRegularBatch(int batchSize) { - return new VectorBatchPattern(batchSize, null); + return new VectorBatchPattern(null, batchSize, null, false); + } + + public static VectorBatchPattern createRegularBatch(Random random, int batchSize, + boolean asSelected) { + return new VectorBatchPattern(random, batchSize, null, asSelected); } - public static VectorBatchPattern createRepeatedBatch(int batchSize, BitSet bitSet) { - return new VectorBatchPattern(batchSize, bitSet); + public static VectorBatchPattern createRepeatedBatch(Random random, int batchSize, + BitSet bitSet, boolean asSelected) { + return new VectorBatchPattern(random, batchSize, bitSet, asSelected); } public int getBatchSize() { @@ -105,13 +143,22 @@ public BitSet getBitSet() { return bitSet; } + public int[] getSelected() { + return selected; + } + public String toString() { String batchSizeString = "batchSize " + Integer.toString(batchSize); - if (bitSet == null) { - return batchSizeString; + if (bitSet != null) { + long bitMask = bitSet.toLongArray()[0]; + batchSizeString += " repeating 0x" + Long.toHexString(bitMask); + } + boolean selectedInUse = (selected != null); + batchSizeString += " selectedInUse " + selectedInUse; + if (selectedInUse) { + batchSizeString += " selected " + Arrays.toString(selected); } - long bitMask = bitSet.toLongArray()[0]; - return batchSizeString + " repeating 0x" + Long.toHexString(bitMask); + return batchSizeString; } } @@ -128,7 +175,7 @@ private static VectorBatchPatterns chooseBatchPatterns( final int columnCount = randomRows[0].length; - // Choose first up to a full batch. + // Choose first up to a full batch with no selection. 
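The evaluateRepeated* methods added to VectorUDFDateAddColCol above all reduce to the same inner loop: read the repeating date input once, convert it to epoch days, then add or subtract the per-row day count, walking the selected array when it is in use. A self-contained sketch of that shared loop (plain arrays stand in for the column vectors):

final class RepeatedDateAddSketch {
  // baseDays comes from the repeating (constant) input; numDays holds the per-row offsets.
  static void addToRepeatedBase(long baseDays, long[] numDays, long[] out, boolean isPositive,
      boolean selectedInUse, int[] selected, int n) {
    if (selectedInUse) {
      for (int j = 0; j != n; j++) {
        final int i = selected[j];
        out[i] = isPositive ? baseDays + numDays[i] : baseDays - numDays[i];
      }
    } else {
      for (int i = 0; i != n; i++) {
        out[i] = isPositive ? baseDays + numDays[i] : baseDays - numDays[i];
      }
    }
  }
}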
final int regularBatchSize = Math.min(rowCount - rowIndex, VectorizedRowBatch.DEFAULT_SIZE); vectorBatchPatternList.add(VectorBatchPattern.createRegularBatch(regularBatchSize)); rowIndex += regularBatchSize; @@ -147,66 +194,83 @@ private static VectorBatchPatterns chooseBatchPatterns( int columnPermutationLimit = Math.min(columnCount, Long.SIZE); - // Repeated NULL permutations. - long columnPermutation = 1; - while (true) { - if (columnPermutation > columnPermutationLimit) { - break; - } - final int maximumRowCount = Math.min(rowCount - rowIndex, VectorizedRowBatch.DEFAULT_SIZE); - if (maximumRowCount == 0) { - break; - } - int randomRowCount = 1 + random.nextInt(maximumRowCount); - final int rowLimit = rowIndex + randomRowCount; - - BitSet bitSet = BitSet.valueOf(new long[]{columnPermutation}); + boolean asSelected = false; - for (int columnNum = bitSet.nextSetBit(0); - columnNum >= 0; - columnNum = bitSet.nextSetBit(columnNum + 1)) { + /* + * Do a round each as physical with no row selection and logical with row selection. + */ + while (true) { - // Repeated NULL fill down column. - for (int r = rowIndex; r < rowLimit; r++) { - randomRows[r][columnNum] = null; + // Repeated NULL permutations. + long columnPermutation = 1; + while (true) { + if (columnPermutation > columnPermutationLimit) { + break; } - } - vectorBatchPatternList.add(VectorBatchPattern.createRepeatedBatch(randomRowCount, bitSet)); - columnPermutation++; - rowIndex = rowLimit; - } + final int maximumRowCount = Math.min(rowCount - rowIndex, VectorizedRowBatch.DEFAULT_SIZE); + if (maximumRowCount == 0) { + break; + } + int randomRowCount = 1 + random.nextInt(maximumRowCount); + final int rowLimit = rowIndex + randomRowCount; - // Repeated non-NULL permutations. - columnPermutation = 1; - while (true) { - if (columnPermutation > columnPermutationLimit) { - break; - } - final int maximumRowCount = Math.min(rowCount - rowIndex, VectorizedRowBatch.DEFAULT_SIZE); - if (maximumRowCount == 0) { - break; - } - int randomRowCount = 1 + random.nextInt(maximumRowCount); - final int rowLimit = rowIndex + randomRowCount; + BitSet bitSet = BitSet.valueOf(new long[]{columnPermutation}); - BitSet bitSet = BitSet.valueOf(new long[]{columnPermutation}); + for (int columnNum = bitSet.nextSetBit(0); + columnNum >= 0; + columnNum = bitSet.nextSetBit(columnNum + 1)) { - for (int columnNum = bitSet.nextSetBit(0); - columnNum >= 0; - columnNum = bitSet.nextSetBit(columnNum + 1)) { + // Repeated NULL fill down column. + for (int r = rowIndex; r < rowLimit; r++) { + randomRows[r][columnNum] = null; + } + } + vectorBatchPatternList.add( + VectorBatchPattern.createRepeatedBatch( + random, randomRowCount, bitSet, asSelected)); + columnPermutation++; + rowIndex = rowLimit; + } - // Repeated non-NULL fill down column. - Object repeatedObject = randomRows[rowIndex][columnNum]; - if (repeatedObject == null) { - repeatedObject = nonNullRow[columnNum]; + // Repeated non-NULL permutations. 
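randomSelection above produces the selected array for a "logical" batch: batchSize distinct indices drawn from the 1024 physical slots, returned in ascending order, which is the ordering the rest of the vectorization code expects for VectorizedRowBatch.selected. The same idea, condensed:

import java.util.Random;
import java.util.TreeSet;

final class SelectedIndicesSketch {
  // Draw 'count' distinct indices in [0, capacity); the TreeSet keeps them sorted.
  static int[] pick(Random random, int count, int capacity) {
    TreeSet<Integer> chosen = new TreeSet<>();
    while (chosen.size() < count) {
      chosen.add(random.nextInt(capacity));
    }
    int[] result = new int[count];
    int i = 0;
    for (int index : chosen) {
      result[i++] = index;
    }
    return result;
  }
}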
+ columnPermutation = 1; + while (true) { + if (columnPermutation > columnPermutationLimit) { + break; + } + final int maximumRowCount = Math.min(rowCount - rowIndex, VectorizedRowBatch.DEFAULT_SIZE); + if (maximumRowCount == 0) { + break; } - for (int r = rowIndex; r < rowLimit; r++) { - randomRows[r][columnNum] = repeatedObject; + int randomRowCount = 1 + random.nextInt(maximumRowCount); + final int rowLimit = rowIndex + randomRowCount; + + BitSet bitSet = BitSet.valueOf(new long[]{columnPermutation}); + + for (int columnNum = bitSet.nextSetBit(0); + columnNum >= 0; + columnNum = bitSet.nextSetBit(columnNum + 1)) { + + // Repeated non-NULL fill down column. + Object repeatedObject = randomRows[rowIndex][columnNum]; + if (repeatedObject == null) { + repeatedObject = nonNullRow[columnNum]; + } + for (int r = rowIndex; r < rowLimit; r++) { + randomRows[r][columnNum] = repeatedObject; + } } + vectorBatchPatternList.add( + VectorBatchPattern.createRepeatedBatch( + random, randomRowCount, bitSet, asSelected)); + columnPermutation++; + rowIndex = rowLimit; + } + + if (asSelected) { + break; } - vectorBatchPatternList.add(VectorBatchPattern.createRepeatedBatch(randomRowCount, bitSet)); - columnPermutation++; - rowIndex = rowLimit; + asSelected = true; } // Remaining batches. @@ -216,7 +280,10 @@ private static VectorBatchPatterns chooseBatchPatterns( break; } int randomRowCount = 1 + random.nextInt(maximumRowCount); - vectorBatchPatternList.add(VectorBatchPattern.createRegularBatch(randomRowCount)); + asSelected = random.nextBoolean(); + vectorBatchPatternList.add( + VectorBatchPattern.createRegularBatch( + random, randomRowCount, asSelected)); rowIndex += randomRowCount; } } @@ -278,6 +345,9 @@ public boolean fillNextBatch(VectorizedRowBatch batch) { VectorBatchPattern vectorBatchPattern = vectorBatchPatterns.getTectorBatchPatternList().get(batchCount); + + // System.out.println("*DEBUG* vectorBatchPattern " + vectorBatchPattern.toString()); + final int batchSize = vectorBatchPattern.getBatchSize(); for (int c = 0; c < columnCount; c++) { @@ -293,13 +363,25 @@ public boolean fillNextBatch(VectorizedRowBatch batch) { } } + int[] selected = vectorBatchPattern.getSelected(); + boolean selectedInUse = (selected != null); + batch.selectedInUse = selectedInUse; + if (selectedInUse) { + System.arraycopy(selected, 0, batch.selected, 0, batchSize); + } + int rowIndex = nextRowIndex; - for (int batchIndex = 0; batchIndex < batchSize; batchIndex++) { + for (int logicalIndex = 0; logicalIndex < batchSize; logicalIndex++) { + final int batchIndex = (selectedInUse ? 
selected[logicalIndex] : logicalIndex); for (int c = 0; c < columnCount; c++) { - if (batch.cols[c].isRepeating && batchIndex > 0) { - continue; + if (batch.cols[c].isRepeating) { + if (logicalIndex > 0) { + continue; + } + vectorAssignRow.assignRowColumn(batch, 0, c, randomRows[rowIndex][c]); + } else { + vectorAssignRow.assignRowColumn(batch, batchIndex, c, randomRows[rowIndex][c]); } - vectorAssignRow.assignRowColumn(batch, batchIndex, c, randomRows[rowIndex][c]); } rowIndex++; } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java index 7877532..641ff10 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java @@ -977,6 +977,18 @@ public static Object randomPrimitiveObject(Random r, PrimitiveTypeInfo primitive } } + public static String randomPrimitiveDateStringObject(Random r) { + Date randomDate = RandomTypeUtil.getRandDate(r); + String randomDateString = randomDate.toString(); + return randomDateString; + } + + public static String randomPrimitiveTimestampStringObject(Random r) { + Timestamp randomTimestamp = RandomTypeUtil.getRandTimestamp(r); + String randomTimestampString = randomTimestamp.toString(); + return randomTimestampString; + } + public static HiveChar getRandHiveChar(Random r, CharTypeInfo charTypeInfo) { final int maxLength = 1 + r.nextInt(charTypeInfo.getLength()); final String randomString = RandomTypeUtil.getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCastStatement.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCastStatement.java index 0e300cf..30e8906 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCastStatement.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCastStatement.java @@ -24,6 +24,8 @@ import java.util.Random; import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; @@ -45,17 +47,22 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIf; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen; import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import 
org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; import junit.framework.Assert; @@ -222,12 +229,24 @@ private void doIfTests(Random random, String typeName, } } + private boolean needsValidDataTypeData(TypeInfo typeInfo) { + PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(); + if (primitiveCategory == PrimitiveCategory.STRING || + primitiveCategory == PrimitiveCategory.CHAR || + primitiveCategory == PrimitiveCategory.VARCHAR || + primitiveCategory == PrimitiveCategory.BINARY) { + return false; + } + return true; + } + private void doIfTestOneCast(Random random, String typeName, DataTypePhysicalVariation dataTypePhysicalVariation, PrimitiveCategory targetPrimitiveCategory) throws Exception { TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); + PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(); boolean isDecimal64 = (dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64); final int decimal64Scale = @@ -272,6 +291,49 @@ private void doIfTestOneCast(Random random, String typeName, Object[][] randomRows = rowSource.randomRows(100000); + if (needsValidDataTypeData(targetTypeInfo) && + (primitiveCategory == PrimitiveCategory.STRING || + primitiveCategory == PrimitiveCategory.CHAR || + primitiveCategory == PrimitiveCategory.VARCHAR)) { + + // Regenerate string family with valid data for target data type. + final int rowCount = randomRows.length; + for (int i = 0; i < rowCount; i++) { + Object object = randomRows[i][0]; + if (object == null) { + continue; + } + String string = + VectorRandomRowSource.randomPrimitiveObject( + random, (PrimitiveTypeInfo) targetTypeInfo).toString(); + Object newObject; + switch (primitiveCategory) { + case STRING: + newObject = new Text(string); + break; + case CHAR: + { + HiveChar hiveChar = + new HiveChar( + string, ((CharTypeInfo) typeInfo).getLength()); + newObject = new HiveCharWritable(hiveChar); + } + break; + case VARCHAR: + { + HiveVarchar hiveVarchar = + new HiveVarchar( + string, ((VarcharTypeInfo) typeInfo).getLength()); + newObject = new HiveVarcharWritable(hiveVarchar); + } + break; + default: + throw new RuntimeException("Unexpected string family category " + primitiveCategory); + } + randomRows[i][0] = newObject; + } + } + VectorRandomBatchSource batchSource = VectorRandomBatchSource.createInterestingBatches( random, @@ -414,9 +476,12 @@ private boolean doRowCastTest(TypeInfo typeInfo, TypeInfo targetTypeInfo, private void extractResultObjects(VectorizedRowBatch batch, int rowIndex, VectorExtractRow resultVectorExtractRow, Object[] scrqtchRow, Object[] resultObjects) { - // UNDONE: selectedInUse - for (int i = 0; i < batch.size; i++) { - resultVectorExtractRow.extractRow(batch, i, scrqtchRow); + + boolean selectedInUse = batch.selectedInUse; + int[] selected = batch.selected; + for (int logicalIndex = 0; logicalIndex < batch.size; logicalIndex++) { + final int batchIndex = (selectedInUse ? selected[logicalIndex] : logicalIndex); + resultVectorExtractRow.extractRow(batch, batchIndex, scrqtchRow); // UNDONE: Need to copy the object. 
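Both fillNextBatch above and the reworked extractResultObjects iterate rows the same way: loop over logical positions 0..batch.size-1 and map each one through batch.selected when batch.selectedInUse is set; fillNextBatch additionally writes a repeating column only once, at physical index 0. The mapping, as a small helper over a VectorizedRowBatch:

import java.util.function.IntConsumer;

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

final class BatchRowIterationSketch {
  // Visit each row of a batch in logical order, resolving the physical index through
  // batch.selected when a selection is in use.
  static void forEachPhysicalRow(VectorizedRowBatch batch, IntConsumer visit) {
    for (int logical = 0; logical < batch.size; logical++) {
      final int physical = batch.selectedInUse ? batch.selected[logical] : logical;
      visit.accept(physical);
    }
  }
}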
resultObjects[rowIndex++] = scrqtchRow[0]; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateAddSub.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateAddSub.java new file mode 100644 index 0000000..0f658c6 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateAddSub.java @@ -0,0 +1,527 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Random; + +import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; +import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFDateAdd; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFDateSub; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import 
org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; + +import junit.framework.Assert; + +import org.junit.Test; + +public class TestVectorDateAddSub { + + @Test + public void testDate() throws Exception { + Random random = new Random(12882); + + doDateAddSubTests(random, "date", "smallint", true); + doDateAddSubTests(random, "date", "smallint", false); + doDateAddSubTests(random, "date", "int", true); + doDateAddSubTests(random, "date", "int", false); + } + + @Test + public void testTimestamp() throws Exception { + Random random = new Random(12882); + + doDateAddSubTests(random, "timestamp", "smallint", true); + doDateAddSubTests(random, "timestamp", "smallint", false); + doDateAddSubTests(random, "timestamp", "int", true); + doDateAddSubTests(random, "timestamp", "int", false); + } + + @Test + public void testStringFamily() throws Exception { + Random random = new Random(12882); + + doDateAddSubTests(random, "string", "smallint", true); + doDateAddSubTests(random, "string", "smallint", false); + doDateAddSubTests(random, "string", "int", true); + doDateAddSubTests(random, "string", "int", false); + + doDateAddSubTests(random, "char(20)", "int", true); + doDateAddSubTests(random, "char(20)", "int", false); + + doDateAddSubTests(random, "varchar(20)", "int", true); + doDateAddSubTests(random, "varchar(20)", "int", false); + } + + public enum DateAddSubTestMode { + ROW_MODE, + ADAPTOR, + VECTOR_EXPRESSION; + + static final int count = values().length; + } + + public enum ColumnScalarMode { + COLUMN_COLUMN, + COLUMN_SCALAR, + SCALAR_COLUMN; + + static final int count = values().length; + } + + private void doDateAddSubTests(Random random, String dateTimeStringTypeName, + String integerTypeName, boolean isAdd) + throws Exception { + for (ColumnScalarMode columnScalarMode : ColumnScalarMode.values()) { + doDateAddSubTestsWithDiffColumnScalar( + random, dateTimeStringTypeName, integerTypeName, columnScalarMode, isAdd); + } + } + + private Object smallerRange(Random random, + PrimitiveCategory integerPrimitiveCategory, boolean wantWritable) { + + switch (integerPrimitiveCategory) { + case SHORT: + { + short newRandomShort = (short) random.nextInt(20000); + if (wantWritable) { + return new ShortWritable(newRandomShort); + } else { + return newRandomShort; + } + } + case INT: + { + int newRandomInt = random.nextInt(40000); + if (wantWritable) { + return new IntWritable(newRandomInt); + } else { + return newRandomInt; + } + } + default: + throw new RuntimeException("Unsupported integer category " + integerPrimitiveCategory); + } + } + + private static final String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + + private Object randomDateStringFamily( + Random random, TypeInfo dateTimeStringTypeInfo, boolean wantWritable) { + + String randomDateString = VectorRandomRowSource.randomPrimitiveDateStringObject(random); + if (random.nextInt(40) == 39) { + + // Randomly corrupt. 
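randomDateStringFamily, like the string-column regeneration added to TestVectorCastStatement earlier in this patch, builds string-family inputs by printing a random value of the type that will eventually be parsed, so that most rows are valid and only the deliberately corrupted ones exercise the error path. A sketch of the plain-STRING case, reusing the VectorRandomRowSource helper added above:

import java.util.Random;

import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource;
import org.apache.hadoop.io.Text;

final class ParseableStringInputSketch {
  // A date string that date_add/date_sub (or a cast) can actually parse, produced by
  // printing a random Date rather than generating arbitrary characters.
  static Text randomParseableDateString(Random random) {
    return new Text(VectorRandomRowSource.randomPrimitiveDateStringObject(random));
  }
}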
+ int index = random.nextInt(randomDateString.length()); + char[] chars = randomDateString.toCharArray(); + chars[index] = alphabet.charAt(random.nextInt(alphabet.length())); + randomDateString = String.valueOf(chars); + } + + PrimitiveCategory dateTimeStringPrimitiveCategory = + ((PrimitiveTypeInfo) dateTimeStringTypeInfo).getPrimitiveCategory(); + switch (dateTimeStringPrimitiveCategory) { + case STRING: + return randomDateString; + case CHAR: + { + HiveChar hiveChar = + new HiveChar(randomDateString, ((CharTypeInfo) dateTimeStringTypeInfo).getLength()); + if (wantWritable) { + return new HiveCharWritable(hiveChar); + } else { + return hiveChar; + } + } + case VARCHAR: + { + HiveVarchar hiveVarchar = + new HiveVarchar( + randomDateString, ((VarcharTypeInfo) dateTimeStringTypeInfo).getLength()); + if (wantWritable) { + return new HiveVarcharWritable(hiveVarchar); + } else { + return hiveVarchar; + } + } + default: + throw new RuntimeException("Unexpected string family category " + dateTimeStringPrimitiveCategory); + } + } + + private void doDateAddSubTestsWithDiffColumnScalar(Random random, String dateTimeStringTypeName, + String integerTypeName, ColumnScalarMode columnScalarMode, boolean isAdd) + throws Exception { + + TypeInfo dateTimeStringTypeInfo = + TypeInfoUtils.getTypeInfoFromTypeString(dateTimeStringTypeName); + PrimitiveCategory dateTimeStringPrimitiveCategory = + ((PrimitiveTypeInfo) dateTimeStringTypeInfo).getPrimitiveCategory(); + boolean isStringFamily = + (dateTimeStringPrimitiveCategory == PrimitiveCategory.STRING || + dateTimeStringPrimitiveCategory == PrimitiveCategory.CHAR || + dateTimeStringPrimitiveCategory == PrimitiveCategory.VARCHAR); + + TypeInfo integerTypeInfo = + TypeInfoUtils.getTypeInfoFromTypeString(integerTypeName); + PrimitiveCategory integerPrimitiveCategory = + ((PrimitiveTypeInfo) integerTypeInfo).getPrimitiveCategory(); + + List explicitTypeNameList = new ArrayList(); + List explicitDataTypePhysicalVariationList = + new ArrayList(); + + List columns = new ArrayList(); + int columnNum = 0; + ExprNodeDesc col1Expr; + if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN || + columnScalarMode == ColumnScalarMode.COLUMN_SCALAR) { + explicitTypeNameList.add(dateTimeStringTypeName); + explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE); + + String columnName = "col" + (columnNum++); + col1Expr = new ExprNodeColumnDesc(dateTimeStringTypeInfo, columnName, "table", false); + columns.add(columnName); + } else { + Object scalar1Object; + if (!isStringFamily) { + scalar1Object = + VectorRandomRowSource.randomPrimitiveObject( + random, (PrimitiveTypeInfo) dateTimeStringTypeInfo); + } else { + scalar1Object = + randomDateStringFamily( + random, dateTimeStringTypeInfo, /* wantWritable */ false); + } + col1Expr = new ExprNodeConstantDesc(dateTimeStringTypeInfo, scalar1Object); + } + ExprNodeDesc col2Expr; + if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN || + columnScalarMode == ColumnScalarMode.SCALAR_COLUMN) { + explicitTypeNameList.add(integerTypeName); + explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE); + + String columnName = "col" + (columnNum++); + col2Expr = new ExprNodeColumnDesc(integerTypeInfo, columnName, "table", false); + columns.add(columnName); + } else { + Object scalar2Object = + VectorRandomRowSource.randomPrimitiveObject( + random, (PrimitiveTypeInfo) integerTypeInfo); + scalar2Object = + smallerRange( + random, integerPrimitiveCategory, /* wantWritable */ false); + col2Expr = new 
ExprNodeConstantDesc(integerTypeInfo, scalar2Object); + } + + List children = new ArrayList(); + children.add(col1Expr); + children.add(col2Expr); + + //---------------------------------------------------------------------------------------------- + + String[] columnNames = columns.toArray(new String[0]); + + VectorRandomRowSource rowSource = new VectorRandomRowSource(); + + rowSource.initExplicitSchema( + random, explicitTypeNameList, /* maxComplexDepth */ 0, /* allowNull */ true, + explicitDataTypePhysicalVariationList); + + Object[][] randomRows = rowSource.randomRows(100000); + + if (isStringFamily) { + if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN || + columnScalarMode == ColumnScalarMode.COLUMN_SCALAR) { + for (int i = 0; i < randomRows.length; i++) { + Object[] row = randomRows[i]; + Object object = row[columnNum - 1]; + if (row[0] != null) { + row[0] = + randomDateStringFamily( + random, dateTimeStringTypeInfo, /* wantWritable */ true); + } + } + } + } + + if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN || + columnScalarMode == ColumnScalarMode.SCALAR_COLUMN) { + + // Fixup numbers to limit the range to 0 ... N-1. + for (int i = 0; i < randomRows.length; i++) { + Object[] row = randomRows[i]; + if (row[columnNum - 1] != null) { + row[columnNum - 1] = + smallerRange( + random, integerPrimitiveCategory, /* wantWritable */ true); + } + } + } + + VectorRandomBatchSource batchSource = + VectorRandomBatchSource.createInterestingBatches( + random, + rowSource, + randomRows, + null); + + String[] outputScratchTypeNames = new String[] { "date" }; + + VectorizedRowBatchCtx batchContext = + new VectorizedRowBatchCtx( + columnNames, + rowSource.typeInfos(), + rowSource.dataTypePhysicalVariations(), + /* dataColumnNums */ null, + /* partitionColumnCount */ 0, + /* virtualColumnCount */ 0, + /* neededVirtualColumns */ null, + outputScratchTypeNames, + null); + + final int rowCount = randomRows.length; + Object[][] resultObjectsArray = new Object[DateAddSubTestMode.count][]; + for (int i = 0; i < DateAddSubTestMode.count; i++) { + + Object[] resultObjects = new Object[rowCount]; + resultObjectsArray[i] = resultObjects; + + GenericUDF udf = + (isAdd ? new GenericUDFDateAdd() : new GenericUDFDateSub()); + + ExprNodeGenericFuncDesc exprDesc = + new ExprNodeGenericFuncDesc(TypeInfoFactory.dateTypeInfo, udf, children); + + DateAddSubTestMode dateAddSubTestMode = DateAddSubTestMode.values()[i]; + switch (dateAddSubTestMode) { + case ROW_MODE: + doRowDateAddSubTest( + dateTimeStringTypeInfo, + integerTypeInfo, + columns, + children, + isAdd, + exprDesc, + randomRows, + columnScalarMode, + rowSource.rowStructObjectInspector(), + resultObjects); + break; + case ADAPTOR: + case VECTOR_EXPRESSION: + doVectorDateAddSubTest( + dateTimeStringTypeInfo, + integerTypeInfo, + columns, + rowSource.typeInfos(), + children, + isAdd, + exprDesc, + dateAddSubTestMode, + columnScalarMode, + batchSource, + batchContext, + resultObjects); + break; + default: + throw new RuntimeException("Unexpected IF statement test mode " + dateAddSubTestMode); + } + } + + for (int i = 0; i < rowCount; i++) { + // Row-mode is the expected value. 
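The expression under test above is an ordinary ExprNodeGenericFuncDesc over a column/constant pair; the test only varies which argument is a column and which is a scalar. A minimal fixed-type sketch of the same construction (hypothetical column name col0 and a fixed offset of 30):

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFDateAdd;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

final class DateAddExprSketch {
  // date_add(col0, 30): a string date column plus a constant int offset, returning DATE.
  static ExprNodeGenericFuncDesc dateAddCol0Plus30() {
    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
    children.add(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "col0", "table", false));
    children.add(new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 30));
    return new ExprNodeGenericFuncDesc(
        TypeInfoFactory.dateTypeInfo, new GenericUDFDateAdd(), children);
  }
}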
+ Object expectedResult = resultObjectsArray[0][i]; + + for (int v = 1; v < DateAddSubTestMode.count; v++) { + Object vectorResult = resultObjectsArray[v][i]; + if (expectedResult == null || vectorResult == null) { + if (expectedResult != null || vectorResult != null) { + Assert.fail( + "Row " + i + " " + DateAddSubTestMode.values()[v] + + " isAdd " + isAdd + + " " + columnScalarMode + + " result is NULL " + (vectorResult == null) + + " does not match row-mode expected result is NULL " + (expectedResult == null) + + " row values " + Arrays.toString(randomRows[i])); + } + } else { + + if (!expectedResult.equals(vectorResult)) { + Assert.fail( + "Row " + i + " " + DateAddSubTestMode.values()[v] + + " isAdd " + isAdd + + " " + columnScalarMode + + " result " + vectorResult.toString() + + " (" + vectorResult.getClass().getSimpleName() + ")" + + " does not match row-mode expected result " + expectedResult.toString() + + " (" + expectedResult.getClass().getSimpleName() + ")" + + " row values " + Arrays.toString(randomRows[i])); + } + } + } + } + } + + private void doRowDateAddSubTest(TypeInfo dateTimeStringTypeInfo, TypeInfo integerTypeInfo, + List columns, List children, + boolean isAdd, ExprNodeGenericFuncDesc exprDesc, + Object[][] randomRows, ColumnScalarMode columnScalarMode, + ObjectInspector rowInspector, Object[] resultObjects) throws Exception { + + System.out.println( + "*DEBUG* dateTimeStringTypeInfo " + dateTimeStringTypeInfo.toString() + + " integerTypeInfo " + integerTypeInfo + + " isAdd " + isAdd + + " dateAddSubTestMode ROW_MODE" + + " columnScalarMode " + columnScalarMode + + " exprDesc " + exprDesc.toString()); + + HiveConf hiveConf = new HiveConf(); + ExprNodeEvaluator evaluator = + ExprNodeEvaluatorFactory.get(exprDesc, hiveConf); + evaluator.initialize(rowInspector); + + ObjectInspector objectInspector = + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( + TypeInfoFactory.dateTypeInfo); + + final int rowCount = randomRows.length; + for (int i = 0; i < rowCount; i++) { + Object[] row = randomRows[i]; + Object result = evaluator.evaluate(row); + Object copyResult = + ObjectInspectorUtils.copyToStandardObject( + result, objectInspector, ObjectInspectorCopyOption.WRITABLE); + resultObjects[i] = copyResult; + } + } + + private void extractResultObjects(VectorizedRowBatch batch, int rowIndex, + VectorExtractRow resultVectorExtractRow, Object[] scrqtchRow, + TypeInfo targetTypeInfo, Object[] resultObjects) { + + ObjectInspector objectInspector = TypeInfoUtils + .getStandardWritableObjectInspectorFromTypeInfo(targetTypeInfo); + + boolean selectedInUse = batch.selectedInUse; + int[] selected = batch.selected; + for (int logicalIndex = 0; logicalIndex < batch.size; logicalIndex++) { + final int batchIndex = (selectedInUse ? 
selected[logicalIndex] : logicalIndex); + resultVectorExtractRow.extractRow(batch, batchIndex, scrqtchRow); + + Object copyResult = + ObjectInspectorUtils.copyToStandardObject( + scrqtchRow[0], objectInspector, ObjectInspectorCopyOption.WRITABLE); + resultObjects[rowIndex++] = copyResult; + } + } + + private void doVectorDateAddSubTest(TypeInfo dateTimeStringTypeInfo, TypeInfo integerTypeInfo, + List columns, + TypeInfo[] typeInfos, + List children, + boolean isAdd, ExprNodeGenericFuncDesc exprDesc, + DateAddSubTestMode dateAddSubTestMode, ColumnScalarMode columnScalarMode, + VectorRandomBatchSource batchSource, VectorizedRowBatchCtx batchContext, + Object[] resultObjects) + throws Exception { + + HiveConf hiveConf = new HiveConf(); + if (dateAddSubTestMode == DateAddSubTestMode.ADAPTOR) { + hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_TEST_VECTOR_ADAPTOR_OVERRIDE, true); + } + + DataTypePhysicalVariation[] dataTypePhysicalVariations = new DataTypePhysicalVariation[2]; + Arrays.fill(dataTypePhysicalVariations, DataTypePhysicalVariation.NONE); + + VectorizationContext vectorizationContext = + new VectorizationContext( + "name", + columns, + Arrays.asList(typeInfos), + Arrays.asList(dataTypePhysicalVariations), + hiveConf); + VectorExpression vectorExpression = vectorizationContext.getVectorExpression(exprDesc); + vectorExpression.transientInit(); + + VectorizedRowBatch batch = batchContext.createVectorizedRowBatch(); + + VectorExtractRow resultVectorExtractRow = new VectorExtractRow(); + resultVectorExtractRow.init(new TypeInfo[] { TypeInfoFactory.dateTypeInfo }, new int[] { columns.size() }); + Object[] scrqtchRow = new Object[1]; + + System.out.println( + "*DEBUG* dateTimeStringTypeInfo " + dateTimeStringTypeInfo.toString() + + " integerTypeInfo " + integerTypeInfo + + " isAdd " + isAdd + + " dateAddSubTestMode " + dateAddSubTestMode + + " columnScalarMode " + columnScalarMode + + " vectorExpression " + vectorExpression.toString()); + + batchSource.resetBatchIteration(); + int rowIndex = 0; + while (true) { + if (!batchSource.fillNextBatch(batch)) { + break; + } + vectorExpression.evaluate(batch); + extractResultObjects(batch, rowIndex, resultVectorExtractRow, scrqtchRow, + TypeInfoFactory.dateTypeInfo, resultObjects); + rowIndex += batch.size; + } + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateDiff.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateDiff.java new file mode 100644 index 0000000..2769720 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateDiff.java @@ -0,0 +1,527 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Random; + +import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; +import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFDateAdd; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFDateDiff; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFDateSub; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; + +import junit.framework.Assert; + +import org.junit.Ignore; +import org.junit.Test; + +public class TestVectorDateDiff { + + private static final boolean corruptDateStrings = false; + + @Test + public void testDateDate() throws Exception { + Random random = new Random(7743); + + doDateDiffTests(random, "date", "date"); + } + + @Ignore("HIVE-19493") + @Test + public void testDateTimestamp() throws Exception { + Random random = new Random(7743); + + doDateDiffTests(random, "date", "timestamp"); + } + + @Ignore("HIVE-19493") + @Test + public void testDateString() throws Exception { + Random random = new Random(7743); + + doDateDiffTests(random, "date", "string"); + } + + @Ignore("HIVE-19493") + @Test + public void testTimestampDate() throws Exception { + Random random = new Random(82); + + doDateDiffTests(random, "timestamp", "date"); + } + + @Ignore("HIVE-19493") + @Test + public void 
testTimestampTimestamp() throws Exception { + Random random = new Random(82); + + doDateDiffTests(random, "timestamp", "timestamp"); + } + + @Ignore("HIVE-19493") + @Test + public void testTimestampString() throws Exception { + Random random = new Random(82); + + doDateDiffTests(random, "timestamp", "string"); + } + + @Ignore("HIVE-19493") + @Test + public void testStringFamily() throws Exception { + Random random = new Random(12882); + + doDateDiffTests(random, "char(20)", "date"); + doDateDiffTests(random, "char(20)", "timestamp"); + doDateDiffTests(random, "char(20)", "string"); + + doDateDiffTests(random, "varchar(20)", "date"); + doDateDiffTests(random, "varchar(20)", "timestamp"); + doDateDiffTests(random, "varchar(20)", "string"); + } + + public enum DateDiffTestMode { + ROW_MODE, + ADAPTOR, + VECTOR_EXPRESSION; + + static final int count = values().length; + } + + public enum ColumnScalarMode { + COLUMN_COLUMN, + COLUMN_SCALAR, + SCALAR_COLUMN; + + static final int count = values().length; + } + + private void doDateDiffTests(Random random, String dateTimeStringTypeName, + String integerTypeName) + throws Exception { + for (ColumnScalarMode columnScalarMode : ColumnScalarMode.values()) { + doDateDiffTestsWithDiffColumnScalar( + random, dateTimeStringTypeName, integerTypeName, columnScalarMode); + } + } + + private static final String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + + private Object randomDateStringFamily( + Random random, TypeInfo dateTimeStringTypeInfo, boolean wantWritable) { + + String randomDateString = VectorRandomRowSource.randomPrimitiveDateStringObject(random); + if (corruptDateStrings && random.nextInt(40) == 39) { + + // Randomly corrupt. + int index = random.nextInt(randomDateString.length()); + char[] chars = randomDateString.toCharArray(); + chars[index] = alphabet.charAt(random.nextInt(alphabet.length())); + randomDateString = String.valueOf(chars); + } + + PrimitiveCategory dateTimeStringPrimitiveCategory = + ((PrimitiveTypeInfo) dateTimeStringTypeInfo).getPrimitiveCategory(); + switch (dateTimeStringPrimitiveCategory) { + case STRING: + return randomDateString; + case CHAR: + { + HiveChar hiveChar = + new HiveChar(randomDateString, ((CharTypeInfo) dateTimeStringTypeInfo).getLength()); + if (wantWritable) { + return new HiveCharWritable(hiveChar); + } else { + return hiveChar; + } + } + case VARCHAR: + { + HiveVarchar hiveVarchar = + new HiveVarchar( + randomDateString, ((VarcharTypeInfo) dateTimeStringTypeInfo).getLength()); + if (wantWritable) { + return new HiveVarcharWritable(hiveVarchar); + } else { + return hiveVarchar; + } + } + default: + throw new RuntimeException("Unexpected string family category " + dateTimeStringPrimitiveCategory); + } + } + + private void doDateDiffTestsWithDiffColumnScalar(Random random, String dateTimeStringTypeName1, + String dateTimeStringTypeName2, ColumnScalarMode columnScalarMode) + throws Exception { + + TypeInfo dateTimeStringTypeInfo1 = + TypeInfoUtils.getTypeInfoFromTypeString(dateTimeStringTypeName1); + PrimitiveCategory dateTimeStringPrimitiveCategory1 = + ((PrimitiveTypeInfo) dateTimeStringTypeInfo1).getPrimitiveCategory(); + boolean isStringFamily1 = + (dateTimeStringPrimitiveCategory1 == PrimitiveCategory.STRING || + dateTimeStringPrimitiveCategory1 == PrimitiveCategory.CHAR || + dateTimeStringPrimitiveCategory1 == PrimitiveCategory.VARCHAR); + + TypeInfo dateTimeStringTypeInfo2 = + TypeInfoUtils.getTypeInfoFromTypeString(dateTimeStringTypeName2); + PrimitiveCategory dateTimeStringPrimitiveCategory2 = + 
((PrimitiveTypeInfo) dateTimeStringTypeInfo2).getPrimitiveCategory(); + boolean isStringFamily2 = + (dateTimeStringPrimitiveCategory2 == PrimitiveCategory.STRING || + dateTimeStringPrimitiveCategory2 == PrimitiveCategory.CHAR || + dateTimeStringPrimitiveCategory2 == PrimitiveCategory.VARCHAR); + + List explicitTypeNameList = new ArrayList(); + List explicitDataTypePhysicalVariationList = + new ArrayList(); + + List columns = new ArrayList(); + int columnNum = 0; + ExprNodeDesc col1Expr; + if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN || + columnScalarMode == ColumnScalarMode.COLUMN_SCALAR) { + explicitTypeNameList.add(dateTimeStringTypeName1); + explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE); + + String columnName = "col" + (columnNum++); + col1Expr = new ExprNodeColumnDesc(dateTimeStringTypeInfo1, columnName, "table", false); + columns.add(columnName); + } else { + Object scalar1Object; + if (!isStringFamily1) { + scalar1Object = + VectorRandomRowSource.randomPrimitiveObject( + random, (PrimitiveTypeInfo) dateTimeStringTypeInfo1); + } else { + scalar1Object = + randomDateStringFamily( + random, dateTimeStringTypeInfo1, /* wantWritable */ false); + } + col1Expr = new ExprNodeConstantDesc(dateTimeStringTypeInfo1, scalar1Object); + } + ExprNodeDesc col2Expr; + if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN || + columnScalarMode == ColumnScalarMode.SCALAR_COLUMN) { + explicitTypeNameList.add(dateTimeStringTypeName2); + explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE); + + String columnName = "col" + (columnNum++); + col2Expr = new ExprNodeColumnDesc(dateTimeStringTypeInfo2, columnName, "table", false); + columns.add(columnName); + } else { + Object scalar2Object; + if (!isStringFamily2) { + scalar2Object = + VectorRandomRowSource.randomPrimitiveObject( + random, (PrimitiveTypeInfo) dateTimeStringTypeInfo2); + } else { + scalar2Object = + randomDateStringFamily( + random, dateTimeStringTypeInfo2, /* wantWritable */ false); + } + col2Expr = new ExprNodeConstantDesc(dateTimeStringTypeInfo2, scalar2Object); + } + + List children = new ArrayList(); + children.add(col1Expr); + children.add(col2Expr); + + //---------------------------------------------------------------------------------------------- + + String[] columnNames = columns.toArray(new String[0]); + + VectorRandomRowSource rowSource = new VectorRandomRowSource(); + + rowSource.initExplicitSchema( + random, explicitTypeNameList, /* maxComplexDepth */ 0, /* allowNull */ true, + explicitDataTypePhysicalVariationList); + + Object[][] randomRows = rowSource.randomRows(100000); + + if (isStringFamily1) { + if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN || + columnScalarMode == ColumnScalarMode.COLUMN_SCALAR) { + for (int i = 0; i < randomRows.length; i++) { + Object[] row = randomRows[i]; + Object object = row[columnNum - 1]; + if (row[0] != null) { + row[0] = + randomDateStringFamily( + random, dateTimeStringTypeInfo1, /* wantWritable */ true); + } + } + } + } + + if (isStringFamily2) { + if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN || + columnScalarMode == ColumnScalarMode.SCALAR_COLUMN) { + for (int i = 0; i < randomRows.length; i++) { + Object[] row = randomRows[i]; + Object object = row[columnNum - 1]; + if (row[columnNum - 1] != null) { + row[columnNum - 1] = + randomDateStringFamily( + random, dateTimeStringTypeInfo2, /* wantWritable */ true); + } + } + } + } + + VectorRandomBatchSource batchSource = + VectorRandomBatchSource.createInterestingBatches( 
+ random, + rowSource, + randomRows, + null); + + String[] outputScratchTypeNames = new String[] { "date" }; + + VectorizedRowBatchCtx batchContext = + new VectorizedRowBatchCtx( + columnNames, + rowSource.typeInfos(), + rowSource.dataTypePhysicalVariations(), + /* dataColumnNums */ null, + /* partitionColumnCount */ 0, + /* virtualColumnCount */ 0, + /* neededVirtualColumns */ null, + outputScratchTypeNames, + null); + + final int rowCount = randomRows.length; + Object[][] resultObjectsArray = new Object[DateDiffTestMode.count][]; + for (int i = 0; i < DateDiffTestMode.count; i++) { + + Object[] resultObjects = new Object[rowCount]; + resultObjectsArray[i] = resultObjects; + + + GenericUDF udf = new GenericUDFDateDiff(); + + ExprNodeGenericFuncDesc exprDesc = + new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, udf, children); + + DateDiffTestMode dateDiffTestMode = DateDiffTestMode.values()[i]; + switch (dateDiffTestMode) { + case ROW_MODE: + doRowDateAddSubTest( + dateTimeStringTypeInfo1, + dateTimeStringTypeInfo2, + columns, + children, + exprDesc, + randomRows, + columnScalarMode, + rowSource.rowStructObjectInspector(), + resultObjects); + break; + case ADAPTOR: + case VECTOR_EXPRESSION: + doVectorDateAddSubTest( + dateTimeStringTypeInfo1, + dateTimeStringTypeInfo2, + columns, + rowSource.typeInfos(), + children, + exprDesc, + dateDiffTestMode, + columnScalarMode, + batchSource, + batchContext, + resultObjects); + break; + default: + throw new RuntimeException("Unexpected IF statement test mode " + dateDiffTestMode); + } + } + + for (int i = 0; i < rowCount; i++) { + // Row-mode is the expected value. + Object expectedResult = resultObjectsArray[0][i]; + + for (int v = 1; v < DateDiffTestMode.count; v++) { + Object vectorResult = resultObjectsArray[v][i]; + if (expectedResult == null || vectorResult == null) { + if (expectedResult != null || vectorResult != null) { + Assert.fail( + "Row " + i + " " + DateDiffTestMode.values()[v] + + " " + columnScalarMode + + " result is NULL " + (vectorResult == null) + + " does not match row-mode expected result is NULL " + (expectedResult == null) + + " row values " + Arrays.toString(randomRows[i])); + } + } else { + + if (!expectedResult.equals(vectorResult)) { + Assert.fail( + "Row " + i + " " + DateDiffTestMode.values()[v] + + " " + columnScalarMode + + " result " + vectorResult.toString() + + " (" + vectorResult.getClass().getSimpleName() + ")" + + " does not match row-mode expected result " + expectedResult.toString() + + " (" + expectedResult.getClass().getSimpleName() + ")" + + " row values " + Arrays.toString(randomRows[i])); + } + } + } + } + } + + private void doRowDateAddSubTest(TypeInfo dateTimeStringTypeInfo1, + TypeInfo dateTimeStringTypeInfo2, + List columns, List children, + ExprNodeGenericFuncDesc exprDesc, + Object[][] randomRows, ColumnScalarMode columnScalarMode, + ObjectInspector rowInspector, Object[] resultObjects) throws Exception { + + System.out.println( + "*DEBUG* dateTimeStringTypeInfo " + dateTimeStringTypeInfo1.toString() + + " dateTimeStringTypeInfo2 " + dateTimeStringTypeInfo2 + + " dateDiffTestMode ROW_MODE" + + " columnScalarMode " + columnScalarMode + + " exprDesc " + exprDesc.toString()); + + HiveConf hiveConf = new HiveConf(); + ExprNodeEvaluator evaluator = + ExprNodeEvaluatorFactory.get(exprDesc, hiveConf); + evaluator.initialize(rowInspector); + + ObjectInspector objectInspector = + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( + TypeInfoFactory.intTypeInfo); + + final int rowCount = 
randomRows.length; + for (int i = 0; i < rowCount; i++) { + Object[] row = randomRows[i]; + Object result = evaluator.evaluate(row); + Object copyResult = + ObjectInspectorUtils.copyToStandardObject( + result, objectInspector, ObjectInspectorCopyOption.WRITABLE); + resultObjects[i] = copyResult; + } + } + + private void extractResultObjects(VectorizedRowBatch batch, int rowIndex, + VectorExtractRow resultVectorExtractRow, Object[] scrqtchRow, Object[] resultObjects) { + + boolean selectedInUse = batch.selectedInUse; + int[] selected = batch.selected; + for (int logicalIndex = 0; logicalIndex < batch.size; logicalIndex++) { + final int batchIndex = (selectedInUse ? selected[logicalIndex] : logicalIndex); + resultVectorExtractRow.extractRow(batch, batchIndex, scrqtchRow); + + // UNDONE: Need to copy the object? + resultObjects[rowIndex++] = scrqtchRow[0]; + } + } + + private void doVectorDateAddSubTest(TypeInfo dateTimeStringTypeInfo1, + TypeInfo dateTimeStringTypeInfo2, + List columns, + TypeInfo[] typeInfos, + List children, + ExprNodeGenericFuncDesc exprDesc, + DateDiffTestMode dateDiffTestMode, ColumnScalarMode columnScalarMode, + VectorRandomBatchSource batchSource, VectorizedRowBatchCtx batchContext, + Object[] resultObjects) + throws Exception { + + HiveConf hiveConf = new HiveConf(); + if (dateDiffTestMode == DateDiffTestMode.ADAPTOR) { + hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_TEST_VECTOR_ADAPTOR_OVERRIDE, true); + } + + DataTypePhysicalVariation[] dataTypePhysicalVariations = new DataTypePhysicalVariation[2]; + Arrays.fill(dataTypePhysicalVariations, DataTypePhysicalVariation.NONE); + + VectorizationContext vectorizationContext = + new VectorizationContext( + "name", + columns, + Arrays.asList(typeInfos), + Arrays.asList(dataTypePhysicalVariations), + hiveConf); + VectorExpression vectorExpression = vectorizationContext.getVectorExpression(exprDesc); + vectorExpression.transientInit(); + + VectorizedRowBatch batch = batchContext.createVectorizedRowBatch(); + + VectorExtractRow resultVectorExtractRow = new VectorExtractRow(); + resultVectorExtractRow.init(new TypeInfo[] { TypeInfoFactory.intTypeInfo }, new int[] { columns.size() }); + Object[] scrqtchRow = new Object[1]; + + System.out.println( + "*DEBUG* dateTimeStringTypeInfo1 " + dateTimeStringTypeInfo1.toString() + + " dateTimeStringTypeInfo2 " + dateTimeStringTypeInfo2.toString() + + " dateDiffTestMode " + dateDiffTestMode + + " columnScalarMode " + columnScalarMode + + " vectorExpression " + vectorExpression.toString()); + + batchSource.resetBatchIteration(); + int rowIndex = 0; + while (true) { + if (!batchSource.fillNextBatch(batch)) { + break; + } + vectorExpression.evaluate(batch); + extractResultObjects(batch, rowIndex, resultVectorExtractRow, scrqtchRow, resultObjects); + rowIndex += batch.size; + } + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIfStatement.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIfStatement.java index c52ca19..e54ccaa 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIfStatement.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIfStatement.java @@ -43,6 +43,8 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; @@ -178,16 +180,30 @@ private void doIfTests(Random random, String typeName) private void doIfTests(Random random, String typeName, DataTypePhysicalVariation dataTypePhysicalVariation) throws Exception { - for (ColumnScalarMode columnScalarMode : ColumnScalarMode.values()) { - doIfTestsWithDiffColumnScalar( - random, typeName, columnScalarMode, dataTypePhysicalVariation); - } + doIfTestsWithDiffColumnScalar( + random, typeName, ColumnScalarMode.COLUMN_COLUMN, dataTypePhysicalVariation, false, false); + doIfTestsWithDiffColumnScalar( + random, typeName, ColumnScalarMode.COLUMN_SCALAR, dataTypePhysicalVariation, false, false); + doIfTestsWithDiffColumnScalar( + random, typeName, ColumnScalarMode.COLUMN_SCALAR, dataTypePhysicalVariation, false, true); + doIfTestsWithDiffColumnScalar( + random, typeName, ColumnScalarMode.SCALAR_COLUMN, dataTypePhysicalVariation, false, false); + doIfTestsWithDiffColumnScalar( + random, typeName, ColumnScalarMode.SCALAR_COLUMN, dataTypePhysicalVariation, true, false); + doIfTestsWithDiffColumnScalar( + random, typeName, ColumnScalarMode.SCALAR_SCALAR, dataTypePhysicalVariation, false, false); } private void doIfTestsWithDiffColumnScalar(Random random, String typeName, - ColumnScalarMode columnScalarMode, DataTypePhysicalVariation dataTypePhysicalVariation) + ColumnScalarMode columnScalarMode, DataTypePhysicalVariation dataTypePhysicalVariation, + boolean isNullScalar1, boolean isNullScalar2) throws Exception { + System.out.println("*DEBUG* typeName " + typeName + + " columnScalarMode " + columnScalarMode + + " isNullScalar1 " + isNullScalar1 + + " isNullScalar2 " + isNullScalar2); + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); boolean isDecimal64 = (dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64); @@ -225,9 +241,14 @@ private void doIfTestsWithDiffColumnScalar(Random random, String typeName, col2Expr = new ExprNodeColumnDesc(typeInfo, columnName, "table", false); columns.add(columnName); } else { - Object scalar1Object = - VectorRandomRowSource.randomPrimitiveObject( - random, (PrimitiveTypeInfo) typeInfo); + Object scalar1Object; + if (isNullScalar1) { + scalar1Object = null; + } else { + scalar1Object = + VectorRandomRowSource.randomPrimitiveObject( + random, (PrimitiveTypeInfo) typeInfo); + } col2Expr = new ExprNodeConstantDesc(typeInfo, scalar1Object); } ExprNodeDesc col3Expr; @@ -237,9 +258,14 @@ private void doIfTestsWithDiffColumnScalar(Random random, String typeName, col3Expr = new ExprNodeColumnDesc(typeInfo, columnName, "table", false); columns.add(columnName); } else { - Object scalar2Object = - VectorRandomRowSource.randomPrimitiveObject( - random, (PrimitiveTypeInfo) typeInfo); + Object scalar2Object; + if (isNullScalar2) { + scalar2Object = null; + } else { + scalar2Object = + VectorRandomRowSource.randomPrimitiveObject( + random, (PrimitiveTypeInfo) typeInfo); + } col3Expr = new ExprNodeConstantDesc(typeInfo, scalar2Object); } @@ -369,13 +395,22 @@ private void doRowIfTest(TypeInfo typeInfo, List columns, List explicitTypeNameList = new ArrayList(); + List explicitDataTypePhysicalVariationList = + new ArrayList(); + + List columns = new ArrayList(); + int columnNum = 0; + ExprNodeDesc col1Expr; + 
explicitTypeNameList.add(dateTimeStringTypeName); + explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE); + + String columnName = "col" + (columnNum++); + col1Expr = new ExprNodeColumnDesc(dateTimeStringTypeInfo, columnName, "table", false); + columns.add(columnName); + + VectorRandomRowSource rowSource = new VectorRandomRowSource(); + + rowSource.initExplicitSchema( + random, explicitTypeNameList, /* maxComplexDepth */ 0, /* allowNull */ true, + explicitDataTypePhysicalVariationList); + + List children = new ArrayList(); + children.add(col1Expr); + + //---------------------------------------------------------------------------------------------- + + String[] columnNames = columns.toArray(new String[0]); + + Object[][] randomRows = rowSource.randomRows(100000); + + if (isStringFamily) { + for (int i = 0; i < randomRows.length; i++) { + Object[] row = randomRows[i]; + Object object = row[columnNum - 1]; + if (row[0] != null) { + row[0] = + randomTimestampStringFamily( + random, dateTimeStringTypeInfo, /* wantWritable */ true); + } + } + } + + VectorRandomBatchSource batchSource = + VectorRandomBatchSource.createInterestingBatches( + random, + rowSource, + randomRows, + null); + + if (dateTimeStringPrimitiveCategory == PrimitiveCategory.DATE && + (extractFunctionName.equals("hour") || + extractFunctionName.equals("minute") || + extractFunctionName.equals("second"))) { + return; + } + + final UDF udf; + switch (extractFunctionName) { + case "day": + udf = new UDFDayOfMonth(); + break; + case "dayofweek": + udf = new UDFDayOfWeek(); + break; + case "hour": + udf = new UDFHour(); + break; + case "minute": + udf = new UDFMinute(); + break; + case "month": + udf = new UDFMonth(); + break; + case "second": + udf = new UDFSecond(); + break; + case "yearweek": + udf = new UDFWeekOfYear(); + break; + case "year": + udf = new UDFYear(); + break; + default: + throw new RuntimeException("Unexpected extract function name " + extractFunctionName); + } + + GenericUDFBridge genericUDFBridge = new GenericUDFBridge(); + genericUDFBridge.setUdfClassName(udf.getClass().getName()); + + ExprNodeGenericFuncDesc exprDesc = + new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, genericUDFBridge, children); + + final int rowCount = randomRows.length; + Object[][] resultObjectsArray = new Object[TimestampExtractTestMode.count][]; + for (int i = 0; i < TimestampExtractTestMode.count; i++) { + + Object[] resultObjects = new Object[rowCount]; + resultObjectsArray[i] = resultObjects; + + TimestampExtractTestMode timestampExtractTestMode = TimestampExtractTestMode.values()[i]; + switch (timestampExtractTestMode) { + case ROW_MODE: + if (!doRowCastTest( + dateTimeStringTypeInfo, + columns, + children, + exprDesc, + randomRows, + rowSource.rowStructObjectInspector(), + resultObjects)) { + return; + } + break; + case ADAPTOR: + case VECTOR_EXPRESSION: + if (!doVectorCastTest( + dateTimeStringTypeInfo, + columns, + columnNames, + rowSource.typeInfos(), + rowSource.dataTypePhysicalVariations(), + children, + exprDesc, + timestampExtractTestMode, + batchSource, + resultObjects)) { + return; + } + break; + default: + throw new RuntimeException("Unexpected IF statement test mode " + timestampExtractTestMode); + } + } + + for (int i = 0; i < rowCount; i++) { + // Row-mode is the expected value. 
+ Object expectedResult = resultObjectsArray[0][i]; + + for (int v = 1; v < TimestampExtractTestMode.count; v++) { + Object vectorResult = resultObjectsArray[v][i]; + if (expectedResult == null || vectorResult == null) { + if (expectedResult != null || vectorResult != null) { + Assert.fail( + "Row " + i + + " dateTimeStringTypeName " + dateTimeStringTypeName + + " extractFunctionName " + extractFunctionName + + " " + TimestampExtractTestMode.values()[v] + + " result is NULL " + (vectorResult == null ? "YES" : "NO result " + vectorResult.toString()) + + " does not match row-mode expected result is NULL " + + (expectedResult == null ? "YES" : "NO result " + expectedResult.toString()) + + " row values " + Arrays.toString(randomRows[i])); + } + } else { + + if (!expectedResult.equals(vectorResult)) { + Assert.fail( + "Row " + i + + " dateTimeStringTypeName " + dateTimeStringTypeName + + " extractFunctionName " + extractFunctionName + + " " + TimestampExtractTestMode.values()[v] + + " result " + vectorResult.toString() + + " (" + vectorResult.getClass().getSimpleName() + ")" + + " does not match row-mode expected result " + expectedResult.toString() + + " (" + expectedResult.getClass().getSimpleName() + ")" + + " row values " + Arrays.toString(randomRows[i])); + } + } + } + } + } + + private boolean doRowCastTest(TypeInfo dateTimeStringTypeInfo, + List columns, List children, + ExprNodeGenericFuncDesc exprDesc, + Object[][] randomRows, ObjectInspector rowInspector, Object[] resultObjects) + throws Exception { + + System.out.println( + "*DEBUG* dateTimeStringTypeInfo " + dateTimeStringTypeInfo.toString() + + " timestampExtractTestMode ROW_MODE" + + " exprDesc " + exprDesc.toString()); + + HiveConf hiveConf = new HiveConf(); + ExprNodeEvaluator evaluator = + ExprNodeEvaluatorFactory.get(exprDesc, hiveConf); + try { + evaluator.initialize(rowInspector); + } catch (HiveException e) { + return false; + } + + ObjectInspector objectInspector = TypeInfoUtils + .getStandardWritableObjectInspectorFromTypeInfo( + TypeInfoFactory.intTypeInfo); + + PrimitiveCategory dateTimeStringPrimitiveCategory = + ((PrimitiveTypeInfo) dateTimeStringTypeInfo).getPrimitiveCategory(); + + final int rowCount = randomRows.length; + for (int i = 0; i < rowCount; i++) { + Object[] row = randomRows[i]; + Object object = row[0]; + + Object result; + switch (dateTimeStringPrimitiveCategory) { + case TIMESTAMP: + result = evaluator.evaluate((TimestampWritable) object); + break; + case DATE: + result = evaluator.evaluate((DateWritable) object); + break; + case STRING: + { + Text text; + if (object == null) { + text = null; + } else { + text = new Text(); + text.set((String) object); + } + result = evaluator.evaluate(text); + } + break; + default: + throw new RuntimeException( + "Unexpected date timestamp string primitive category " + + dateTimeStringPrimitiveCategory); + } + + Object copyResult = + ObjectInspectorUtils.copyToStandardObject( + result, objectInspector, ObjectInspectorCopyOption.WRITABLE); + resultObjects[i] = copyResult; + } + + return true; + } + + private void extractResultObjects(VectorizedRowBatch batch, int rowIndex, + VectorExtractRow resultVectorExtractRow, Object[] scrqtchRow, + TypeInfo targetTypeInfo, Object[] resultObjects) { + + ObjectInspector objectInspector = TypeInfoUtils + .getStandardWritableObjectInspectorFromTypeInfo(targetTypeInfo); + + boolean selectedInUse = batch.selectedInUse; + int[] selected = batch.selected; + for (int logicalIndex = 0; logicalIndex < batch.size; logicalIndex++) { + final 
int batchIndex = (selectedInUse ? selected[logicalIndex] : logicalIndex); + resultVectorExtractRow.extractRow(batch, batchIndex, scrqtchRow); + + Object copyResult = + ObjectInspectorUtils.copyToStandardObject( + scrqtchRow[0], objectInspector, ObjectInspectorCopyOption.WRITABLE); + resultObjects[rowIndex++] = copyResult; + } + + } + + private boolean doVectorCastTest(TypeInfo dateTimeStringTypeInfo, + List columns, String[] columnNames, + TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations, + List children, + ExprNodeGenericFuncDesc exprDesc, + TimestampExtractTestMode timestampExtractTestMode, + VectorRandomBatchSource batchSource, + Object[] resultObjects) + throws Exception { + + HiveConf hiveConf = new HiveConf(); + if (timestampExtractTestMode == TimestampExtractTestMode.ADAPTOR) { + hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_TEST_VECTOR_ADAPTOR_OVERRIDE, true); + } + + VectorizationContext vectorizationContext = + new VectorizationContext( + "name", + columns, + Arrays.asList(typeInfos), + Arrays.asList(dataTypePhysicalVariations), + hiveConf); + VectorExpression vectorExpression = vectorizationContext.getVectorExpression(exprDesc); + vectorExpression.transientInit(); + + System.out.println( + "*DEBUG* dateTimeStringTypeInfo " + dateTimeStringTypeInfo.toString() + + " timestampExtractTestMode " + timestampExtractTestMode + + " vectorExpression " + vectorExpression.getClass().getSimpleName()); + + VectorRandomRowSource rowSource = batchSource.getRowSource(); + VectorizedRowBatchCtx batchContext = + new VectorizedRowBatchCtx( + columnNames, + rowSource.typeInfos(), + rowSource.dataTypePhysicalVariations(), + /* dataColumnNums */ null, + /* partitionColumnCount */ 0, + /* virtualColumnCount */ 0, + /* neededVirtualColumns */ null, + vectorizationContext.getScratchColumnTypeNames(), + vectorizationContext.getScratchDataTypePhysicalVariations()); + + VectorizedRowBatch batch = batchContext.createVectorizedRowBatch(); + + VectorExtractRow resultVectorExtractRow = new VectorExtractRow(); + + resultVectorExtractRow.init( + new TypeInfo[] { TypeInfoFactory.intTypeInfo }, new int[] { vectorExpression.getOutputColumnNum() }); + Object[] scrqtchRow = new Object[1]; + + batchSource.resetBatchIteration(); + int rowIndex = 0; + while (true) { + if (!batchSource.fillNextBatch(batch)) { + break; + } + vectorExpression.evaluate(batch); + extractResultObjects(batch, rowIndex, resultVectorExtractRow, scrqtchRow, + TypeInfoFactory.intTypeInfo, resultObjects); + rowIndex += batch.size; + } + + return true; + } +} diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java index 953604c..e386109 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java @@ -428,14 +428,13 @@ public void flatten(boolean selectedInUse, int[] sel, int size) { // at position 0 is undefined if the position 0 value is null. 
if (noNulls || !isNull[0]) { - // loops start at position 1 because position 0 is already set if (selectedInUse) { - for (int j = 1; j < size; j++) { + for (int j = 0; j < size; j++) { int i = sel[j]; this.setRef(i, vector[0], start[0], length[0]); } } else { - for (int i = 1; i < size; i++) { + for (int i = 0; i < size; i++) { this.setRef(i, vector[0], start[0], length[0]); } } diff --git vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java index 45fa739..e81a1ac 100644 --- vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java +++ vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java @@ -1470,17 +1470,6 @@ private void generateFilterStringColumnBetween(String[] tdesc) throws IOExceptio private void generateFilterTruncStringColumnBetween(String[] tdesc) throws IOException { String truncStringTypeName = tdesc[1]; - String truncStringHiveType; - String truncStringHiveGetBytes; - if ("Char".equals(truncStringTypeName)) { - truncStringHiveType = "HiveChar"; - truncStringHiveGetBytes = "getStrippedValue().getBytes()"; - } else if ("VarChar".equals(truncStringTypeName)) { - truncStringHiveType = "HiveVarchar"; - truncStringHiveGetBytes = "getValue().getBytes()"; - } else { - throw new Error("Unsupported string type: " + truncStringTypeName); - } String optionalNot = tdesc[2]; String className = "Filter" + truncStringTypeName + "Column" + (optionalNot.equals("!") ? "Not" : "") + "Between"; @@ -1488,8 +1477,6 @@ private void generateFilterTruncStringColumnBetween(String[] tdesc) throws IOExc File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); String templateString = readFile(templateFile); templateString = templateString.replaceAll("", truncStringTypeName); - templateString = templateString.replaceAll("", truncStringHiveType); - templateString = templateString.replaceAll("", truncStringHiveGetBytes); templateString = templateString.replaceAll("", className); templateString = templateString.replaceAll("", optionalNot); @@ -1584,13 +1571,13 @@ private void generateFilterColumnBetweenDynamicValue(String[] tdesc) throws Exce getValueMethod = ".getBytes()"; conversionMethod = ""; } else if (operandType.equals("char")) { - defaultValue = "new HiveChar(\"\", 1)"; + defaultValue = "new byte[0]"; vectorType = "byte[]"; getPrimitiveMethod = "getHiveChar"; getValueMethod = ".getStrippedValue().getBytes()"; // Does vectorization use stripped char values? 
conversionMethod = ""; } else if (operandType.equals("varchar")) { - defaultValue = "new HiveVarchar(\"\", 1)"; + defaultValue = "new byte[0]"; vectorType = "byte[]"; getPrimitiveMethod = "getHiveVarchar"; getValueMethod = ".getValue().getBytes()"; @@ -2110,17 +2097,6 @@ private void generateStringColumnCompareScalar(String[] tdesc, String className) private void generateStringCompareTruncStringScalar(String[] tdesc, String className, String baseClassName) throws IOException { String truncStringTypeName = tdesc[1]; - String truncStringHiveType; - String truncStringHiveGetBytes; - if ("Char".equals(truncStringTypeName)) { - truncStringHiveType = "HiveChar"; - truncStringHiveGetBytes = "getStrippedValue().getBytes()"; - } else if ("VarChar".equals(truncStringTypeName)) { - truncStringHiveType = "HiveVarchar"; - truncStringHiveGetBytes = "getValue().getBytes()"; - } else { - throw new Error("Unsupported string type: " + truncStringTypeName); - } String operatorSymbol = tdesc[3]; // Read the template into a string; File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); @@ -2130,8 +2106,6 @@ private void generateStringCompareTruncStringScalar(String[] tdesc, String class templateString = templateString.replaceAll("", baseClassName); templateString = templateString.replaceAll("", operatorSymbol); templateString = templateString.replaceAll("", truncStringTypeName); - templateString = templateString.replaceAll("", truncStringHiveType); - templateString = templateString.replaceAll("", truncStringHiveGetBytes); writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, className, templateString); }