commit e218908532fc75bd609fe3b97312f0675152dd94 Author: Vihang Karajgaonkar Date: Sun Jan 14 09:47:26 2018 -0800 HIVE-18421 : Vectorized execution handles overflows in a different manner than non-vectorized execution diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 4f2e6d31af85cf1d9866ad1419f5c06a18eea347..8f8e0e140d7ebe832d2aab835621349c49b06f98 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2972,6 +2972,8 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "The default value is true."), HIVE_VECTORIZATION_ROW_IDENTIFIER_ENABLED("hive.vectorized.row.identifier.enabled", true, "This flag should be set to true to enable vectorization of ROW__ID."), + HIVE_VECTORIZATION_USE_CHECKED_EXPRESSIONS("hive.vectorized.use.checked.expressions", true, + "This flag should be set to true to use overflow checked vector expressions."), HIVE_VECTORIZED_INPUT_FORMAT_SUPPORTS_ENABLED( "hive.vectorized.input.format.supports.enabled", @@ -2983,7 +2985,7 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal HIVE_TEST_VECTORIZATION_ENABLED_OVERRIDE("hive.test.vectorized.execution.enabled.override", "none", new StringSet("none", "enable", "disable"), "internal use only, used to override the hive.vectorized.execution.enabled setting and\n" + - "turn off vectorization. The default is false, or course", + "turn off vectorization. 
The default is false, of course", true), HIVE_TYPE_CHECK_ON_INSERT("hive.typecheck.on.insert", true, "This property has been extended to control " diff --git a/ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt b/ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt index b5011c3adcedf8974d3241994733e0021a851cbd..64c4e01e27ed19b3d88373f6bcf2a9ae0b5054bd 100644 --- a/ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt +++ b/ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import org.apache.hadoop.hive.ql.exec.vector.expressions.OverflowUtils; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; @@ -122,7 +123,13 @@ public class extends VectorExpression { } } } - + +#IF CHECKED + //when operating in checked mode make sure we handle overflows similar to non-vectorized expression + OverflowUtils.accountForOverflow(getOutputTypeInfo(), outputColVector, + batch.selectedInUse, sel, n); +#ELSE +#ENDIF CHECKED /* For the case when the output can have null values, follow * the convention that the data values must be 1 for long and * NaN for double. 
This is to prevent possible later zero-divide errors @@ -132,6 +139,13 @@ public class extends VectorExpression { NullUtil.setNullDataEntries(outputColVector, batch.selectedInUse, sel, n); } +#IF CHECKED + @Override + public boolean supportsCheckedExecution() { + return true; + } +#ENDIF CHECKED + @Override public String vectorExpressionParameters() { return getColumnParamString(0, colNum1) + ", " + getColumnParamString(1, colNum2); diff --git a/ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt b/ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt index cbec1abcc2b66f3ffc91b4778daf5017eff4379d..e7c23855196aae0fd287d81bd65d2e23240f6bd0 100644 --- a/ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt +++ b/ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import org.apache.hadoop.hive.ql.exec.vector.expressions.OverflowUtils; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.; import org.apache.hadoop.hive.ql.exec.vector.; @@ -103,10 +104,22 @@ public class extends VectorExpression { System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } - +#IF CHECKED + //when operating in checked mode make sure we handle overflows similar to non-vectorized expression + OverflowUtils.accountForOverflow(getOutputTypeInfo(), outputColVector, + batch.selectedInUse, sel, n); +#ELSE +#ENDIF CHECKED NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } +#IF CHECKED + @Override + public boolean supportsCheckedExecution() { + return true; + } +#ENDIF CHECKED + @Override public String vectorExpressionParameters() { return getColumnParamString(0, colNum) + ", val " + value; diff --git a/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt b/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt 
index 3e955578933dd7990939865527c3bd11023b3a90..dd5330d829a7f311d5f3aacd524de4218a8dc166 100644 --- a/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt +++ b/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import org.apache.hadoop.hive.ql.exec.vector.expressions.OverflowUtils; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; @@ -143,6 +144,12 @@ public class extends VectorExpression { } } +#IF CHECKED + //when operating in checked mode make sure we handle overflows similar to non-vectorized expression + OverflowUtils.accountForOverflow(getOutputTypeInfo(), outputColVector, + batch.selectedInUse, sel, n); +#ELSE +#ENDIF CHECKED /* For the case when the output can have null values, follow * the convention that the data values must be 1 for long and * NaN for double. 
This is to prevent possible later zero-divide errors @@ -157,6 +164,13 @@ public class extends VectorExpression { } } +#IF CHECKED + @Override + public boolean supportsCheckedExecution() { + return true; + } +#ENDIF CHECKED + @Override public String vectorExpressionParameters() { return getColumnParamString(0, colNum1) + ", " + getColumnParamString(1, colNum2); diff --git a/ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt b/ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt index f0ab4711e79c8a1bfceebcde9a3dda2b4e15a38a..b200ef97710584f93c4d2269f8d3694cf0e1c08f 100644 --- a/ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt +++ b/ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import org.apache.hadoop.hive.ql.exec.vector.expressions.OverflowUtils; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -100,7 +101,21 @@ public class extends VectorExpression { } outputColVector.isRepeating = false; } + +#IF CHECKED + //when operating in checked mode make sure we handle overflows similar to non-vectorized expression + OverflowUtils.accountForOverflow(getOutputTypeInfo(), outputColVector, + batch.selectedInUse, sel, n); +#ELSE +#ENDIF CHECKED + } + +#IF CHECKED + @Override + public boolean supportsCheckedExecution() { + return true; } +#ENDIF CHECKED @Override public String vectorExpressionParameters() { diff --git a/ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt b/ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt index e95baa6199e138a4e0c009e62ce495b626e5909c..67106c296677f6bd93584eca2b5d8834cedb8732 100644 --- a/ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt +++ 
b/ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt @@ -26,6 +26,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; * of these ColumnVector imports may be needed. Listing both of them * rather than using ....vectorization.*; */ +import org.apache.hadoop.hive.ql.exec.vector.expressions.OverflowUtils; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -115,10 +116,23 @@ public class extends VectorExpression { System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } +#IF CHECKED + //when operating in checked mode make sure we handle overflows similar to non-vectorized expression + OverflowUtils.accountForOverflow(getOutputTypeInfo(), outputColVector, + batch.selectedInUse, sel, n); +#ELSE +#ENDIF CHECKED NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } +#IF CHECKED + @Override + public boolean supportsCheckedExecution() { + return true; + } +#ENDIF CHECKED + @Override public String vectorExpressionParameters() { return "val " + value + ", " + getColumnParamString(1, colNum); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java index bbe78c8720e16163b642f54d27fdf6b65ba9850b..3167e9e9eb34afe2933bb475e595dcb080597f7f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java @@ -351,22 +351,37 @@ public String toString() { } } - public Class getVectorExpressionClass(Class udf, Descriptor descriptor) throws HiveException { + public Class getVectorExpressionClass(Class udf, Descriptor descriptor, + boolean useCheckedExpressionIfAvailable) throws HiveException { VectorizedExpressions annotation = 
AnnotationUtils.getAnnotation(udf, VectorizedExpressions.class); if (annotation == null || annotation.value() == null) { return null; } Class[] list = annotation.value(); + Class matchedVe = null; for (Class ve : list) { try { - if (ve.newInstance().getDescriptor().matches(descriptor)) { - return ve; + VectorExpression candidateVe = ve.newInstance(); + if (candidateVe.getDescriptor().matches(descriptor)) { + if (!useCheckedExpressionIfAvailable) { + // no need to look further for a checked variant of this expression + return ve; + } else if (candidateVe.supportsCheckedExecution()) { + return ve; + } else { + // vector expression doesn't support checked execution + // hold on to it in case there is no available checked variant + matchedVe = ve; + } } } catch (Exception ex) { throw new HiveException("Could not instantiate VectorExpression class " + ve.getSimpleName(), ex); } } + if (matchedVe != null) { + return matchedVe; + } if (LOG.isDebugEnabled()) { LOG.debug("getVectorExpressionClass udf " + udf.getSimpleName() + " descriptor: " + descriptor.toString()); for (Class ve : list) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index d46eb8d737cf2885b7f34004a9bd9eadbcf6af7a..3c8d78dfb1a382fbe9f706c82f35dba668438e9f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -132,6 +132,8 @@ public static HiveVectorAdaptorUsageMode getHiveConfValue(HiveConf hiveConf) { } private HiveVectorAdaptorUsageMode hiveVectorAdaptorUsageMode; + //when set to true use the overflow checked vector expressions + private boolean useCheckedVectorExpressions; private boolean reuseScratchColumns = HiveConf.ConfVars.HIVE_VECTORIZATION_TESTING_REUSE_SCRATCH_COLUMNS.defaultBoolVal; @@ -141,6 +143,8 @@ private void setHiveConfVars(HiveConf hiveConf) { 
this.reuseScratchColumns = HiveConf.getBoolVar(hiveConf, ConfVars.HIVE_VECTORIZATION_TESTING_REUSE_SCRATCH_COLUMNS); this.ocm.setReuseColumns(reuseScratchColumns); + useCheckedVectorExpressions = + HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_USE_CHECKED_EXPRESSIONS); } private void copyHiveConfVars(VectorizationContext vContextEnvironment) { @@ -1523,7 +1527,8 @@ private VectorExpression getDecimal64VectorExpressionForUdf(GenericUDF genericUd } VectorExpressionDescriptor.Descriptor descriptor = builder.build(); - Class vectorClass = this.vMap.getVectorExpressionClass(udfClass, descriptor); + Class vectorClass = + this.vMap.getVectorExpressionClass(udfClass, descriptor, useCheckedVectorExpressions); if (vectorClass == null) { return null; } @@ -1665,7 +1670,8 @@ private VectorExpression getVectorExpressionForUdf(GenericUDF genericUdf, } } VectorExpressionDescriptor.Descriptor descriptor = builder.build(); - Class vclass = this.vMap.getVectorExpressionClass(udfClass, descriptor); + Class vclass = + this.vMap.getVectorExpressionClass(udfClass, descriptor, useCheckedVectorExpressions); if (vclass == null) { if (LOG.isDebugEnabled()) { LOG.debug("No vector udf found for "+udfClass.getSimpleName() + ", descriptor: "+descriptor); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColModuloLongColumnChecked.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColModuloLongColumnChecked.java new file mode 100644 index 0000000000000000000000000000000000000000..5c2965c54b35598847443119f5a2a2ec58a7264e --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColModuloLongColumnChecked.java @@ -0,0 +1,34 @@ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Checked variant of LongColModuloLongColumn which, after evaluating the modulo, narrows the + * output column to its declared output type so that overflow matches non-vectorized execution. + */ +public class LongColModuloLongColumnChecked extends LongColModuloLongColumn { + private static final long serialVersionUID = 1L; + + 
public LongColModuloLongColumnChecked(int colNum1, int colNum2, int outputColumnNum) { + super(colNum1, colNum2, outputColumnNum); + } + + public LongColModuloLongColumnChecked() { + super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + super.evaluate(batch); + //checked for overflow based on the outputTypeInfo + OverflowUtils + .accountForOverflowLong(outputTypeInfo, (LongColumnVector) batch.cols[outputColumnNum], batch.selectedInUse, + batch.selected, batch.size); + } + + @Override + public boolean supportsCheckedExecution() { + return true; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OverflowUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OverflowUtils.java new file mode 100644 index 0000000000000000000000000000000000000000..37b69777276bd72b2eff24ded6af7a395d8607ba --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OverflowUtils.java @@ -0,0 +1,133 @@ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +import java.io.IOException; + +/** + * Utility methods to handle integer overflow/underflows and float narrowing + * in a ColumnVector + */ +public class OverflowUtils { + + private OverflowUtils() { + // static utility class, not meant to be instantiated + } + + /** + * Narrows the entries of a long column vector to the integer type named by outputTypeInfo + * (TINYINT, SMALLINT or INT) so that out-of-range values wrap around as they do in + * non-vectorized execution. Entries are left untouched for any other output type. + * + * @param outputTypeInfo type of the expression output; must not be null + * @param v column vector to narrow in place; only entry 0 is touched when it is repeating + * @param selectedInUse whether only the rows listed in sel are processed + * @param sel indexes of the rows to process, read only when selectedInUse is true + * @param n number of rows to process + * @throws RuntimeException if outputTypeInfo is null + */ + public static void accountForOverflowLong(TypeInfo outputTypeInfo, LongColumnVector v, + boolean selectedInUse, int[] sel, int n) { + if (outputTypeInfo == null) { + throw new RuntimeException( + "Output type info is not set for column vector. 
Cannot perform overflow check"); + } + switch (outputTypeInfo.getTypeName()) { + case serdeConstants.TINYINT_TYPE_NAME: { + //byte + if (v.isRepeating) { + v.vector[0] = (byte) v.vector[0]; + } else if (selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + v.vector[i] = (byte) v.vector[i]; + } + } else { + for (int i = 0; i != n; i++) { + v.vector[i] = (byte) v.vector[i]; + } + } + break; + } + case serdeConstants.SMALLINT_TYPE_NAME: { + //short + if (v.isRepeating) { + v.vector[0] = (short) v.vector[0]; + } else if (selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + v.vector[i] = (short) v.vector[i]; + } + } else { + for (int i = 0; i != n; i++) { + v.vector[i] = (short) v.vector[i]; + } + } + break; + } + case serdeConstants.INT_TYPE_NAME: { + //int + if (v.isRepeating) { + v.vector[0] = (int) v.vector[0]; + } else if (selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + v.vector[i] = (int) v.vector[i]; + } + } else { + for (int i = 0; i != n; i++) { + v.vector[i] = (int) v.vector[i]; + } + } + break; + } + default: + //nothing to be done + } + } + + + /** + * Narrows the entries of a double column vector to float precision when outputTypeInfo names + * the FLOAT type, so that results match non-vectorized execution. Entries are left untouched + * for any other output type. + * + * @param outputTypeInfo type of the expression output; must not be null + * @param v column vector to narrow in place; only entry 0 is touched when it is repeating + * @param selectedInUse whether only the rows listed in sel are processed + * @param sel indexes of the rows to process, read only when selectedInUse is true + * @param n number of rows to process + * @throws RuntimeException if outputTypeInfo is null + */ + public static void accountForOverflowDouble(TypeInfo outputTypeInfo, DoubleColumnVector v, + boolean selectedInUse, int[] sel, int n) { + if (outputTypeInfo == null) { + throw new RuntimeException( + "Output type info is not set for column vector. 
Cannot perform overflow check"); + } + switch (outputTypeInfo.getTypeName()) { + case serdeConstants.FLOAT_TYPE_NAME: { + //float + if (v.isRepeating) { + v.vector[0] = (float) v.vector[0]; + } else if (selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + v.vector[i] = (float) v.vector[i]; + } + } else { + for (int i = 0; i != n; i++) { + v.vector[i] = (float) v.vector[i]; + } + } + break; + } + default: + //nothing to be done + } + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/PosModDoubleToDouble.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/PosModDoubleToDouble.java index 75ec419aa9ea5c3fcc5e7314fbac756d6a5d36d5..bb74fb77347ecccefc1dda1b77c56de7c65b8f53 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/PosModDoubleToDouble.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/PosModDoubleToDouble.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.serde.serdeConstants; public class PosModDoubleToDouble extends MathFuncDoubleToDouble { private static final long serialVersionUID = 1L; @@ -39,7 +40,13 @@ public PosModDoubleToDouble() { @Override protected double func(double v) { - + // if the outputType is a float cast the arguments to float to replicate the overflow behavior + // in non-vectorized UDF GenericUDFPosMod + if (serdeConstants.FLOAT_TYPE_NAME.equals(outputTypeInfo.getTypeName())) { + float castedV = (float) v; + float castedDivisor = (float) divisor; + return ((castedV % castedDivisor) + castedDivisor) % castedDivisor; + } // return positive modulo return ((v % divisor) + divisor) % divisor; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/PosModLongToLong.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/PosModLongToLong.java 
6b4d714c9a79a55593c4a4d254267a3035abb10f..3d5f456c8d2642111f6629ce9e4165f5c26296b0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/PosModLongToLong.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/PosModLongToLong.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.serde.serdeConstants; public class PosModLongToLong extends MathFuncLongToLong { private static final long serialVersionUID = 1L; @@ -39,9 +40,30 @@ public PosModLongToLong() { @Override protected long func(long v) { - - // return positive modulo - return ((v % divisor) + divisor) % divisor; + // pmod can overflow depending on the width of the argument type, so narrow the operands + // to the type named by outputTypeInfo before computing; this keeps the results consistent + // with the non-vectorized GenericUDFPosMod implementation + switch (outputTypeInfo.getTypeName()) { + case serdeConstants.TINYINT_TYPE_NAME : { + byte castedV = (byte) v; + byte castedDivisor = (byte) divisor; + return ((castedV % castedDivisor) + castedDivisor) % castedDivisor; + } + case serdeConstants.SMALLINT_TYPE_NAME : { + short castedV = (short) v; + short castedDivisor = (short) divisor; + return ((castedV % castedDivisor) + castedDivisor) % castedDivisor; + } + case serdeConstants.INT_TYPE_NAME : { + int castedV = (int) v; + int castedDivisor = (int) divisor; + return ((castedV % castedDivisor) + castedDivisor) % castedDivisor; + } + default : { + // any other output type: compute with the long operands as they are + return ((v % divisor) + divisor) % divisor; + } + } } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java index 710165033627b33d9b238cc847dbac36c07ee5f6..4407961ab4566e4e1160c0ffa464e5ac314b5809 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java @@ -307,6 +307,17 @@ public static String getTypeName(TypeInfo typeInfo, DataTypePhysicalVariation da } } + /** + * Tells whether this vector expression is a checked variant, i.e. one that accounts for overflow + * of its output type. Such an expression should override this method and return true; when + * checked expressions are enabled it is preferred over an unchecked match for the same descriptor + * @return true if this is a Checked variation of a vector expression, false (the default) otherwise + */ + public boolean supportsCheckedExecution() { + // unchecked unless a subclass overrides this + return false; + } + @Override public String toString() { StringBuilder b = new StringBuilder(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMinus.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMinus.java index af8552caa02f2896f393a5099abdb1ae5abd4c16..3c1a0795d906d0f7093705b024fa009aae7b84e4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMinus.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMinus.java @@ -27,11 +27,17 @@ @Description(name = "-", value = "a _FUNC_ b - Returns the difference a-b") @VectorizedExpressions({LongColSubtractLongColumn.class, LongColSubtractDoubleColumn.class, + LongColSubtractLongColumnChecked.class, LongColSubtractDoubleColumnChecked.class, DoubleColSubtractLongColumn.class, DoubleColSubtractDoubleColumn.class, + DoubleColSubtractLongColumnChecked.class, DoubleColSubtractDoubleColumnChecked.class, LongColSubtractLongScalar.class, LongColSubtractDoubleScalar.class, + LongColSubtractLongScalarChecked.class, LongColSubtractDoubleScalarChecked.class, DoubleColSubtractLongScalar.class, DoubleColSubtractDoubleScalar.class, + DoubleColSubtractLongScalarChecked.class, DoubleColSubtractDoubleScalarChecked.class, LongScalarSubtractLongColumn.class, 
LongScalarSubtractDoubleColumn.class, + LongScalarSubtractLongColumnChecked.class, LongScalarSubtractDoubleColumnChecked.class, DoubleScalarSubtractLongColumn.class, DoubleScalarSubtractDoubleColumn.class, + DoubleScalarSubtractLongColumnChecked.class, DoubleScalarSubtractDoubleColumnChecked.class, DecimalColSubtractDecimalColumn.class, DecimalColSubtractDecimalScalar.class, DecimalScalarSubtractDecimalColumn.class, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMod.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMod.java index e2a638da518a2071ff15b8da6899646ec45c832a..044fb062752f77d3eea96d52d1bfb4bb86f1cd6f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMod.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMod.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColModuloLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColModuloLongColumnChecked; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.*; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; @@ -35,11 +36,17 @@ @Description(name = "%", value = "a _FUNC_ b - Returns the remainder when dividing a by b") @VectorizedExpressions({LongColModuloLongColumn.class, LongColModuloDoubleColumn.class, + LongColModuloLongColumnChecked.class, LongColModuloDoubleColumnChecked.class, DoubleColModuloLongColumn.class, DoubleColModuloDoubleColumn.class, + DoubleColModuloLongColumnChecked.class, DoubleColModuloDoubleColumnChecked.class, LongColModuloLongScalar.class, LongColModuloDoubleScalar.class, + LongColModuloLongScalarChecked.class, LongColModuloDoubleScalarChecked.class, DoubleColModuloLongScalar.class, DoubleColModuloDoubleScalar.class, + DoubleColModuloLongScalarChecked.class, 
DoubleColModuloDoubleScalarChecked.class, LongScalarModuloLongColumn.class, LongScalarModuloDoubleColumn.class, + LongScalarModuloLongColumnChecked.class, LongScalarModuloDoubleColumnChecked.class, DoubleScalarModuloLongColumn.class, DoubleScalarModuloDoubleColumn.class, + DoubleScalarModuloLongColumnChecked.class, DoubleScalarModuloDoubleColumnChecked.class, DecimalColModuloDecimalColumn.class, DecimalColModuloDecimalScalar.class, DecimalScalarModuloDecimalColumn.class}) public class GenericUDFOPMod extends GenericUDFBaseNumeric { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMultiply.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMultiply.java index 99d1ad7f203d946fd89d26074bd0e00dec8b3a1a..616641d2f092edaaf2105e80b4d801df065cfa2f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMultiply.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMultiply.java @@ -34,11 +34,17 @@ @Description(name = "*", value = "a _FUNC_ b - Multiplies a by b") @VectorizedExpressions({LongColMultiplyLongColumn.class, LongColMultiplyDoubleColumn.class, + LongColMultiplyLongColumnChecked.class, LongColMultiplyDoubleColumnChecked.class, DoubleColMultiplyLongColumn.class, DoubleColMultiplyDoubleColumn.class, + DoubleColMultiplyLongColumnChecked.class, DoubleColMultiplyDoubleColumnChecked.class, LongColMultiplyLongScalar.class, LongColMultiplyDoubleScalar.class, + LongColMultiplyLongScalarChecked.class, LongColMultiplyDoubleScalarChecked.class, DoubleColMultiplyLongScalar.class, DoubleColMultiplyDoubleScalar.class, + DoubleColMultiplyLongScalarChecked.class, DoubleColMultiplyDoubleScalarChecked.class, LongScalarMultiplyLongColumn.class, LongScalarMultiplyDoubleColumn.class, + LongScalarMultiplyLongColumnChecked.class, LongScalarMultiplyDoubleColumnChecked.class, DoubleScalarMultiplyLongColumn.class, DoubleScalarMultiplyDoubleColumn.class, + DoubleScalarMultiplyLongColumnChecked.class, 
DoubleScalarMultiplyDoubleColumnChecked.class, DecimalColMultiplyDecimalColumn.class, DecimalColMultiplyDecimalScalar.class, DecimalScalarMultiplyDecimalColumn.class}) public class GenericUDFOPMultiply extends GenericUDFBaseNumeric { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNegative.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNegative.java index 4e45788936559bbb7cfe65e9ffd083747b37dcc2..3a88759818ad9fb0ed6cb05835fe0d6303b413c6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNegative.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNegative.java @@ -24,8 +24,10 @@ import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColUnaryMinus; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColUnaryMinusChecked; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FuncNegateDecimalToDecimal; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColUnaryMinus; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColUnaryMinusChecked; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; @@ -38,7 +40,8 @@ import org.apache.hadoop.io.LongWritable; @Description(name = "-", value = "_FUNC_ a - Returns -a") -@VectorizedExpressions({LongColUnaryMinus.class, DoubleColUnaryMinus.class, FuncNegateDecimalToDecimal.class}) +@VectorizedExpressions({LongColUnaryMinus.class, DoubleColUnaryMinus.class, FuncNegateDecimalToDecimal.class, + LongColUnaryMinusChecked.class, DoubleColUnaryMinusChecked.class}) public class GenericUDFOPNegative extends GenericUDFBaseUnary { public GenericUDFOPNegative() { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPPlus.java 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPPlus.java index b1200e673e6b470b5fd1cc856270a6da615f16cb..bc87a0d6e70e18e468fad2631779267c810644f1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPPlus.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPPlus.java @@ -35,10 +35,15 @@ */ @Description(name = "+", value = "a _FUNC_ b - Returns a+b") @VectorizedExpressions({LongColAddLongColumn.class, LongColAddDoubleColumn.class, + LongColAddLongColumnChecked.class, LongColAddDoubleColumnChecked.class, DoubleColAddLongColumn.class, DoubleColAddDoubleColumn.class, LongColAddLongScalar.class, + DoubleColAddLongColumnChecked.class, DoubleColAddDoubleColumnChecked.class, LongColAddLongScalarChecked.class, LongColAddDoubleScalar.class, DoubleColAddLongScalar.class, DoubleColAddDoubleScalar.class, + LongColAddDoubleScalarChecked.class, DoubleColAddLongScalarChecked.class, DoubleColAddDoubleScalarChecked.class, LongScalarAddLongColumn.class, LongScalarAddDoubleColumn.class, DoubleScalarAddLongColumn.class, + LongScalarAddLongColumnChecked.class, LongScalarAddDoubleColumnChecked.class, DoubleScalarAddLongColumnChecked.class, DoubleScalarAddDoubleColumn.class, + DoubleScalarAddDoubleColumnChecked.class, DecimalScalarAddDecimalColumn.class, DecimalColAddDecimalColumn.class, DecimalColAddDecimalScalar.class, diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestUnaryMinus.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestUnaryMinus.java index ab6f6b79316818cac458390dc2d087091057c63b..4b70aa4920b3acf48a75396eeeac938d770b80a2 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestUnaryMinus.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestUnaryMinus.java @@ -23,7 +23,10 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import 
org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColUnaryMinus; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColUnaryMinusChecked; import org.apache.hadoop.hive.ql.exec.vector.util.VectorizedRowGroupGenUtil; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.junit.Test; /** @@ -43,4 +46,34 @@ public void testUnaryMinus() { assertEquals(0, inVector[i]+outVector[i]); } } + + + @Test + public void testUnaryMinusCheckedOverflow() { + VectorizedRowBatch vrg = VectorizedRowGroupGenUtil.getVectorizedRowBatch(1, 2, 0); + //set value to Integer.MIN_VALUE so that negating it overflows the int output type + ((LongColumnVector)vrg.cols[0]).vector[0] = Integer.MIN_VALUE; + LongColUnaryMinusChecked expr = new LongColUnaryMinusChecked(0, 1); + expr.setOutputTypeInfo(TypeInfoFactory.getPrimitiveTypeInfo("int")); + expr.evaluate(vrg); + //verify that the overflowed result wrapped around to Integer.MIN_VALUE, as int negation does + long[] outVector = ((LongColumnVector) vrg.cols[1]).vector; + for (int i = 0; i < vrg.size; i++) { + assertEquals(Integer.MIN_VALUE, outVector[i]); + } + } + + @Test + public void testUnaryMinusChecked() { + VectorizedRowBatch vrg = VectorizedRowGroupGenUtil.getVectorizedRowBatch(1024, 2, 23); + LongColUnaryMinusChecked expr = new LongColUnaryMinusChecked(0, 1); + expr.setOutputTypeInfo(TypeInfoFactory.getPrimitiveTypeInfo("bigint")); + expr.evaluate(vrg); + //verify + long[] inVector = ((LongColumnVector) vrg.cols[0]).vector; + long[] outVector = ((LongColumnVector) vrg.cols[1]).vector; + for (int i = 0; i < outVector.length; i++) { + assertEquals(0, inVector[i]+outVector[i]); + } + } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java index 
02dec659ce421eef06f924bb6973070878d57be3..acb319823485fabd5d0b275a0ff12037e16b9324 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java @@ -42,6 +42,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DecimalColMultiplyDecimalColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DecimalColSubtractDecimalColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddLongColumnChecked; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddLongScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DecimalColSubtractDecimalColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DecimalColAddDecimalColumn; @@ -52,7 +53,9 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DecimalScalarAddDecimalColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DecimalScalarSubtractDecimalColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DecimalScalarMultiplyDecimalColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddLongScalarChecked; import org.apache.hadoop.hive.ql.exec.vector.util.VectorizedRowGroupGenUtil; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.junit.Test; /** @@ -62,8 +65,23 @@ @Test public void testLongColAddLongScalarNoNulls() { + longColAddLongScalarNoNulls(false); + } + + @Test + public void testLongColAddLongScalarCheckedNoNulls() { + longColAddLongScalarNoNulls(true); + } + + private void longColAddLongScalarNoNulls(boolean checked) { VectorizedRowBatch vrg = getVectorizedRowBatchSingleLongVector(VectorizedRowBatch.DEFAULT_SIZE); - LongColAddLongScalar expr = new LongColAddLongScalar(0, 23, 1); + VectorExpression expr; + if (checked) 
{ + expr = new LongColAddLongScalarChecked(0, 23, 1); + expr.setOutputTypeInfo(TypeInfoFactory.getPrimitiveTypeInfo("bigint")); + } else { + expr = new LongColAddLongScalar(0, 23, 1); + } expr.evaluate(vrg); //verify for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) { @@ -105,12 +123,27 @@ public static VectorizedRowBatch getVectorizedRowBatch2LongInDoubleOut() { @Test public void testLongColAddLongScalarWithNulls() { + longColAddLongScalarCheckedWithNulls(false); + } + + @Test + public void testLongColAddLongScalarCheckedWithNulls() { + longColAddLongScalarCheckedWithNulls(true); + } + + private void longColAddLongScalarCheckedWithNulls(boolean isChecked) { VectorizedRowBatch batch = getVectorizedRowBatchSingleLongVector( VectorizedRowBatch.DEFAULT_SIZE); LongColumnVector lcv = (LongColumnVector) batch.cols[0]; LongColumnVector lcvOut = (LongColumnVector) batch.cols[1]; TestVectorizedRowBatch.addRandomNulls(lcv); - LongColAddLongScalar expr = new LongColAddLongScalar(0, 23, 1); + VectorExpression expr; + if (isChecked) { + expr = new LongColAddLongScalarChecked(0, 23, 1); + expr.setOutputTypeInfo(TypeInfoFactory.getPrimitiveTypeInfo("bigint")); + } else { + expr = new LongColAddLongScalar(0, 23, 1); + } expr.evaluate(batch); // verify @@ -128,9 +161,18 @@ public void testLongColAddLongScalarWithNulls() { @Test public void testLongColAddLongScalarWithRepeating() { + longColAddLongScalarWithRepeatingUtil(false); + } + + @Test + public void testLongColAddLongScalarCheckedWithRepeating() { + longColAddLongScalarWithRepeatingUtil(true); + } + + private void longColAddLongScalarWithRepeatingUtil(boolean isChecked) { LongColumnVector in, out; VectorizedRowBatch batch; - LongColAddLongScalar expr; + VectorExpression expr; // Case 1: is repeating, no nulls batch = getVectorizedRowBatchSingleLongVector(VectorizedRowBatch.DEFAULT_SIZE); @@ -138,7 +180,13 @@ public void testLongColAddLongScalarWithRepeating() { in.isRepeating = true; out = (LongColumnVector) 
batch.cols[1]; out.isRepeating = false; - expr = new LongColAddLongScalar(0, 23, 1); + if(isChecked) { + expr = new LongColAddLongScalarChecked(0, 23, 1); + expr.setOutputTypeInfo(TypeInfoFactory.getPrimitiveTypeInfo("bigint")); + } else { + expr = new LongColAddLongScalar(0, 23, 1); + } + expr.evaluate(batch); // verify Assert.assertTrue(out.isRepeating); @@ -156,7 +204,13 @@ public void testLongColAddLongScalarWithRepeating() { out.isRepeating = false; out.isNull[0] = false; out.noNulls = true; - expr = new LongColAddLongScalar(0, 23, 1); + if (isChecked) { + expr = new LongColAddLongScalarChecked(0, 23, 1); + expr.setOutputTypeInfo(TypeInfoFactory.getPrimitiveTypeInfo("bigint")); + } else { + expr = new LongColAddLongScalar(0, 23, 1); + } + expr.evaluate(batch); // verify Assert.assertTrue(out.isRepeating); @@ -195,6 +249,15 @@ public static void verifyLongNullDataVectorEntries( @Test public void testLongColAddLongColumn() { + longColAddLongColumnUtil(false); + } + + @Test + public void testLongColAddLongColumnChecked() { + longColAddLongColumnUtil(true); + } + + private void longColAddLongColumnUtil(boolean isChecked) { int seed = 17; VectorizedRowBatch vrg = VectorizedRowGroupGenUtil.getVectorizedRowBatch( VectorizedRowBatch.DEFAULT_SIZE, @@ -205,7 +268,14 @@ public void testLongColAddLongColumn() { LongColumnVector lcv3 = (LongColumnVector) vrg.cols[3]; LongColumnVector lcv4 = (LongColumnVector) vrg.cols[4]; LongColumnVector lcv5 = (LongColumnVector) vrg.cols[5]; - LongColAddLongColumn expr = new LongColAddLongColumn(0, 1, 2); + VectorExpression expr; + if (isChecked) { + expr = new LongColAddLongColumnChecked(0, 1, 2); + expr.setOutputTypeInfo(TypeInfoFactory.getPrimitiveTypeInfo("bigint")); + } else { + expr = new LongColAddLongColumn(0, 1, 2); + } + expr.evaluate(vrg); for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) { assertEquals((i+1) * seed * 3, lcv2.vector[i]); @@ -235,7 +305,13 @@ public void testLongColAddLongColumn() { // Now test with 
repeating flag lcv3.isRepeating = true; - LongColAddLongColumn expr2 = new LongColAddLongColumn(3, 4, 5); + VectorExpression expr2; + if (isChecked) { + expr2 = new LongColAddLongColumnChecked(3, 4, 5); + expr2.setOutputTypeInfo(TypeInfoFactory.getPrimitiveTypeInfo("bigint")); + } else { + expr2 = new LongColAddLongColumn(3, 4, 5); + } expr2.evaluate(vrg); for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) { assertEquals(seed * (4 + 5*(i+1)), lcv5.vector[i]); diff --git a/ql/src/test/queries/clientpositive/vectorization_numeric_overflows.q b/ql/src/test/queries/clientpositive/vectorization_numeric_overflows.q new file mode 100644 index 0000000000000000000000000000000000000000..92d985e8c23d2af61497ecd34f9b547c3eefebb4 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vectorization_numeric_overflows.q @@ -0,0 +1,157 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.fetch.task.conversion=none; +set hive.cbo.enable=false; +set hive.vectorized.use.checked.expressions=true; + + +CREATE TABLE test_overflow ( + ctinyint1 TINYINT, + ctinyint2 TINYINT, + csmallint1 SMALLINT, + csmallint2 SMALLINT, + cint1 INT, + cint2 INT, + cbigint1 BIGINT, + cbigint2 BIGINT, + cfloat1 FLOAT, + cfloat2 FLOAT, + cdouble1 DOUBLE, + cdouble2 DOUBLE) +STORED AS PARQUET; + +-- values stored in the columns are the min and max respectively for each column type +insert into test_overflow values (-128, 127, -32768, 32767, -2147483648, 2147483647, -9223372036854775808, 9223372036854775807, 1.401298464324817E-45, 3.4028234663852886E38, 4.9E-324, 1.7976931348623157E308); + +insert into test_overflow values (127, -128, 32767, -32768, 2147483647, -2147483648, 9223372036854775807, -9223372036854775808, 3.4028234663852886E38, 1.401298464324817E-45, 1.7976931348623157E308, 4.9E-324); + +-- stored values represent the MAX_RANGE/2 and MAX_RANGE/2 + 1 for integer types. 
These are used to cause overflow in pmod UDF +insert into test_overflow values (64, 65, 32767, -32768, 1073741824, 1073741825, 9223372036854775807, -9223372036854775808, 3.4028234663852886E38, 1.401298464324817E-45, 1.7976931348623157E308, 4.9E-324); + +select * from test_overflow; + +-- the subtraction in the where clause tips integer column below the min value causing it to underflow +set hive.vectorized.execution.enabled=true; +explain vectorization expression +select cint1, (cint1-2) from test_overflow where (cint1 - 2) > 0; +select cint1, (cint1-2) from test_overflow where (cint1 - 2) > 0; + +-- results should match in non-vectorized execution +set hive.vectorized.execution.enabled=false; +select cint1, (cint1-2) from test_overflow where (cint1 - 2) > 0; + +-- the addition in the where clause tips integer column over the max value causing it to overflow +set hive.vectorized.execution.enabled=true; +explain vectorization expression +select cint2, (cint2+2) from test_overflow where (cint2 + 2) < 0; +select cint2, (cint2+2) from test_overflow where (cint2 + 2) < 0; + +-- results should match in non-vectorized execution +set hive.vectorized.execution.enabled=false; +select cint2, (cint2+2) from test_overflow where (cint2 + 2) < 0; + + +-- test overflow in multiply operator +set hive.vectorized.execution.enabled=true; +explain vectorization expression +select cint2, (cint2 * 2) from test_overflow where (cint2 * 2) < 0; +select cint2, (cint2 * 2) from test_overflow where (cint2 * 2) < 0; + +-- results should match in non-vectorized execution +set hive.vectorized.execution.enabled=false; +select cint2, (cint2 * 2) from test_overflow where (cint2 * 2) < 0; + + +-- underflow in tinyint case +set hive.vectorized.execution.enabled=true; +explain vectorization expression +select ctinyint1, (ctinyint1-2Y) from test_overflow where (ctinyint1 - 2Y) > 0; +select ctinyint1, (ctinyint1-2Y) from test_overflow where (ctinyint1 - 2Y) > 0; + +-- results should match in
non-vectorized execution +set hive.vectorized.execution.enabled=false; +select ctinyint1, (ctinyint1-2Y) from test_overflow where (ctinyint1 - 2Y) > 0; + +-- overflow in tinyint case +set hive.vectorized.execution.enabled=true; +explain vectorization expression +select ctinyint2, (ctinyint2 + 2) from test_overflow where (ctinyint2 + 2Y) < 0; +select ctinyint2, (ctinyint2 + 2) from test_overflow where (ctinyint2 + 2Y) < 0; + +-- results should match in non-vectorized execution +set hive.vectorized.execution.enabled=false; +select ctinyint2, (ctinyint2 + 2) from test_overflow where (ctinyint2 + 2Y) < 0; + +-- overflow for short datatype in multiply operation +set hive.vectorized.execution.enabled=true; +explain vectorization expression +select csmallint2, csmallint2 * 2 from test_overflow where (csmallint2 * 2S) < 0; +select csmallint2, csmallint2 * 2 from test_overflow where (csmallint2 * 2S) < 0; + +-- results should match in non-vectorized execution +set hive.vectorized.execution.enabled=false; +explain vectorization expression +select csmallint2, csmallint2 * 2 from test_overflow where (csmallint2 * 2S) < 0; +select csmallint2, csmallint2 * 2 from test_overflow where (csmallint2 * 2S) < 0; + +create table parquettable (t1 tinyint, t2 tinyint, i1 int, i2 int) stored as parquet; +insert into parquettable values (-104, 25,2147483647, 10), (-112, 24, -2147483648, 10), (54, 9, 2147483647, -50); + + +-- test ColSubtractCol operation underflow +explain vectorization expression select t1, t2, (t1-t2) as diff from parquettable where (t1-t2) < 50 order by diff desc; +select t1, t2, (t1-t2) as diff from parquettable where (t1-t2) < 50 order by diff desc; + +-- the above query should return the same results in non-vectorized mode +set hive.vectorized.execution.enabled=false; +select t1, t2, (t1-t2) as diff from parquettable where (t1-t2) < 50 order by diff desc; + +-- test integer ColSubtractCol overflow +set hive.vectorized.execution.enabled=true; +explain vectorization
expression select i1, i2, (i1-i2) as diff from parquettable where (i1-i2) < 50 order by diff desc; +select i1, i2, (i1-i2) as diff from parquettable where (i1-i2) < 50 order by diff desc; + +-- the above query should return the same results in non-vectorized mode +set hive.vectorized.execution.enabled=false; +select i1, i2, (i1-i2) as diff from parquettable where (i1-i2) < 50 order by diff desc; + +--Test ColumnUnaryMinus.txt +set hive.vectorized.execution.enabled=false; +select cint1 from test_overflow where -cint1 >= 0; +select cfloat1 from test_overflow where -cfloat1 >= 0; + +set hive.vectorized.execution.enabled=true; +select cint1 from test_overflow where -cint1 >= 0; +select cfloat1 from test_overflow where -cfloat1 >= 0; + + +-- test scalarMultiplyCol overflow +set hive.vectorized.execution.enabled=false; +select cint1, 2*cint2 from test_overflow where 2*cint2 >= 0; + +set hive.vectorized.execution.enabled=true; +select cint1, 2*cint2 from test_overflow where 2*cint2 >= 0; + +-- test ConstantVectorExpression overflow behavior +-- this works without checked expressions but good to have a test case exercising this +set hive.vectorized.execution.enabled=false; +select 2147483648 from test_overflow; + +set hive.vectorized.execution.enabled=true; +select 2147483648 from test_overflow; + +-- test PosMod vector expression, the third row will overflow the int range and cause the result to be negative +set hive.vectorized.execution.enabled=false; +select * from test_overflow where pmod(cint1, 1073741825) > 0; + +-- results should match in vectorized execution +set hive.vectorized.execution.enabled=true; +select * from test_overflow where pmod(cint1, 1073741825) > 0; + +-- cause short range overflow in pmod implementation, this works without posmod range checks but still good to have +set hive.vectorized.execution.enabled=false; +select * from test_overflow where pmod(csmallint1, 16385S) > 0; + +set hive.vectorized.execution.enabled=true; +explain vectorization
expression select * from test_overflow where pmod(csmallint1, 16385S) > 0; +select * from test_overflow where pmod(csmallint1, 16385S) > 0; diff --git a/ql/src/test/results/clientpositive/vectorization_numeric_overflows.q.out b/ql/src/test/results/clientpositive/vectorization_numeric_overflows.q.out new file mode 100644 index 0000000000000000000000000000000000000000..5cc79e1731f3d1ad1e27b517975643d7e5a52243 --- /dev/null +++ b/ql/src/test/results/clientpositive/vectorization_numeric_overflows.q.out @@ -0,0 +1,1028 @@ +PREHOOK: query: CREATE TABLE test_overflow ( + ctinyint1 TINYINT, + ctinyint2 TINYINT, + csmallint1 SMALLINT, + csmallint2 SMALLINT, + cint1 INT, + cint2 INT, + cbigint1 BIGINT, + cbigint2 BIGINT, + cfloat1 FLOAT, + cfloat2 FLOAT, + cdouble1 DOUBLE, + cdouble2 DOUBLE) +STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_overflow +POSTHOOK: query: CREATE TABLE test_overflow ( + ctinyint1 TINYINT, + ctinyint2 TINYINT, + csmallint1 SMALLINT, + csmallint2 SMALLINT, + cint1 INT, + cint2 INT, + cbigint1 BIGINT, + cbigint2 BIGINT, + cfloat1 FLOAT, + cfloat2 FLOAT, + cdouble1 DOUBLE, + cdouble2 DOUBLE) +STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_overflow +PREHOOK: query: insert into test_overflow values (-128, 127, -32768, 32767, -2147483648, 2147483647, -9223372036854775808, 9223372036854775807, 1.401298464324817E-45, 3.4028234663852886E38, 4.9E-324, 1.7976931348623157E308) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_overflow +POSTHOOK: query: insert into test_overflow values (-128, 127, -32768, 32767, -2147483648, 2147483647, -9223372036854775808, 9223372036854775807, 1.401298464324817E-45, 3.4028234663852886E38, 4.9E-324, 1.7976931348623157E308) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_overflow +POSTHOOK: 
Lineage: test_overflow.cbigint1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cbigint2 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cdouble1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cdouble2 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cfloat1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cfloat2 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cint1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cint2 SCRIPT [] +POSTHOOK: Lineage: test_overflow.csmallint1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.csmallint2 SCRIPT [] +POSTHOOK: Lineage: test_overflow.ctinyint1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.ctinyint2 SCRIPT [] +PREHOOK: query: insert into test_overflow values (127, -128, 32767, -32768, 2147483647, -2147483648, 9223372036854775807, -9223372036854775808, 3.4028234663852886E38, 1.401298464324817E-45, 1.7976931348623157E308, 4.9E-324) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_overflow +POSTHOOK: query: insert into test_overflow values (127, -128, 32767, -32768, 2147483647, -2147483648, 9223372036854775807, -9223372036854775808, 3.4028234663852886E38, 1.401298464324817E-45, 1.7976931348623157E308, 4.9E-324) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_overflow +POSTHOOK: Lineage: test_overflow.cbigint1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cbigint2 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cdouble1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cdouble2 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cfloat1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cfloat2 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cint1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cint2 SCRIPT [] +POSTHOOK: Lineage: test_overflow.csmallint1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.csmallint2 SCRIPT [] +POSTHOOK: Lineage: test_overflow.ctinyint1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.ctinyint2 SCRIPT [] +PREHOOK: query: insert into test_overflow values (64, 65, 32767, 
-32768, 1073741824, 1073741825, 9223372036854775807, -9223372036854775808, 3.4028234663852886E38, 1.401298464324817E-45, 1.7976931348623157E308, 4.9E-324) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_overflow +POSTHOOK: query: insert into test_overflow values (64, 65, 32767, -32768, 1073741824, 1073741825, 9223372036854775807, -9223372036854775808, 3.4028234663852886E38, 1.401298464324817E-45, 1.7976931348623157E308, 4.9E-324) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_overflow +POSTHOOK: Lineage: test_overflow.cbigint1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cbigint2 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cdouble1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cdouble2 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cfloat1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cfloat2 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cint1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cint2 SCRIPT [] +POSTHOOK: Lineage: test_overflow.csmallint1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.csmallint2 SCRIPT [] +POSTHOOK: Lineage: test_overflow.ctinyint1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.ctinyint2 SCRIPT [] +PREHOOK: query: select * from test_overflow +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select * from test_overflow +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +-128 127 -32768 32767 -2147483648 2147483647 -9223372036854775808 9223372036854775807 1.4E-45 3.4028235E38 4.9E-324 1.7976931348623157E308 +127 -128 32767 -32768 2147483647 -2147483648 9223372036854775807 -9223372036854775808 3.4028235E38 1.4E-45 1.7976931348623157E308 4.9E-324 +64 65 32767 -32768 1073741824 1073741825 9223372036854775807 -9223372036854775808 3.4028235E38 1.4E-45 1.7976931348623157E308 4.9E-324 +PREHOOK: query: explain vectorization expression +select cint1, 
(cint1-2) from test_overflow where (cint1 - 2) > 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select cint1, (cint1-2) from test_overflow where (cint1 - 2) > 0 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_overflow + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 13:int, val 0)(children: LongColSubtractLongScalarChecked(col 4:int, val 2) -> 13:int) + predicate: ((cint1 - 2) > 0) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint1 (type: int), (cint1 - 2) (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [4, 13] + selectExpressions: LongColSubtractLongScalarChecked(col 4:int, val 2) -> 13:int + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + 
inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select cint1, (cint1-2) from test_overflow where (cint1 - 2) > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select cint1, (cint1-2) from test_overflow where (cint1 - 2) > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +-2147483648 2147483646 +2147483647 2147483645 +1073741824 1073741822 +PREHOOK: query: select cint1, (cint1-2) from test_overflow where (cint1 - 2) > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select cint1, (cint1-2) from test_overflow where (cint1 - 2) > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +-2147483648 2147483646 +2147483647 2147483645 +1073741824 1073741822 +PREHOOK: query: explain vectorization expression +select cint2, (cint2+2) from test_overflow where (cint2 + 2) < 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select cint2, (cint2+2) from test_overflow where (cint2 + 2) < 0 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_overflow + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColLessLongScalar(col 13:int, val 0)(children: LongColAddLongScalarChecked(col 5:int, 
val 2) -> 13:int) + predicate: ((cint2 + 2) < 0) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint2 (type: int), (cint2 + 2) (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5, 13] + selectExpressions: LongColAddLongScalarChecked(col 5:int, val 2) -> 13:int + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select cint2, (cint2+2) from test_overflow where (cint2 + 2) < 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select cint2, (cint2+2) from test_overflow where (cint2 + 2) < 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +2147483647 -2147483647 +-2147483648 -2147483646 +PREHOOK: query: select cint2, (cint2+2) from test_overflow where (cint2 + 2) < 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select cint2, (cint2+2) 
from test_overflow where (cint2 + 2) < 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +2147483647 -2147483647 +-2147483648 -2147483646 +PREHOOK: query: explain vectorization expression +select cint2, (cint2 * 2) from test_overflow where (cint2 * 2) < 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select cint2, (cint2 * 2) from test_overflow where (cint2 * 2) < 0 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_overflow + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColLessLongScalar(col 13:int, val 0)(children: LongColMultiplyLongScalarChecked(col 5:int, val 2) -> 13:int) + predicate: ((cint2 * 2) < 0) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint2 (type: int), (cint2 * 2) (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5, 13] + selectExpressions: LongColMultiplyLongScalarChecked(col 5:int, val 2) -> 13:int + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select cint2, (cint2 * 2) from test_overflow where (cint2 * 2) < 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select cint2, (cint2 * 2) from test_overflow where (cint2 * 2) < 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +2147483647 -2 +1073741825 -2147483646 +PREHOOK: query: select cint2, (cint2 * 2) from test_overflow where (cint2 * 2) < 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select cint2, (cint2 * 2) from test_overflow where (cint2 * 2) < 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +2147483647 -2 +1073741825 -2147483646 +PREHOOK: query: explain vectorization expression +select ctinyint1, (ctinyint1-2Y) from test_overflow where (ctinyint1 - 2Y) > 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select ctinyint1, (ctinyint1-2Y) from test_overflow where (ctinyint1 - 2Y) > 0 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_overflow + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: 
+ native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 13:tinyint, val 0)(children: LongColSubtractLongScalarChecked(col 0:tinyint, val 2) -> 13:tinyint) + predicate: ((ctinyint1 - 2) > 0) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint1 (type: tinyint), (ctinyint1 - 2) (type: tinyint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 13] + selectExpressions: LongColSubtractLongScalarChecked(col 0:tinyint, val 2) -> 13:tinyint + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select ctinyint1, (ctinyint1-2Y) from test_overflow where (ctinyint1 - 2Y) > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select ctinyint1, (ctinyint1-2Y) from test_overflow where (ctinyint1 - 2Y) > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow 
+#### A masked pattern was here #### +-128 126 +127 125 +64 62 +PREHOOK: query: select ctinyint1, (ctinyint1-2Y) from test_overflow where (ctinyint1 - 2Y) > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select ctinyint1, (ctinyint1-2Y) from test_overflow where (ctinyint1 - 2Y) > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +-128 126 +127 125 +64 62 +PREHOOK: query: explain vectorization expression +select ctinyint2, (ctinyint2 + 2) from test_overflow where (ctinyint2 + 2Y) < 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select ctinyint2, (ctinyint2 + 2) from test_overflow where (ctinyint2 + 2Y) < 0 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_overflow + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColLessLongScalar(col 13:tinyint, val 0)(children: LongColAddLongScalarChecked(col 1:tinyint, val 2) -> 13:tinyint) + predicate: ((ctinyint2 + 2) < 0) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint2 (type: tinyint), (ctinyint2 + 2) (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 13] + selectExpressions: LongColAddLongScalarChecked(col 1:tinyint, val 2) -> 13:int + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select ctinyint2, (ctinyint2 + 2) from test_overflow where (ctinyint2 + 2Y) < 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select ctinyint2, (ctinyint2 + 2) from test_overflow where (ctinyint2 + 2Y) < 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +127 129 +-128 -126 +PREHOOK: query: select ctinyint2, (ctinyint2 + 2) from test_overflow where (ctinyint2 + 2Y) < 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select ctinyint2, (ctinyint2 + 2) from test_overflow where (ctinyint2 + 2Y) < 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +127 129 +-128 -126 +PREHOOK: query: explain vectorization expression +select csmallint2, csmallint2 * 2 from test_overflow where (csmallint2 * 2S) < 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select csmallint2, csmallint2 * 2 from test_overflow where (csmallint2 * 2S) < 0 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + 
enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_overflow + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColLessLongScalar(col 13:smallint, val 0)(children: LongColMultiplyLongScalarChecked(col 3:smallint, val 2) -> 13:smallint) + predicate: ((csmallint2 * 2) < 0) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: csmallint2 (type: smallint), (csmallint2 * 2) (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3, 13] + selectExpressions: LongColMultiplyLongScalarChecked(col 3:smallint, val 2) -> 13:int + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink 
+ +PREHOOK: query: select csmallint2, csmallint2 * 2 from test_overflow where (csmallint2 * 2S) < 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select csmallint2, csmallint2 * 2 from test_overflow where (csmallint2 * 2S) < 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +32767 65534 +PREHOOK: query: explain vectorization expression +select csmallint2, csmallint2 * 2 from test_overflow where (csmallint2 * 2S) < 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select csmallint2, csmallint2 * 2 from test_overflow where (csmallint2 * 2S) < 0 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_overflow + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((csmallint2 * 2) < 0) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: csmallint2 (type: smallint), (csmallint2 * 2) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select csmallint2, csmallint2 * 2 from test_overflow where (csmallint2 * 2S) < 0 +PREHOOK: type: QUERY +PREHOOK: 
Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select csmallint2, csmallint2 * 2 from test_overflow where (csmallint2 * 2S) < 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +32767 65534 +PREHOOK: query: create table parquettable (t1 tinyint, t2 tinyint, i1 int, i2 int) stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquettable +POSTHOOK: query: create table parquettable (t1 tinyint, t2 tinyint, i1 int, i2 int) stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquettable +PREHOOK: query: insert into parquettable values (-104, 25,2147483647, 10), (-112, 24, -2147483648, 10), (54, 9, 2147483647, -50) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@parquettable +POSTHOOK: query: insert into parquettable values (-104, 25,2147483647, 10), (-112, 24, -2147483648, 10), (54, 9, 2147483647, -50) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@parquettable +POSTHOOK: Lineage: parquettable.i1 SCRIPT [] +POSTHOOK: Lineage: parquettable.i2 SCRIPT [] +POSTHOOK: Lineage: parquettable.t1 SCRIPT [] +POSTHOOK: Lineage: parquettable.t2 SCRIPT [] +PREHOOK: query: explain vectorization expression select t1, t2, (t1-t2) as diff from parquettable where (t1-t2) < 50 order by diff desc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression select t1, t2, (t1-t2) as diff from parquettable where (t1-t2) < 50 order by diff desc +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: parquettable + Statistics: Num rows: 3 Data 
size: 12 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((t1 - t2) < 50) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t1 (type: tinyint), t2 (type: tinyint), (t1 - t2) (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: tinyint) + sort order: - + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: tinyint) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: tinyint), KEY.reducesinkkey0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select t1, t2, (t1-t2) as diff from parquettable where (t1-t2) < 50 order by diff desc +PREHOOK: type: QUERY +PREHOOK: Input: default@parquettable +#### A masked pattern was here #### +POSTHOOK: query: select t1, t2, (t1-t2) as diff from parquettable where (t1-t2) < 50 order by diff desc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquettable +#### A masked pattern was here #### +54 9 45 +PREHOOK: query: select t1, t2, (t1-t2) as diff from parquettable where (t1-t2) < 50 order by diff desc +PREHOOK: type: QUERY +PREHOOK: Input: default@parquettable +#### A masked pattern was here #### +POSTHOOK: query: select t1, t2, (t1-t2) as diff from 
parquettable where (t1-t2) < 50 order by diff desc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquettable +#### A masked pattern was here #### +54 9 45 +PREHOOK: query: explain vectorization expression select i1, i2, (i1-i2) as diff from parquettable where (i1-i2) < 50 order by diff desc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression select i1, i2, (i1-i2) as diff from parquettable where (i1-i2) < 50 order by diff desc +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: parquettable + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColLessLongScalar(col 5:int, val 50)(children: LongColSubtractLongColumnChecked(col 2:int, col 3:int) -> 5:int) + predicate: ((i1 - i2) < 50) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i1 (type: int), i2 (type: int), (i1 - i2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 3, 5] + selectExpressions: LongColSubtractLongColumnChecked(col 2:int, col 3:int) -> 5:int + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: - + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select i1, i2, (i1-i2) as diff from parquettable where (i1-i2) < 50 order by diff desc +PREHOOK: type: QUERY +PREHOOK: Input: default@parquettable +#### A masked pattern was here #### +POSTHOOK: query: select i1, i2, (i1-i2) as diff from parquettable where (i1-i2) < 50 order by diff desc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquettable +#### A masked pattern was here #### +2147483647 -50 -2147483599 +PREHOOK: query: select i1, i2, (i1-i2) as diff from parquettable where (i1-i2) < 50 order by diff desc 
+PREHOOK: type: QUERY +PREHOOK: Input: default@parquettable +#### A masked pattern was here #### +POSTHOOK: query: select i1, i2, (i1-i2) as diff from parquettable where (i1-i2) < 50 order by diff desc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquettable +#### A masked pattern was here #### +2147483647 -50 -2147483599 +PREHOOK: query: select cint1 from test_overflow where -cint1 >= 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select cint1 from test_overflow where -cint1 >= 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +PREHOOK: query: select cfloat1 from test_overflow where -cfloat1 >= 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select cfloat1 from test_overflow where -cfloat1 >= 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +PREHOOK: query: select cint1 from test_overflow where -cint1 >= 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select cint1 from test_overflow where -cint1 >= 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +PREHOOK: query: select cfloat1 from test_overflow where -cfloat1 >= 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select cfloat1 from test_overflow where -cfloat1 >= 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +PREHOOK: query: select cint1, 2*cint2 from test_overflow where 2*cint2 >= 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select cint1, 2*cint2 from test_overflow where 2*cint2 >= 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### 
A masked pattern was here #### +2147483647 0 +PREHOOK: query: select cint1, 2*cint2 from test_overflow where 2*cint2 >= 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select cint1, 2*cint2 from test_overflow where 2*cint2 >= 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +2147483647 0 +PREHOOK: query: select 2147483648 from test_overflow +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select 2147483648 from test_overflow +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +2147483648 +2147483648 +2147483648 +PREHOOK: query: select 2147483648 from test_overflow +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select 2147483648 from test_overflow +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +2147483648 +2147483648 +2147483648 +PREHOOK: query: select * from test_overflow where pmod(cint1, 1073741825) > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select * from test_overflow where pmod(cint1, 1073741825) > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +-128 127 -32768 32767 -2147483648 2147483647 -9223372036854775808 9223372036854775807 1.4E-45 3.4028235E38 4.9E-324 1.7976931348623157E308 +127 -128 32767 -32768 2147483647 -2147483648 9223372036854775807 -9223372036854775808 3.4028235E38 1.4E-45 1.7976931348623157E308 4.9E-324 +PREHOOK: query: select * from test_overflow where pmod(cint1, 1073741825) > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select * from test_overflow where pmod(cint1, 1073741825) > 0 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +-128 127 -32768 32767 -2147483648 2147483647 -9223372036854775808 9223372036854775807 1.4E-45 3.4028235E38 4.9E-324 1.7976931348623157E308 +127 -128 32767 -32768 2147483647 -2147483648 9223372036854775807 -9223372036854775808 3.4028235E38 1.4E-45 1.7976931348623157E308 4.9E-324 +PREHOOK: query: select * from test_overflow where pmod(csmallint1, 16385S) > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select * from test_overflow where pmod(csmallint1, 16385S) > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +-128 127 -32768 32767 -2147483648 2147483647 -9223372036854775808 9223372036854775807 1.4E-45 3.4028235E38 4.9E-324 1.7976931348623157E308 +127 -128 32767 -32768 2147483647 -2147483648 9223372036854775807 -9223372036854775808 3.4028235E38 1.4E-45 1.7976931348623157E308 4.9E-324 +64 65 32767 -32768 1073741824 1073741825 9223372036854775807 -9223372036854775808 3.4028235E38 1.4E-45 1.7976931348623157E308 4.9E-324 +PREHOOK: query: explain vectorization expression select * from test_overflow where pmod(csmallint1, 16385S) > 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression select * from test_overflow where pmod(csmallint1, 16385S) > 0 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_overflow + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 
13:smallint, val 0)(children: PosModLongToLong(col 2, divisor 16385) -> 13:smallint) + predicate: ((csmallint1 pmod 16385) > 0) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint1 (type: tinyint), ctinyint2 (type: tinyint), csmallint1 (type: smallint), csmallint2 (type: smallint), cint1 (type: int), cint2 (type: int), cbigint1 (type: bigint), cbigint2 (type: bigint), cfloat1 (type: float), cfloat2 (type: float), cdouble1 (type: double), cdouble2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from test_overflow where pmod(csmallint1, 16385S) > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select * from test_overflow where pmod(csmallint1, 16385S) > 0 +POSTHOOK: type: 
QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +-128 127 -32768 32767 -2147483648 2147483647 -9223372036854775808 9223372036854775807 1.4E-45 3.4028235E38 4.9E-324 1.7976931348623157E308 +127 -128 32767 -32768 2147483647 -2147483648 9223372036854775807 -9223372036854775808 3.4028235E38 1.4E-45 1.7976931348623157E308 4.9E-324 +64 65 32767 -32768 1073741824 1073741825 9223372036854775807 -9223372036854775808 3.4028235E38 1.4E-45 1.7976931348623157E308 4.9E-324 diff --git a/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java b/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java index 657ea34e11f7465e6c77d45128b298e7326a057b..021fe03e4d1ec4001c82088ed0bb711b1bb2fe9b 100644 --- a/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java +++ b/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java @@ -165,55 +165,92 @@ {"IntervalYearMonthArithmeticTimestamp", "Add","+", "interval_year_month", "Col", "timestamp", "Scalar"}, // Long/double arithmetic - {"ColumnArithmeticScalar", "Add", "long", "long", "+"}, - {"ColumnArithmeticScalar", "Subtract", "long", "long", "-"}, + {"ColumnArithmeticScalar", "Add", "long", "long", "+", "CHECKED"}, + {"ColumnArithmeticScalar", "Add", "long", "long", "+"}, + {"ColumnArithmeticScalar", "Subtract", "long", "long", "-", "CHECKED"}, + {"ColumnArithmeticScalar", "Subtract", "long", "long", "-"}, {"ColumnArithmeticScalar", "Multiply", "long", "long", "*"}, + {"ColumnArithmeticScalar", "Multiply", "long", "long", "*", "CHECKED"}, {"ColumnArithmeticScalar", "Add", "long", "double", "+"}, + {"ColumnArithmeticScalar", "Add", "long", "double", "+", "CHECKED"}, {"ColumnArithmeticScalar", "Subtract", "long", "double", "-"}, + {"ColumnArithmeticScalar", "Subtract", "long", "double", "-", "CHECKED"}, {"ColumnArithmeticScalar", "Multiply", "long", "double", "*"}, + {"ColumnArithmeticScalar", "Multiply", "long", "double", "*", "CHECKED"}, 
{"ColumnArithmeticScalar", "Add", "double", "long", "+"}, + {"ColumnArithmeticScalar", "Add", "double", "long", "+", "CHECKED"}, {"ColumnArithmeticScalar", "Subtract", "double", "long", "-"}, + {"ColumnArithmeticScalar", "Subtract", "double", "long", "-", "CHECKED"}, {"ColumnArithmeticScalar", "Multiply", "double", "long", "*"}, + {"ColumnArithmeticScalar", "Multiply", "double", "long", "*", "CHECKED"}, - {"ColumnArithmeticScalar", "Add", "double", "double", "+"}, + {"ColumnArithmeticScalar", "Add", "double", "double", "+",}, + {"ColumnArithmeticScalar", "Add", "double", "double", "+", "CHECKED"}, {"ColumnArithmeticScalar", "Subtract", "double", "double", "-"}, + {"ColumnArithmeticScalar", "Subtract", "double", "double", "-", "CHECKED"}, {"ColumnArithmeticScalar", "Multiply", "double", "double", "*"}, + {"ColumnArithmeticScalar", "Multiply", "double", "double", "*", "CHECKED"}, {"ScalarArithmeticColumn", "Add", "long", "long", "+"}, + {"ScalarArithmeticColumn", "Add", "long", "long", "+", "CHECKED"}, {"ScalarArithmeticColumn", "Subtract", "long", "long", "-"}, + {"ScalarArithmeticColumn", "Subtract", "long", "long", "-", "CHECKED"}, {"ScalarArithmeticColumn", "Multiply", "long", "long", "*"}, + {"ScalarArithmeticColumn", "Multiply", "long", "long", "*", "CHECKED"}, {"ScalarArithmeticColumn", "Add", "long", "double", "+"}, + {"ScalarArithmeticColumn", "Add", "long", "double", "+", "CHECKED"}, {"ScalarArithmeticColumn", "Subtract", "long", "double", "-"}, + {"ScalarArithmeticColumn", "Subtract", "long", "double", "-", "CHECKED"}, {"ScalarArithmeticColumn", "Multiply", "long", "double", "*"}, + {"ScalarArithmeticColumn", "Multiply", "long", "double", "*", "CHECKED"}, {"ScalarArithmeticColumn", "Add", "double", "long", "+"}, + {"ScalarArithmeticColumn", "Add", "double", "long", "+", "CHECKED"}, {"ScalarArithmeticColumn", "Subtract", "double", "long", "-"}, + {"ScalarArithmeticColumn", "Subtract", "double", "long", "-", "CHECKED"}, {"ScalarArithmeticColumn", "Multiply", 
"double", "long", "*"}, + {"ScalarArithmeticColumn", "Multiply", "double", "long", "*", "CHECKED"}, {"ScalarArithmeticColumn", "Add", "double", "double", "+"}, + {"ScalarArithmeticColumn", "Add", "double", "double", "+", "CHECKED"}, {"ScalarArithmeticColumn", "Subtract", "double", "double", "-"}, + {"ScalarArithmeticColumn", "Subtract", "double", "double", "-", "CHECKED"}, {"ScalarArithmeticColumn", "Multiply", "double", "double", "*"}, + {"ScalarArithmeticColumn", "Multiply", "double", "double", "*", "CHECKED"}, {"ColumnArithmeticColumn", "Add", "long", "long", "+"}, + {"ColumnArithmeticColumn", "Add", "long", "long", "+", "CHECKED"}, {"ColumnArithmeticColumn", "Subtract", "long", "long", "-"}, + {"ColumnArithmeticColumn", "Subtract", "long", "long", "-", "CHECKED"}, {"ColumnArithmeticColumn", "Multiply", "long", "long", "*"}, + {"ColumnArithmeticColumn", "Multiply", "long", "long", "*", "CHECKED"}, {"ColumnArithmeticColumn", "Add", "long", "double", "+"}, + {"ColumnArithmeticColumn", "Add", "long", "double", "+", "CHECKED"}, {"ColumnArithmeticColumn", "Subtract", "long", "double", "-"}, + {"ColumnArithmeticColumn", "Subtract", "long", "double", "-", "CHECKED"}, {"ColumnArithmeticColumn", "Multiply", "long", "double", "*"}, + {"ColumnArithmeticColumn", "Multiply", "long", "double", "*", "CHECKED"}, {"ColumnArithmeticColumn", "Add", "double", "long", "+"}, + {"ColumnArithmeticColumn", "Add", "double", "long", "+", "CHECKED"}, {"ColumnArithmeticColumn", "Subtract", "double", "long", "-"}, + {"ColumnArithmeticColumn", "Subtract", "double", "long", "-", "CHECKED"}, {"ColumnArithmeticColumn", "Multiply", "double", "long", "*"}, + {"ColumnArithmeticColumn", "Multiply", "double", "long", "*", "CHECKED"}, {"ColumnArithmeticColumn", "Add", "double", "double", "+"}, + {"ColumnArithmeticColumn", "Add", "double", "double", "+", "CHECKED"}, {"ColumnArithmeticColumn", "Subtract", "double", "double", "-"}, + {"ColumnArithmeticColumn", "Subtract", "double", "double", "-", 
"CHECKED"}, {"ColumnArithmeticColumn", "Multiply", "double", "double", "*"}, + {"ColumnArithmeticColumn", "Multiply", "double", "double", "*", "CHECKED"}, + //Divide operations are not CHECKED because the output is always of the type double {"ColumnDivideScalar", "Divide", "long", "double", "/"}, {"ColumnDivideScalar", "Divide", "double", "long", "/"}, {"ColumnDivideScalar", "Divide", "double", "double", "/"}, @@ -225,16 +262,27 @@ {"ColumnDivideColumn", "Divide", "double", "double", "/"}, {"ColumnDivideScalar", "Modulo", "long", "long", "%"}, + {"ColumnDivideScalar", "Modulo", "long", "long", "%", "CHECKED"}, {"ColumnDivideScalar", "Modulo", "long", "double", "%"}, + {"ColumnDivideScalar", "Modulo", "long", "double", "%", "CHECKED"}, {"ColumnDivideScalar", "Modulo", "double", "long", "%"}, + {"ColumnDivideScalar", "Modulo", "double", "long", "%", "CHECKED"}, {"ColumnDivideScalar", "Modulo", "double", "double", "%"}, + {"ColumnDivideScalar", "Modulo", "double", "double", "%", "CHECKED"}, {"ScalarDivideColumn", "Modulo", "long", "long", "%"}, + {"ScalarDivideColumn", "Modulo", "long", "long", "%", "CHECKED"}, {"ScalarDivideColumn", "Modulo", "long", "double", "%"}, + {"ScalarDivideColumn", "Modulo", "long", "double", "%", "CHECKED"}, {"ScalarDivideColumn", "Modulo", "double", "long", "%"}, + {"ScalarDivideColumn", "Modulo", "double", "long", "%", "CHECKED"}, {"ScalarDivideColumn", "Modulo", "double", "double", "%"}, + {"ScalarDivideColumn", "Modulo", "double", "double", "%", "CHECKED"}, {"ColumnDivideColumn", "Modulo", "long", "double", "%"}, + {"ColumnDivideColumn", "Modulo", "long", "double", "%", "CHECKED"}, {"ColumnDivideColumn", "Modulo", "double", "long", "%"}, + {"ColumnDivideColumn", "Modulo", "double", "long", "%", "CHECKED"}, {"ColumnDivideColumn", "Modulo", "double", "double", "%"}, + {"ColumnDivideColumn", "Modulo", "double", "double", "%", "CHECKED"}, {"ColumnArithmeticScalarDecimal", "Add"}, {"ColumnArithmeticScalarDecimal", "Subtract"}, @@ -996,7 
+1044,9 @@ // classes {"ColumnUnaryMinus", "long"}, + {"ColumnUnaryMinus", "long", "CHECKED"}, {"ColumnUnaryMinus", "double"}, + {"ColumnUnaryMinus", "double", "CHECKED"}, // IF conditional expression // fileHeader, resultType, arg2Type, arg3Type @@ -2087,8 +2137,10 @@ private void generateColumnUnaryMinus(String[] tdesc) throws Exception { String inputColumnVectorType = this.getColumnVectorType(operandType); String outputColumnVectorType = inputColumnVectorType; String returnType = operandType; - String className = getCamelCaseType(operandType) + "ColUnaryMinus"; - File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + boolean checked = (tdesc.length == 3 && "CHECKED".equals(tdesc[2])); + String className = getCamelCaseType(operandType) + "ColUnaryMinus" + + (checked ? "Checked" : ""); + File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); String templateString = readFile(templateFile); String vectorExprArgType = operandType; if (operandType.equals("long")) { @@ -2101,7 +2153,10 @@ private void generateColumnUnaryMinus(String[] tdesc) throws Exception { templateString = templateString.replaceAll("", outputColumnVectorType); templateString = templateString.replaceAll("", operandType); templateString = templateString.replaceAll("", returnType); + templateString = templateString.replaceAll("", getCamelCaseType(returnType)); templateString = templateString.replaceAll("", vectorExprArgType); + String ifDefined = checked ? 
tdesc[2] : ""; + templateString = evaluateIfDefined(templateString, ifDefined); writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, className, templateString); } @@ -2260,8 +2315,10 @@ private void generateColumnArithmeticColumn(String [] tdesc) throws Exception { String operatorName = tdesc[1]; String operandType1 = tdesc[2]; String operandType2 = tdesc[3]; + boolean checked = tdesc.length == 6 && "CHECKED".equals(tdesc[5]); String className = getCamelCaseType(operandType1) - + "Col" + operatorName + getCamelCaseType(operandType2) + "Column"; + + "Col" + operatorName + getCamelCaseType(operandType2) + "Column" + + (checked ? "Checked" : ""); String returnType = getArithmeticReturnType(operandType1, operandType2); generateColumnArithmeticOperatorColumn(tdesc, returnType, className); } @@ -2636,6 +2693,7 @@ private void generateColumnArithmeticOperatorColumn(String[] tdesc, String retur String inputColumnVectorType1 = this.getColumnVectorType(operandType1); String inputColumnVectorType2 = this.getColumnVectorType(operandType2); String operatorSymbol = tdesc[4]; + String ifDefined = tdesc.length == 6 ? 
tdesc[5] : ""; //Read the template into a string; File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); @@ -2650,6 +2708,8 @@ private void generateColumnArithmeticOperatorColumn(String[] tdesc, String retur templateString = templateString.replaceAll("", operandType2); templateString = templateString.replaceAll("", returnType); templateString = templateString.replaceAll("", getCamelCaseType(returnType)); + templateString = evaluateIfDefined(templateString, ifDefined); + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, className, templateString); @@ -2713,6 +2773,8 @@ private void generateColumnArithmeticOperatorScalar(String[] tdesc, String retur String inputColumnVectorType = this.getColumnVectorType(operandType1); String operatorSymbol = tdesc[4]; + String ifDefined = (tdesc.length == 6 ? tdesc[5] : ""); + //Read the template into a string; File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); String templateString = readFile(templateFile); @@ -2724,6 +2786,9 @@ private void generateColumnArithmeticOperatorScalar(String[] tdesc, String retur templateString = templateString.replaceAll("", operandType1); templateString = templateString.replaceAll("", operandType2); templateString = templateString.replaceAll("", returnType); + templateString = templateString.replaceAll("", getCamelCaseType(returnType)); + templateString = evaluateIfDefined(templateString, ifDefined); + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, className, templateString); @@ -2793,6 +2858,7 @@ private void generateScalarArithmeticOperatorColumn(String[] tdesc, String retur returnType == null ? "long" : returnType); String inputColumnVectorType = this.getColumnVectorType(operandType2); String operatorSymbol = tdesc[4]; + String ifDefined = (tdesc.length == 6 ? 
tdesc[5] : ""); //Read the template into a string; File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); @@ -2806,6 +2872,7 @@ private void generateScalarArithmeticOperatorColumn(String[] tdesc, String retur templateString = templateString.replaceAll("", operandType2); templateString = templateString.replaceAll("", returnType); templateString = templateString.replaceAll("", getCamelCaseType(returnType)); + templateString = evaluateIfDefined(templateString, ifDefined); writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, className, templateString); @@ -2827,8 +2894,10 @@ private void generateColumnArithmeticScalar(String[] tdesc) throws Exception { String operatorName = tdesc[1]; String operandType1 = tdesc[2]; String operandType2 = tdesc[3]; + boolean checked = tdesc.length == 6 && "CHECKED".equals(tdesc[5]); String className = getCamelCaseType(operandType1) - + "Col" + operatorName + getCamelCaseType(operandType2) + "Scalar"; + + "Col" + operatorName + getCamelCaseType(operandType2) + "Scalar" + + (checked ? "Checked" : ""); String returnType = getArithmeticReturnType(operandType1, operandType2); generateColumnArithmeticOperatorScalar(tdesc, returnType, className); } @@ -2921,8 +2990,10 @@ private void generateScalarArithmeticColumn(String[] tdesc) throws Exception { String operatorName = tdesc[1]; String operandType1 = tdesc[2]; String operandType2 = tdesc[3]; + boolean checked = (tdesc.length == 6 && "CHECKED".equals(tdesc[5])); String className = getCamelCaseType(operandType1) - + "Scalar" + operatorName + getCamelCaseType(operandType2) + "Column"; + + "Scalar" + operatorName + getCamelCaseType(operandType2) + "Column" + + (checked ? "Checked" : ""); String returnType = getArithmeticReturnType(operandType1, operandType2); generateScalarArithmeticOperatorColumn(tdesc, returnType, className); }