commit 39879d6833cc078178966c7e9837cbfb829367ca Author: Vihang Karajgaonkar Date: Sun Jan 14 09:47:26 2018 -0800 HIVE-18421 : Vectorized execution handles overflows in a different manner than non-vectorized execution diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 26e08e41f3660f77ab108a8576f87ce64b9deb61..3eac64a5ec9649bb353684c27364ed3fb7636fb8 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2979,7 +2979,10 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "The default value is true."), HIVE_VECTORIZATION_ROW_IDENTIFIER_ENABLED("hive.vectorized.row.identifier.enabled", true, "This flag should be set to true to enable vectorization of ROW__ID."), - + HIVE_VECTORIZATION_USE_CHECKED_EXPRESSIONS("hive.vectorized.use.checked.expressions", false, + "This flag should be set to true to use overflow checked vector expressions when available.\n" + + "For example, arithmetic expressions which can overflow the output data type can be evaluated using\n" + + " checked vector expressions so that they produce same result as non-vectorized evaluation."), HIVE_VECTORIZED_INPUT_FORMAT_SUPPORTS_ENABLED( "hive.vectorized.input.format.supports.enabled", "decimal_64", @@ -2990,7 +2993,7 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal HIVE_TEST_VECTORIZATION_ENABLED_OVERRIDE("hive.test.vectorized.execution.enabled.override", "none", new StringSet("none", "enable", "disable"), "internal use only, used to override the hive.vectorized.execution.enabled setting and\n" + - "turn off vectorization. The default is false, or course", + "turn off vectorization. 
The default is false, of course", true), HIVE_TYPE_CHECK_ON_INSERT("hive.typecheck.on.insert", true, "This property has been extended to control " diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizedArithmeticBench.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizedArithmeticBench.java index 8016630b4ab85e90667c8a09885721dd82548d40..70ee9b7ddbd34f4c21344c2d12ac8197f29b2f23 100644 --- a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizedArithmeticBench.java +++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizedArithmeticBench.java @@ -19,6 +19,8 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColAddDoubleColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColDivideDoubleColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddLongColumnChecked; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.State; import org.openjdk.jmh.runner.Runner; @@ -104,6 +106,26 @@ public void setup() { } } + public static class LongColAddLongColumnCheckedBench extends AbstractExpression { + @Override + public void setup() { + rowBatch = buildRowBatch(new LongColumnVector(), 2, getLongColumnVector(), + getLongColumnVector()); + expression = new LongColAddLongColumnChecked(0, 1, 2); + expression.setOutputTypeInfo(TypeInfoFactory.getPrimitiveTypeInfo("int")); + } + } + + public static class LongColAddLongColumnBench extends AbstractExpression { + @Override + public void setup() { + rowBatch = buildRowBatch(new LongColumnVector(), 2, getLongColumnVector(), + getLongColumnVector()); + expression = new LongColAddLongColumn(0, 1, 2); + expression.setOutputTypeInfo(TypeInfoFactory.getPrimitiveTypeInfo("int")); + } + } + 
public static void main(String[] args) throws RunnerException { Options opt = new OptionsBuilder().include(".*" + VectorizedArithmeticBench.class.getSimpleName() + ".*").build(); diff --git a/ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt b/ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt index b5011c3adcedf8974d3241994733e0021a851cbd..64c4e01e27ed19b3d88373f6bcf2a9ae0b5054bd 100644 --- a/ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt +++ b/ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import org.apache.hadoop.hive.ql.exec.vector.expressions.OverflowUtils; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; @@ -122,7 +123,13 @@ public class extends VectorExpression { } } } - + +#IF CHECKED + //when operating in checked mode make sure we handle overflows similar to non-vectorized expression + OverflowUtils.accountForOverflow(getOutputTypeInfo(), outputColVector, + batch.selectedInUse, sel, n); +#ELSE +#ENDIF CHECKED /* For the case when the output can have null values, follow * the convention that the data values must be 1 for long and * NaN for double. 
This is to prevent possible later zero-divide errors @@ -132,6 +139,13 @@ public class extends VectorExpression { NullUtil.setNullDataEntries(outputColVector, batch.selectedInUse, sel, n); } +#IF CHECKED + @Override + public boolean supportsCheckedExecution() { + return true; + } +#ENDIF CHECKED + @Override public String vectorExpressionParameters() { return getColumnParamString(0, colNum1) + ", " + getColumnParamString(1, colNum2); diff --git a/ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt b/ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt index cbec1abcc2b66f3ffc91b4778daf5017eff4379d..e7c23855196aae0fd287d81bd65d2e23240f6bd0 100644 --- a/ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt +++ b/ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import org.apache.hadoop.hive.ql.exec.vector.expressions.OverflowUtils; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.; import org.apache.hadoop.hive.ql.exec.vector.; @@ -103,10 +104,22 @@ public class extends VectorExpression { System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } - +#IF CHECKED + //when operating in checked mode make sure we handle overflows similar to non-vectorized expression + OverflowUtils.accountForOverflow(getOutputTypeInfo(), outputColVector, + batch.selectedInUse, sel, n); +#ELSE +#ENDIF CHECKED NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } +#IF CHECKED + @Override + public boolean supportsCheckedExecution() { + return true; + } +#ENDIF CHECKED + @Override public String vectorExpressionParameters() { return getColumnParamString(0, colNum) + ", val " + value; diff --git a/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt b/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt 
index 3e955578933dd7990939865527c3bd11023b3a90..dd5330d829a7f311d5f3aacd524de4218a8dc166 100644 --- a/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt +++ b/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import org.apache.hadoop.hive.ql.exec.vector.expressions.OverflowUtils; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; @@ -143,6 +144,12 @@ public class extends VectorExpression { } } +#IF CHECKED + //when operating in checked mode make sure we handle overflows similar to non-vectorized expression + OverflowUtils.accountForOverflow(getOutputTypeInfo(), outputColVector, + batch.selectedInUse, sel, n); +#ELSE +#ENDIF CHECKED /* For the case when the output can have null values, follow * the convention that the data values must be 1 for long and * NaN for double. 
This is to prevent possible later zero-divide errors @@ -157,6 +164,13 @@ public class extends VectorExpression { } } +#IF CHECKED + @Override + public boolean supportsCheckedExecution() { + return true; + } +#ENDIF CHECKED + @Override public String vectorExpressionParameters() { return getColumnParamString(0, colNum1) + ", " + getColumnParamString(1, colNum2); diff --git a/ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt b/ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt index f0ab4711e79c8a1bfceebcde9a3dda2b4e15a38a..b200ef97710584f93c4d2269f8d3694cf0e1c08f 100644 --- a/ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt +++ b/ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import org.apache.hadoop.hive.ql.exec.vector.expressions.OverflowUtils; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -100,7 +101,21 @@ public class extends VectorExpression { } outputColVector.isRepeating = false; } + +#IF CHECKED + //when operating in checked mode make sure we handle overflows similar to non-vectorized expression + OverflowUtils.accountForOverflow(getOutputTypeInfo(), outputColVector, + batch.selectedInUse, sel, n); +#ELSE +#ENDIF CHECKED + } + +#IF CHECKED + @Override + public boolean supportsCheckedExecution() { + return true; } +#ENDIF CHECKED @Override public String vectorExpressionParameters() { diff --git a/ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt b/ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt index e95baa6199e138a4e0c009e62ce495b626e5909c..67106c296677f6bd93584eca2b5d8834cedb8732 100644 --- a/ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt +++ 
b/ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt @@ -26,6 +26,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; * of these ColumnVector imports may be needed. Listing both of them * rather than using ....vectorization.*; */ +import org.apache.hadoop.hive.ql.exec.vector.expressions.OverflowUtils; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -115,10 +116,23 @@ public class extends VectorExpression { System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } +#IF CHECKED + //when operating in checked mode make sure we handle overflows similar to non-vectorized expression + OverflowUtils.accountForOverflow(getOutputTypeInfo(), outputColVector, + batch.selectedInUse, sel, n); +#ELSE +#ENDIF CHECKED NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } +#IF CHECKED + @Override + public boolean supportsCheckedExecution() { + return true; + } +#ENDIF CHECKED + @Override public String vectorExpressionParameters() { return "val " + value + ", " + getColumnParamString(1, colNum); diff --git a/ql/src/gen/vectorization/TestTemplates/TestClass.txt b/ql/src/gen/vectorization/TestTemplates/TestClass.txt index 62c58fb293fbe2d4d948c6a3409ee31466424a02..f15695a3580acba44fc64b88ef5707a7851305c9 100644 --- a/ql/src/gen/vectorization/TestTemplates/TestClass.txt +++ b/ql/src/gen/vectorization/TestTemplates/TestClass.txt @@ -24,6 +24,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.util.VectorizedRowGroupGenUtil; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.junit.Test; diff --git 
a/ql/src/gen/vectorization/TestTemplates/TestColumnColumnOperationVectorExpressionCheckedEvaluation.txt b/ql/src/gen/vectorization/TestTemplates/TestColumnColumnOperationVectorExpressionCheckedEvaluation.txt new file mode 100644 index 0000000000000000000000000000000000000000..069d9abea76881d1df113948988cfe32e3f5b856 --- /dev/null +++ b/ql/src/gen/vectorization/TestTemplates/TestColumnColumnOperationVectorExpressionCheckedEvaluation.txt @@ -0,0 +1,65 @@ + + @Test + public void () { + + Random rand = new Random(SEED); + + outputColumnVector = + VectorizedRowGroupGenUtil.generate(, + , BATCH_SIZE, rand); + + inputColumnVector1 = + VectorizedRowGroupGenUtil.generate(, + , BATCH_SIZE, rand); + + inputColumnVector2 = + VectorizedRowGroupGenUtil.generate(, + , BATCH_SIZE, rand); + + VectorizedRowBatch rowBatch = new VectorizedRowBatch(3, BATCH_SIZE); + rowBatch.cols[0] = inputColumnVector1; + rowBatch.cols[1] = inputColumnVector2; + rowBatch.cols[2] = outputColumnVector; + + vectorExpression = + new (0, 1, 2); + vectorExpression.setOutputTypeInfo(TypeInfoFactory.getPrimitiveTypeInfo("")); + + vectorExpression.evaluate(rowBatch); + + assertEquals( + "Output column vector repeating state does not match operand columns", + (!inputColumnVector1.noNulls && inputColumnVector1.isRepeating) + || (!inputColumnVector2.noNulls && inputColumnVector2.isRepeating) + || inputColumnVector1.isRepeating && inputColumnVector2.isRepeating, + outputColumnVector.isRepeating); + + assertEquals( + "Output column vector no nulls state does not match operand columns", + inputColumnVector1.noNulls && inputColumnVector2.noNulls, outputColumnVector.noNulls); + + //if repeating, only the first value matters + if(!outputColumnVector.noNulls && !outputColumnVector.isRepeating) { + for(int i = 0; i < BATCH_SIZE; i++) { + //null vectors are safe to check, as they are always initialized to match the data vector + assertEquals("Output vector doesn't match input vectors' is null state for index", + 
inputColumnVector1.isNull[i] || inputColumnVector2.isNull[i], + outputColumnVector.isNull[i]); + } + } + } diff --git a/ql/src/gen/vectorization/TestTemplates/TestColumnScalarOperationVectorExpressionCheckedEvaluation.txt b/ql/src/gen/vectorization/TestTemplates/TestColumnScalarOperationVectorExpressionCheckedEvaluation.txt new file mode 100644 index 0000000000000000000000000000000000000000..df4f89d55c6ce41e6b0ef419ecc638944283981b --- /dev/null +++ b/ql/src/gen/vectorization/TestTemplates/TestColumnScalarOperationVectorExpressionCheckedEvaluation.txt @@ -0,0 +1,60 @@ + + @Test + public void () { + + Random rand = new Random(SEED); + + outputColumnVector = + VectorizedRowGroupGenUtil.generate(, + , BATCH_SIZE, rand); + + inputColumnVector = + VectorizedRowGroupGenUtil.generate(, + , BATCH_SIZE, rand); + + VectorizedRowBatch rowBatch = new VectorizedRowBatch(2, BATCH_SIZE); + rowBatch.cols[0] = inputColumnVector; + rowBatch.cols[1] = outputColumnVector; + + scalarValue = 0; + do { + scalarValue = rand.next(); + } while(scalarValue == 0); + + vectorExpression = + new (, 1); + vectorExpression.setOutputTypeInfo(TypeInfoFactory.getPrimitiveTypeInfo("")); + + vectorExpression.evaluate(rowBatch); + + assertEquals( + "Output column vector is repeating state does not match operand column", + inputColumnVector.isRepeating, outputColumnVector.isRepeating); + + assertEquals( + "Output column vector no nulls state does not match operand column", + inputColumnVector.noNulls, outputColumnVector.noNulls); + + if(!outputColumnVector.noNulls && !outputColumnVector.isRepeating) { + for(int i = 0; i < BATCH_SIZE; i++) { + //null vectors are safe to check, as they are always initialized to match the data vector + assertEquals("Output vector doesn't match input vector's is null state for index", + inputColumnVector.isNull[i], outputColumnVector.isNull[i]); + } + } + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java index bbe78c8720e16163b642f54d27fdf6b65ba9850b..3167e9e9eb34afe2933bb475e595dcb080597f7f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java @@ -351,22 +351,37 @@ public String toString() { } } - public Class getVectorExpressionClass(Class udf, Descriptor descriptor) throws HiveException { + public Class getVectorExpressionClass(Class udf, Descriptor descriptor, + boolean useCheckedExpressionIfAvailable) throws HiveException { VectorizedExpressions annotation = AnnotationUtils.getAnnotation(udf, VectorizedExpressions.class); if (annotation == null || annotation.value() == null) { return null; } Class[] list = annotation.value(); + Class matchedVe = null; for (Class ve : list) { try { - if (ve.newInstance().getDescriptor().matches(descriptor)) { - return ve; + VectorExpression candidateVe = ve.newInstance(); + if (candidateVe.getDescriptor().matches(descriptor)) { + if (!useCheckedExpressionIfAvailable) { + // no need to look further for a checked variant of this expression + return ve; + } else if (candidateVe.supportsCheckedExecution()) { + return ve; + } else { + // vector expression doesn't support checked execution + // hold on to it in case there is no available checked variant + matchedVe = ve; + } } } catch (Exception ex) { throw new HiveException("Could not instantiate VectorExpression class " + ve.getSimpleName(), ex); } } + if (matchedVe != null) { + return matchedVe; + } if (LOG.isDebugEnabled()) { LOG.debug("getVectorExpressionClass udf " + udf.getSimpleName() + " descriptor: " + descriptor.toString()); for (Class ve : list) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 
8264e8ad285deac29424bd1cb0bf626436d47c75..d1b52c67e99fd122a5e104f42b2e0c75aaff5147 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -133,6 +133,8 @@ public static HiveVectorAdaptorUsageMode getHiveConfValue(HiveConf hiveConf) { } private HiveVectorAdaptorUsageMode hiveVectorAdaptorUsageMode; + //when set to true use the overflow checked vector expressions + private boolean useCheckedVectorExpressions; private boolean reuseScratchColumns = HiveConf.ConfVars.HIVE_VECTORIZATION_TESTING_REUSE_SCRATCH_COLUMNS.defaultBoolVal; @@ -142,6 +144,8 @@ private void setHiveConfVars(HiveConf hiveConf) { this.reuseScratchColumns = HiveConf.getBoolVar(hiveConf, ConfVars.HIVE_VECTORIZATION_TESTING_REUSE_SCRATCH_COLUMNS); this.ocm.setReuseColumns(reuseScratchColumns); + useCheckedVectorExpressions = + HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_USE_CHECKED_EXPRESSIONS); } private void copyHiveConfVars(VectorizationContext vContextEnvironment) { @@ -1563,7 +1567,8 @@ private VectorExpression getDecimal64VectorExpressionForUdf(GenericUDF genericUd } VectorExpressionDescriptor.Descriptor descriptor = builder.build(); - Class vectorClass = this.vMap.getVectorExpressionClass(udfClass, descriptor); + Class vectorClass = + this.vMap.getVectorExpressionClass(udfClass, descriptor, useCheckedVectorExpressions); if (vectorClass == null) { return null; } @@ -1706,7 +1711,8 @@ private VectorExpression getVectorExpressionForUdf(GenericUDF genericUdf, } } VectorExpressionDescriptor.Descriptor descriptor = builder.build(); - Class vclass = this.vMap.getVectorExpressionClass(udfClass, descriptor); + Class vclass = + this.vMap.getVectorExpressionClass(udfClass, descriptor, useCheckedVectorExpressions); if (vclass == null) { if (LOG.isDebugEnabled()) { LOG.debug("No vector udf found for "+udfClass.getSimpleName() + ", descriptor: "+descriptor); diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColModuloLongColumnChecked.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColModuloLongColumnChecked.java new file mode 100644 index 0000000000000000000000000000000000000000..f36713928a8acf1ae585c3545144ee8b973fa6ee --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColModuloLongColumnChecked.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * This vector expression implements a Checked variant of LongColModuloLongColumn + * If the outputTypeInfo is not long it casts the result column vector values to + * the set outputType so as to have similar result when compared to non-vectorized UDF + * execution. 
+ */ +public class LongColModuloLongColumnChecked extends LongColModuloLongColumn { + public LongColModuloLongColumnChecked(int colNum1, int colNum2, int outputColumnNum) { + super(colNum1, colNum2, outputColumnNum); + } + + public LongColModuloLongColumnChecked() { + super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + super.evaluate(batch); + //checked for overflow based on the outputTypeInfo + OverflowUtils + .accountForOverflowLong(outputTypeInfo, (LongColumnVector) batch.cols[outputColumnNum], batch.selectedInUse, + batch.selected, batch.size); + } + + @Override + public boolean supportsCheckedExecution() { + return true; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OverflowUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OverflowUtils.java new file mode 100644 index 0000000000000000000000000000000000000000..e3f5398486358ff5c37d9e4c880d2b7d70697f95 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OverflowUtils.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Utility methods to handle integer overflow/underflows in a ColumnVector. + */ +public class OverflowUtils { + + private OverflowUtils() { + //prevent instantiation + } + + public static void accountForOverflowLong(TypeInfo outputTypeInfo, LongColumnVector v, + boolean selectedInUse, int[] sel, int n) { + if (outputTypeInfo == null) { + //can't do much if outputTypeInfo is not set + return; + } + switch (outputTypeInfo.getTypeName()) { + case serdeConstants.TINYINT_TYPE_NAME: + //byte + if (v.isRepeating) { + v.vector[0] = (byte) v.vector[0]; + } else if (selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + v.vector[i] = (byte) v.vector[i]; + } + } else { + for (int i = 0; i != n; i++) { + v.vector[i] = (byte) v.vector[i]; + } + } + break; + case serdeConstants.SMALLINT_TYPE_NAME: + //short + if (v.isRepeating) { + v.vector[0] = (short) v.vector[0]; + } else if (selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + v.vector[i] = (short) v.vector[i]; + } + } else { + for (int i = 0; i != n; i++) { + v.vector[i] = (short) v.vector[i]; + } + } + break; + case serdeConstants.INT_TYPE_NAME: + //int + if (v.isRepeating) { + v.vector[0] = (int) v.vector[0]; + } else if (selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + v.vector[i] = (int) v.vector[i]; + } + } else { + for (int i = 0; i != n; i++) { + v.vector[i] = (int) v.vector[i]; + } + } + break; + default: + //nothing to be done + } + } + + public static void accountForOverflowDouble(TypeInfo outputTypeInfo, DoubleColumnVector v, + boolean selectedInUse, int[] sel, int n) { + if (outputTypeInfo == 
null) { + //can't do much if outputTypeInfo is not set + return; + } + switch (outputTypeInfo.getTypeName()) { + case serdeConstants.FLOAT_TYPE_NAME: + //float + if (v.isRepeating) { + v.vector[0] = (float) v.vector[0]; + } else if (selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + v.vector[i] = (float) v.vector[i]; + } + } else { + for (int i = 0; i != n; i++) { + v.vector[i] = (float) v.vector[i]; + } + } + break; + default: + //nothing to be done + } + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/PosModDoubleToDouble.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/PosModDoubleToDouble.java index 75ec419aa9ea5c3fcc5e7314fbac756d6a5d36d5..99008b8d6b613e3a6dd615ad33536ed628e67d5e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/PosModDoubleToDouble.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/PosModDoubleToDouble.java @@ -19,11 +19,14 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; public class PosModDoubleToDouble extends MathFuncDoubleToDouble { private static final long serialVersionUID = 1L; private final double divisor; + private boolean isOutputTypeFloat; public PosModDoubleToDouble(int inputCol, double scalarVal, int outputColumnNum) { super(inputCol, outputColumnNum); @@ -37,9 +40,26 @@ public PosModDoubleToDouble() { divisor = 0; } + /** + * Set type of the output column and also set the flag which determines if cast to float + * is needed while calculating PosMod expression + */ @Override - protected double func(double v) { + public void setOutputTypeInfo(TypeInfo outputTypeInfo) { + this.outputTypeInfo = outputTypeInfo; + isOutputTypeFloat = outputTypeInfo != null && serdeConstants.FLOAT_TYPE_NAME + 
.equals(outputTypeInfo.getTypeName()); + } + @Override + protected double func(double v) { + // if the outputType is a float cast the arguments to float to replicate the overflow behavior + // in non-vectorized UDF GenericUDFPosMod + if (isOutputTypeFloat) { + float castedV = (float) v; + float castedDivisor = (float) divisor; + return ((castedV % castedDivisor) + castedDivisor) % castedDivisor; + } // return positive modulo return ((v % divisor) + divisor) % divisor; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/PosModLongToLong.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/PosModLongToLong.java index 6b4d714c9a79a55593c4a4d254267a3035abb10f..07dbfe3155ac861512d68e1d26b9eaa021fc7e5b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/PosModLongToLong.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/PosModLongToLong.java @@ -19,11 +19,14 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; public class PosModLongToLong extends MathFuncLongToLong { private static final long serialVersionUID = 1L; private final long divisor; + private String outputCastType = serdeConstants.BIGINT_TYPE_NAME; public PosModLongToLong(int inputCol, long scalarVal, int outputColumnNum) { super(inputCol, outputColumnNum); @@ -39,9 +42,37 @@ public PosModLongToLong() { @Override protected long func(long v) { + // pmod calculation can overflow based on the type of arguments + // casting the arguments according to outputTypeInfo so that the + // results match with GenericUDFPosMod implementation + switch (outputCastType) { + case serdeConstants.TINYINT_TYPE_NAME: + byte castedByte = (byte) v; + byte castedDivisorByte = (byte) divisor; + return ((castedByte % castedDivisorByte) + castedDivisorByte) % 
castedDivisorByte; - // return positive modulo - return ((v % divisor) + divisor) % divisor; + case serdeConstants.SMALLINT_TYPE_NAME: + short castedShort = (short) v; + short castedDivisorShort = (short) divisor; + return ((castedShort % castedDivisorShort) + castedDivisorShort) % castedDivisorShort; + + case serdeConstants.INT_TYPE_NAME: + int castedInt = (int) v; + int castedDivisorInt = (int) divisor; + return ((castedInt % castedDivisorInt) + castedDivisorInt) % castedDivisorInt; + default: + // default is using long types + return ((v % divisor) + divisor) % divisor; + } + } + + @Override + public void setOutputTypeInfo(TypeInfo outputTypeInfo) { + this.outputTypeInfo = outputTypeInfo; + //default outputTypeInfo is long + if (outputTypeInfo != null) { + outputCastType = outputTypeInfo.getTypeName(); + } } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java index 710165033627b33d9b238cc847dbac36c07ee5f6..4407961ab4566e4e1160c0ffa464e5ac314b5809 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java @@ -307,6 +307,17 @@ public static String getTypeName(TypeInfo typeInfo, DataTypePhysicalVariation da } } + /** + * A vector expression which implements a checked execution to account for overflow handling + * should override this method and return true. 
In such a case Vectorizer will use Checked + * variation of the vector expression to process data + * @return true if vector expression implements a Checked variation of vector expression + */ + public boolean supportsCheckedExecution() { + // default is false + return false; + } + @Override public String toString() { StringBuilder b = new StringBuilder(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMinus.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMinus.java index af8552caa02f2896f393a5099abdb1ae5abd4c16..3c1a0795d906d0f7093705b024fa009aae7b84e4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMinus.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMinus.java @@ -27,11 +27,17 @@ @Description(name = "-", value = "a _FUNC_ b - Returns the difference a-b") @VectorizedExpressions({LongColSubtractLongColumn.class, LongColSubtractDoubleColumn.class, + LongColSubtractLongColumnChecked.class, LongColSubtractDoubleColumnChecked.class, DoubleColSubtractLongColumn.class, DoubleColSubtractDoubleColumn.class, + DoubleColSubtractLongColumnChecked.class, DoubleColSubtractDoubleColumnChecked.class, LongColSubtractLongScalar.class, LongColSubtractDoubleScalar.class, + LongColSubtractLongScalarChecked.class, LongColSubtractDoubleScalarChecked.class, DoubleColSubtractLongScalar.class, DoubleColSubtractDoubleScalar.class, + DoubleColSubtractLongScalarChecked.class, DoubleColSubtractDoubleScalarChecked.class, LongScalarSubtractLongColumn.class, LongScalarSubtractDoubleColumn.class, + LongScalarSubtractLongColumnChecked.class, LongScalarSubtractDoubleColumnChecked.class, DoubleScalarSubtractLongColumn.class, DoubleScalarSubtractDoubleColumn.class, + DoubleScalarSubtractLongColumnChecked.class, DoubleScalarSubtractDoubleColumnChecked.class, DecimalColSubtractDecimalColumn.class, DecimalColSubtractDecimalScalar.class, DecimalScalarSubtractDecimalColumn.class, diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMod.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMod.java index e2a638da518a2071ff15b8da6899646ec45c832a..044fb062752f77d3eea96d52d1bfb4bb86f1cd6f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMod.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMod.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColModuloLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColModuloLongColumnChecked; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.*; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; @@ -35,11 +36,17 @@ @Description(name = "%", value = "a _FUNC_ b - Returns the remainder when dividing a by b") @VectorizedExpressions({LongColModuloLongColumn.class, LongColModuloDoubleColumn.class, + LongColModuloLongColumnChecked.class, LongColModuloDoubleColumnChecked.class, DoubleColModuloLongColumn.class, DoubleColModuloDoubleColumn.class, + DoubleColModuloLongColumnChecked.class, DoubleColModuloDoubleColumnChecked.class, LongColModuloLongScalar.class, LongColModuloDoubleScalar.class, + LongColModuloLongScalarChecked.class, LongColModuloDoubleScalarChecked.class, DoubleColModuloLongScalar.class, DoubleColModuloDoubleScalar.class, + DoubleColModuloLongScalarChecked.class, DoubleColModuloDoubleScalarChecked.class, LongScalarModuloLongColumn.class, LongScalarModuloDoubleColumn.class, + LongScalarModuloLongColumnChecked.class, LongScalarModuloDoubleColumnChecked.class, DoubleScalarModuloLongColumn.class, DoubleScalarModuloDoubleColumn.class, + DoubleScalarModuloLongColumnChecked.class, DoubleScalarModuloDoubleColumnChecked.class, DecimalColModuloDecimalColumn.class, 
DecimalColModuloDecimalScalar.class, DecimalScalarModuloDecimalColumn.class}) public class GenericUDFOPMod extends GenericUDFBaseNumeric { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMultiply.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMultiply.java index 99d1ad7f203d946fd89d26074bd0e00dec8b3a1a..616641d2f092edaaf2105e80b4d801df065cfa2f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMultiply.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMultiply.java @@ -34,11 +34,17 @@ @Description(name = "*", value = "a _FUNC_ b - Multiplies a by b") @VectorizedExpressions({LongColMultiplyLongColumn.class, LongColMultiplyDoubleColumn.class, + LongColMultiplyLongColumnChecked.class, LongColMultiplyDoubleColumnChecked.class, DoubleColMultiplyLongColumn.class, DoubleColMultiplyDoubleColumn.class, + DoubleColMultiplyLongColumnChecked.class, DoubleColMultiplyDoubleColumnChecked.class, LongColMultiplyLongScalar.class, LongColMultiplyDoubleScalar.class, + LongColMultiplyLongScalarChecked.class, LongColMultiplyDoubleScalarChecked.class, DoubleColMultiplyLongScalar.class, DoubleColMultiplyDoubleScalar.class, + DoubleColMultiplyLongScalarChecked.class, DoubleColMultiplyDoubleScalarChecked.class, LongScalarMultiplyLongColumn.class, LongScalarMultiplyDoubleColumn.class, + LongScalarMultiplyLongColumnChecked.class, LongScalarMultiplyDoubleColumnChecked.class, DoubleScalarMultiplyLongColumn.class, DoubleScalarMultiplyDoubleColumn.class, + DoubleScalarMultiplyLongColumnChecked.class, DoubleScalarMultiplyDoubleColumnChecked.class, DecimalColMultiplyDecimalColumn.class, DecimalColMultiplyDecimalScalar.class, DecimalScalarMultiplyDecimalColumn.class}) public class GenericUDFOPMultiply extends GenericUDFBaseNumeric { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNegative.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNegative.java index 
4e45788936559bbb7cfe65e9ffd083747b37dcc2..3a88759818ad9fb0ed6cb05835fe0d6303b413c6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNegative.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNegative.java @@ -24,8 +24,10 @@ import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColUnaryMinus; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColUnaryMinusChecked; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FuncNegateDecimalToDecimal; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColUnaryMinus; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColUnaryMinusChecked; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; @@ -38,7 +40,8 @@ import org.apache.hadoop.io.LongWritable; @Description(name = "-", value = "_FUNC_ a - Returns -a") -@VectorizedExpressions({LongColUnaryMinus.class, DoubleColUnaryMinus.class, FuncNegateDecimalToDecimal.class}) +@VectorizedExpressions({LongColUnaryMinus.class, DoubleColUnaryMinus.class, FuncNegateDecimalToDecimal.class, + LongColUnaryMinusChecked.class, DoubleColUnaryMinusChecked.class}) public class GenericUDFOPNegative extends GenericUDFBaseUnary { public GenericUDFOPNegative() { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPPlus.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPPlus.java index b1200e673e6b470b5fd1cc856270a6da615f16cb..5eb232cb297f275c5deaefa087e1a0f1b813b9ac 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPPlus.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPPlus.java @@ -35,10 +35,15 @@ */ @Description(name = "+", value = "a _FUNC_ b - Returns a+b") 
@VectorizedExpressions({LongColAddLongColumn.class, LongColAddDoubleColumn.class, + LongColAddLongColumnChecked.class, LongColAddDoubleColumnChecked.class, DoubleColAddLongColumn.class, DoubleColAddDoubleColumn.class, LongColAddLongScalar.class, + DoubleColAddLongColumnChecked.class, DoubleColAddDoubleColumnChecked.class, LongColAddLongScalarChecked.class, LongColAddDoubleScalar.class, DoubleColAddLongScalar.class, DoubleColAddDoubleScalar.class, + LongColAddDoubleScalarChecked.class, DoubleColAddLongScalarChecked.class, DoubleColAddDoubleScalarChecked.class, LongScalarAddLongColumn.class, LongScalarAddDoubleColumn.class, DoubleScalarAddLongColumn.class, + LongScalarAddLongColumnChecked.class, LongScalarAddDoubleColumnChecked.class, DoubleScalarAddLongColumnChecked.class, DoubleScalarAddDoubleColumn.class, + DoubleScalarAddDoubleColumnChecked.class, DecimalScalarAddDecimalColumn.class, DecimalColAddDecimalColumn.class, DecimalColAddDecimalScalar.class, diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestUnaryMinus.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestUnaryMinus.java index ab6f6b79316818cac458390dc2d087091057c63b..e227f4411cb6374749d2ac9a13096aaf401fb9ab 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestUnaryMinus.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestUnaryMinus.java @@ -23,7 +23,9 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColUnaryMinus; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColUnaryMinusChecked; import org.apache.hadoop.hive.ql.exec.vector.util.VectorizedRowGroupGenUtil; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.junit.Test; /** @@ -43,4 +45,35 @@ public void testUnaryMinus() { assertEquals(0, inVector[i]+outVector[i]); } } + 
+ + @Test + public void testUnaryMinusCheckedOverflow() { + VectorizedRowBatch vrg = VectorizedRowGroupGenUtil.getVectorizedRowBatch(1, 2, 0); + //set value to MIN_VALUE so that -MIN_VALUE overflows and gets set to MIN_VALUE again + ((LongColumnVector)vrg.cols[0]).vector[0] = Integer.MIN_VALUE; + LongColUnaryMinusChecked expr = new LongColUnaryMinusChecked(0, 1); + expr.setOutputTypeInfo(TypeInfoFactory.getPrimitiveTypeInfo("int")); + expr.evaluate(vrg); + //verify + long[] inVector = ((LongColumnVector) vrg.cols[0]).vector; + long[] outVector = ((LongColumnVector) vrg.cols[1]).vector; + for (int i = 0; i < outVector.length; i++) { + assertEquals(Integer.MIN_VALUE, outVector[i]); + } + } + + @Test + public void testUnaryMinusChecked() { + VectorizedRowBatch vrg = VectorizedRowGroupGenUtil.getVectorizedRowBatch(1024, 2, 23); + LongColUnaryMinusChecked expr = new LongColUnaryMinusChecked(0, 1); + expr.setOutputTypeInfo(TypeInfoFactory.getPrimitiveTypeInfo("bigint")); + expr.evaluate(vrg); + //verify + long[] inVector = ((LongColumnVector) vrg.cols[0]).vector; + long[] outVector = ((LongColumnVector) vrg.cols[1]).vector; + for (int i = 0; i < outVector.length; i++) { + assertEquals(0, inVector[i]+outVector[i]); + } + } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java index 02dec659ce421eef06f924bb6973070878d57be3..acb319823485fabd5d0b275a0ff12037e16b9324 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java @@ -42,6 +42,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DecimalColMultiplyDecimalColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DecimalColSubtractDecimalColumn; import 
org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddLongColumnChecked; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddLongScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DecimalColSubtractDecimalColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DecimalColAddDecimalColumn; @@ -52,7 +53,9 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DecimalScalarAddDecimalColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DecimalScalarSubtractDecimalColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DecimalScalarMultiplyDecimalColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddLongScalarChecked; import org.apache.hadoop.hive.ql.exec.vector.util.VectorizedRowGroupGenUtil; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.junit.Test; /** @@ -62,8 +65,23 @@ @Test public void testLongColAddLongScalarNoNulls() { + longColAddLongScalarNoNulls(false); + } + + @Test + public void testLongColAddLongScalarCheckedNoNulls() { + longColAddLongScalarNoNulls(true); + } + + private void longColAddLongScalarNoNulls(boolean checked) { VectorizedRowBatch vrg = getVectorizedRowBatchSingleLongVector(VectorizedRowBatch.DEFAULT_SIZE); - LongColAddLongScalar expr = new LongColAddLongScalar(0, 23, 1); + VectorExpression expr; + if (checked) { + expr = new LongColAddLongScalarChecked(0, 23, 1); + expr.setOutputTypeInfo(TypeInfoFactory.getPrimitiveTypeInfo("bigint")); + } else { + expr = new LongColAddLongScalar(0, 23, 1); + } expr.evaluate(vrg); //verify for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) { @@ -105,12 +123,27 @@ public static VectorizedRowBatch getVectorizedRowBatch2LongInDoubleOut() { @Test public void testLongColAddLongScalarWithNulls() { + longColAddLongScalarCheckedWithNulls(false); + } + + @Test + public void 
testLongColAddLongScalarCheckedWithNulls() { + longColAddLongScalarCheckedWithNulls(true); + } + + private void longColAddLongScalarCheckedWithNulls(boolean isChecked) { VectorizedRowBatch batch = getVectorizedRowBatchSingleLongVector( VectorizedRowBatch.DEFAULT_SIZE); LongColumnVector lcv = (LongColumnVector) batch.cols[0]; LongColumnVector lcvOut = (LongColumnVector) batch.cols[1]; TestVectorizedRowBatch.addRandomNulls(lcv); - LongColAddLongScalar expr = new LongColAddLongScalar(0, 23, 1); + VectorExpression expr; + if (isChecked) { + expr = new LongColAddLongScalarChecked(0, 23, 1); + expr.setOutputTypeInfo(TypeInfoFactory.getPrimitiveTypeInfo("bigint")); + } else { + expr = new LongColAddLongScalar(0, 23, 1); + } expr.evaluate(batch); // verify @@ -128,9 +161,18 @@ public void testLongColAddLongScalarWithNulls() { @Test public void testLongColAddLongScalarWithRepeating() { + longColAddLongScalarWithRepeatingUtil(false); + } + + @Test + public void testLongColAddLongScalarCheckedWithRepeating() { + longColAddLongScalarWithRepeatingUtil(true); + } + + private void longColAddLongScalarWithRepeatingUtil(boolean isChecked) { LongColumnVector in, out; VectorizedRowBatch batch; - LongColAddLongScalar expr; + VectorExpression expr; // Case 1: is repeating, no nulls batch = getVectorizedRowBatchSingleLongVector(VectorizedRowBatch.DEFAULT_SIZE); @@ -138,7 +180,13 @@ public void testLongColAddLongScalarWithRepeating() { in.isRepeating = true; out = (LongColumnVector) batch.cols[1]; out.isRepeating = false; - expr = new LongColAddLongScalar(0, 23, 1); + if(isChecked) { + expr = new LongColAddLongScalarChecked(0, 23, 1); + expr.setOutputTypeInfo(TypeInfoFactory.getPrimitiveTypeInfo("bigint")); + } else { + expr = new LongColAddLongScalar(0, 23, 1); + } + expr.evaluate(batch); // verify Assert.assertTrue(out.isRepeating); @@ -156,7 +204,13 @@ public void testLongColAddLongScalarWithRepeating() { out.isRepeating = false; out.isNull[0] = false; out.noNulls = true; - expr = new 
LongColAddLongScalar(0, 23, 1); + if (isChecked) { + expr = new LongColAddLongScalarChecked(0, 23, 1); + expr.setOutputTypeInfo(TypeInfoFactory.getPrimitiveTypeInfo("bigint")); + } else { + expr = new LongColAddLongScalar(0, 23, 1); + } + expr.evaluate(batch); // verify Assert.assertTrue(out.isRepeating); @@ -195,6 +249,15 @@ public static void verifyLongNullDataVectorEntries( @Test public void testLongColAddLongColumn() { + longColAddLongColumnUtil(false); + } + + @Test + public void testLongColAddLongColumnChecked() { + longColAddLongColumnUtil(true); + } + + private void longColAddLongColumnUtil(boolean isChecked) { int seed = 17; VectorizedRowBatch vrg = VectorizedRowGroupGenUtil.getVectorizedRowBatch( VectorizedRowBatch.DEFAULT_SIZE, @@ -205,7 +268,14 @@ public void testLongColAddLongColumn() { LongColumnVector lcv3 = (LongColumnVector) vrg.cols[3]; LongColumnVector lcv4 = (LongColumnVector) vrg.cols[4]; LongColumnVector lcv5 = (LongColumnVector) vrg.cols[5]; - LongColAddLongColumn expr = new LongColAddLongColumn(0, 1, 2); + VectorExpression expr; + if (isChecked) { + expr = new LongColAddLongColumnChecked(0, 1, 2); + expr.setOutputTypeInfo(TypeInfoFactory.getPrimitiveTypeInfo("bigint")); + } else { + expr = new LongColAddLongColumn(0, 1, 2); + } + expr.evaluate(vrg); for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) { assertEquals((i+1) * seed * 3, lcv2.vector[i]); @@ -235,7 +305,13 @@ public void testLongColAddLongColumn() { // Now test with repeating flag lcv3.isRepeating = true; - LongColAddLongColumn expr2 = new LongColAddLongColumn(3, 4, 5); + VectorExpression expr2; + if (isChecked) { + expr2 = new LongColAddLongColumnChecked(3, 4, 5); + expr2.setOutputTypeInfo(TypeInfoFactory.getPrimitiveTypeInfo("bigint")); + } else { + expr2 = new LongColAddLongColumn(3, 4, 5); + } expr2.evaluate(vrg); for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) { assertEquals(seed * (4 + 5*(i+1)), lcv5.vector[i]); diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java index 1950e92bd6d5e4f818588e691db30cd28193c716..a8f94e5002efd39e5e183fa473336372943a9fad 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java @@ -57,6 +57,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FuncTanDoubleToDouble; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.junit.Test; @@ -585,6 +586,7 @@ public void testVectorPosMod() { b.cols[0].noNulls = true; inV.vector[4] = -4.0; VectorExpression expr = new PosModDoubleToDouble(0, 0.3d, 1); + expr.setOutputTypeInfo(TypeInfoFactory.getPrimitiveTypeInfo("double")); expr.evaluate(b); Assert.assertTrue(equalsWithinTolerance(((-4.0d % 0.3d) + 0.3d) % 0.3d, resultV.vector[4])); @@ -593,6 +595,49 @@ public void testVectorPosMod() { LongColumnVector resV2 = (LongColumnVector) b.cols[1]; b.cols[0].noNulls = true; expr = new PosModLongToLong(0, 3, 1); + expr.setOutputTypeInfo(TypeInfoFactory.getPrimitiveTypeInfo("tinyint")); + //((ISetLongArg) expr).setArg(3); + expr.evaluate(b); + Assert.assertEquals(((-2 % 3) + 3) % 3, resV2.vector[0]); + //use smallint as outputTypeInfo + expr = new PosModLongToLong(0, 3, 1); + expr.setOutputTypeInfo(TypeInfoFactory.getPrimitiveTypeInfo("smallint")); + //((ISetLongArg) expr).setArg(3); + expr.evaluate(b); + Assert.assertEquals(((-2 % 3) + 3) % 3, resV2.vector[0]); + //use int as outputTypeInfo + expr = new PosModLongToLong(0, 3, 1); + expr.setOutputTypeInfo(TypeInfoFactory.getPrimitiveTypeInfo("int")); + //((ISetLongArg) expr).setArg(3); + expr.evaluate(b); + Assert.assertEquals(((-2 % 3) + 3) % 3, 
resV2.vector[0]); + //use bigint + expr = new PosModLongToLong(0, 3, 1); + expr.setOutputTypeInfo(TypeInfoFactory.getPrimitiveTypeInfo("bigint")); + //((ISetLongArg) expr).setArg(3); + expr.evaluate(b); + Assert.assertEquals(((-2 % 3) + 3) % 3, resV2.vector[0]); + } + + @Test + public void testVectorPosModWithFloatOutputType() { + + // test double->double version + VectorizedRowBatch b = getVectorizedRowBatchDoubleInDoubleOut(); + DoubleColumnVector inV = (DoubleColumnVector) b.cols[0]; + DoubleColumnVector resultV = (DoubleColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + inV.vector[4] = -4.0; + VectorExpression expr = new PosModDoubleToDouble(0, 0.3d, 1); + expr.setOutputTypeInfo(TypeInfoFactory.getPrimitiveTypeInfo("float")); + expr.evaluate(b); + Assert.assertTrue(equalsWithinTolerance(((-4.0f % 0.3f) + 0.3f) % 0.3f, resultV.vector[4])); + + // test long->long version + b = getVectorizedRowBatchLongInLongOut(); + LongColumnVector resV2 = (LongColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + expr = new PosModLongToLong(0, 3, 1); //((ISetLongArg) expr).setArg(3); expr.evaluate(b); Assert.assertEquals(((-2 % 3) + 3) % 3, resV2.vector[0]); diff --git a/ql/src/test/queries/clientpositive/vectorization_numeric_overflows.q b/ql/src/test/queries/clientpositive/vectorization_numeric_overflows.q new file mode 100644 index 0000000000000000000000000000000000000000..828a029df402daa818e53d035c9970f1a4f0912c --- /dev/null +++ b/ql/src/test/queries/clientpositive/vectorization_numeric_overflows.q @@ -0,0 +1,158 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.fetch.task.conversion=none; +set hive.cbo.enable=false; +set hive.vectorized.use.checked.expressions=true; + +--SORT_QUERY_RESULTS + +CREATE TABLE test_overflow ( + ctinyint1 TINYINT, + ctinyint2 TINYINT, + csmallint1 SMALLINT, + csmallint2 SMALLINT, + cint1 INT, + cint2 INT, + cbigint1 BIGINT, + cbigint2 BIGINT, + cfloat1 FLOAT, + cfloat2 FLOAT, + cdouble1 DOUBLE, + cdouble2 DOUBLE) 
+STORED AS PARQUET; + +-- values stored in the columns are the min and max respectively for each column type +insert into test_overflow values (-128, 127, -32768, 32767, -2147483648, 2147483647, -9223372036854775808, 9223372036854775807, 1.401298464324817E-45, 3.4028234663852886E38, 4.9E-324, 1.7976931348623157E308); + +insert into test_overflow values (127, -128, 32767, -32768, 2147483647, -2147483648, 9223372036854775807, -9223372036854775808, 3.4028234663852886E38, 1.401298464324817E-45, 1.7976931348623157E308, 4.9E-324); + +-- stored values represent the MAX_RANGE/2 and MAX_RANGE/2 + 1 for integer types. These are used to cause overflow in pmod UDF +insert into test_overflow values (64, 65, 32767, -32768, 1073741824, 1073741825, 9223372036854775807, -9223372036854775808, 3.4028234663852886E38, 1.401298464324817E-45, 1.7976931348623157E308, 4.9E-324); + +select * from test_overflow order by cint1; + +-- the subtraction in the where clause tips integer column below the min value causing it to underflow +set hive.vectorized.execution.enabled=true; +explain vectorization expression +select cint1, (cint1-2) from test_overflow where (cint1 - 2) > 0 order by cint1; +select cint1, (cint1-2) from test_overflow where (cint1 - 2) > 0 order by cint1; + +-- results should match in non-vectorized execution +set hive.vectorized.execution.enabled=false; +select cint1, (cint1-2) from test_overflow where (cint1 - 2) > 0 order by cint1; + +-- the addition in the where clause tips integer column over the max value causing it to overflow +set hive.vectorized.execution.enabled=true; +explain vectorization expression +select cint2, (cint2+2) from test_overflow where (cint2 + 2) < 0 order by cint2; +select cint2, (cint2+2) from test_overflow where (cint2 + 2) < 0 order by cint2; + +-- results should match in non-vectorized execution +set hive.vectorized.execution.enabled=false; +select cint2, (cint2+2) from test_overflow where (cint2 + 2) < 0 order by cint2; + + +-- test overflow in
multiply operator +set hive.vectorized.execution.enabled=true; +explain vectorization expression +select cint2, (cint2 * 2) from test_overflow where (cint2 * 2) < 0 order by cint2; +select cint2, (cint2 * 2) from test_overflow where (cint2 * 2) < 0 order by cint2; + +-- results should match in non-vectorized execution +set hive.vectorized.execution.enabled=false; +select cint2, (cint2 * 2) from test_overflow where (cint2 * 2) < 0 order by cint2; + + +-- underflow in tinyint case +set hive.vectorized.execution.enabled=true; +explain vectorization expression +select ctinyint1, (ctinyint1-2Y) from test_overflow where (ctinyint1 - 2Y) > 0 order by ctinyint1; +select ctinyint1, (ctinyint1-2Y) from test_overflow where (ctinyint1 - 2Y) > 0 order by ctinyint1; + +-- results should match in non-vectorized execution +set hive.vectorized.execution.enabled=false; +select ctinyint1, (ctinyint1-2Y) from test_overflow where (ctinyint1 - 2Y) > 0 order by ctinyint1; + +-- overflow in tinyint case +set hive.vectorized.execution.enabled=true; +explain vectorization expression +select ctinyint2, (ctinyint2 + 2) from test_overflow where (ctinyint2 + 2Y) < 0 order by ctinyint2; +select ctinyint2, (ctinyint2 + 2) from test_overflow where (ctinyint2 + 2Y) < 0 order by ctinyint2; + +-- results should match in non-vectorized execution +set hive.vectorized.execution.enabled=false; +select ctinyint2, (ctinyint2 + 2) from test_overflow where (ctinyint2 + 2Y) < 0 order by ctinyint2; + +-- overflow for short datatype in multiply operation +set hive.vectorized.execution.enabled=true; +explain vectorization expression +select csmallint2, csmallint2 * 2 from test_overflow where (csmallint2 * 2S) < 0 order by csmallint2; +select csmallint2, csmallint2 * 2 from test_overflow where (csmallint2 * 2S) < 0 order by csmallint2; + +-- results should match in non-vectorized execution +set hive.vectorized.execution.enabled=false; +explain vectorization expression +select csmallint2, csmallint2 * 2 from 
test_overflow where (csmallint2 * 2S) < 0 order by csmallint2; +select csmallint2, csmallint2 * 2 from test_overflow where (csmallint2 * 2S) < 0 order by csmallint2; + +create table parquettable (t1 tinyint, t2 tinyint, i1 int, i2 int) stored as parquet; +insert into parquettable values (-104, 25,2147483647, 10), (-112, 24, -2147483648, 10), (54, 9, 2147483647, -50); + + +-- test ColSubtractCol operation underflow +explain vectorization expression select t1, t2, (t1-t2) as diff from parquettable where (t1-t2) < 50 order by diff desc; +select t1, t2, (t1-t2) as diff from parquettable where (t1-t2) < 50 order by diff desc; + +-- the above query should return the same results in non-vectorized mode +set hive.vectorized.execution.enabled=false; +select t1, t2, (t1-t2) as diff from parquettable where (t1-t2) < 50 order by diff desc; + +-- test integer ColSubtractCol overflow +set hive.vectorized.execution.enabled=true; +explain vectorization expression select i1, i2, (i1-i2) as diff from parquettable where (i1-i2) < 50 order by diff desc; +select i1, i2, (i1-i2) as diff from parquettable where (i1-i2) < 50 order by diff desc; + +-- the above query should return the same results in non-vectorized mode +set hive.vectorized.execution.enabled=false; +select i1, i2, (i1-i2) as diff from parquettable where (i1-i2) < 50 order by diff desc; + +--Test ColumnUnaryMinus.txt +set hive.vectorized.execution.enabled=false; +select cint1 from test_overflow where -cint1 >= 0 order by cint1; +select cfloat1 from test_overflow where -cfloat1 >= 0 order by cfloat1; + +set hive.vectorized.execution.enabled=true; +select cint1 from test_overflow where -cint1 >= 0 order by cint1; +select cfloat1 from test_overflow where -cfloat1 >= 0 order by cfloat1; + + +-- test scalarMultiplyCol overflow +set hive.vectorized.execution.enabled=false; +select cint1, 2*cint2 from test_overflow where 2*cint2 >= 0 order by cint1; + +set hive.vectorized.execution.enabled=true; +select cint1, 2*cint2 from
test_overflow where 2*cint2 >= 0 order by cint1; + +-- test ConstantVectorExpression overflow behavior +-- this works without checked expressions but good to have a test case exercising this +set hive.vectorized.execution.enabled=false; +select 2147483648 from test_overflow; + +set hive.vectorized.execution.enabled=false; +select 2147483648 from test_overflow; + +-- test PosMod vector expression, the third row will overflow the int range and cause the result to be negative +set hive.vectorized.execution.enabled=false; +select * from test_overflow where pmod(cint1, 1073741825) > 0 order by cint1; + +-- results should match in non-vectorized execution +set hive.vectorized.execution.enabled=true; +select * from test_overflow where pmod(cint1, 1073741825) > 0 order by cint1; + +-- cause short range overflow in pmod implementation, this works without posmod range checks but still good to have +set hive.vectorized.execution.enabled=false; +select * from test_overflow where pmod(csmallint1, 16385S) > 0 order by ctinyint1; + +set hive.vectorized.execution.enabled=true; +explain vectorization expression select * from test_overflow where pmod(csmallint1, 16385S) > 0 order by ctinyint1; +select * from test_overflow where pmod(csmallint1, 16385S) > 0 order by ctinyint1; diff --git a/ql/src/test/results/clientpositive/vectorization_numeric_overflows.q.out b/ql/src/test/results/clientpositive/vectorization_numeric_overflows.q.out new file mode 100644 index 0000000000000000000000000000000000000000..344db2b897ad6591591b87ed41a1dc1ec7e5e183 --- /dev/null +++ b/ql/src/test/results/clientpositive/vectorization_numeric_overflows.q.out @@ -0,0 +1,1150 @@ +PREHOOK: query: CREATE TABLE test_overflow ( + ctinyint1 TINYINT, + ctinyint2 TINYINT, + csmallint1 SMALLINT, + csmallint2 SMALLINT, + cint1 INT, + cint2 INT, + cbigint1 BIGINT, + cbigint2 BIGINT, + cfloat1 FLOAT, + cfloat2 FLOAT, + cdouble1 DOUBLE, + cdouble2 DOUBLE) +STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: 
database:default +PREHOOK: Output: default@test_overflow +POSTHOOK: query: CREATE TABLE test_overflow ( + ctinyint1 TINYINT, + ctinyint2 TINYINT, + csmallint1 SMALLINT, + csmallint2 SMALLINT, + cint1 INT, + cint2 INT, + cbigint1 BIGINT, + cbigint2 BIGINT, + cfloat1 FLOAT, + cfloat2 FLOAT, + cdouble1 DOUBLE, + cdouble2 DOUBLE) +STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_overflow +PREHOOK: query: insert into test_overflow values (-128, 127, -32768, 32767, -2147483648, 2147483647, -9223372036854775808, 9223372036854775807, 1.401298464324817E-45, 3.4028234663852886E38, 4.9E-324, 1.7976931348623157E308) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_overflow +POSTHOOK: query: insert into test_overflow values (-128, 127, -32768, 32767, -2147483648, 2147483647, -9223372036854775808, 9223372036854775807, 1.401298464324817E-45, 3.4028234663852886E38, 4.9E-324, 1.7976931348623157E308) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_overflow +POSTHOOK: Lineage: test_overflow.cbigint1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cbigint2 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cdouble1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cdouble2 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cfloat1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cfloat2 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cint1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cint2 SCRIPT [] +POSTHOOK: Lineage: test_overflow.csmallint1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.csmallint2 SCRIPT [] +POSTHOOK: Lineage: test_overflow.ctinyint1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.ctinyint2 SCRIPT [] +PREHOOK: query: insert into test_overflow values (127, -128, 32767, -32768, 2147483647, -2147483648, 9223372036854775807, -9223372036854775808, 3.4028234663852886E38, 1.401298464324817E-45, 1.7976931348623157E308, 4.9E-324) +PREHOOK: type: 
QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_overflow +POSTHOOK: query: insert into test_overflow values (127, -128, 32767, -32768, 2147483647, -2147483648, 9223372036854775807, -9223372036854775808, 3.4028234663852886E38, 1.401298464324817E-45, 1.7976931348623157E308, 4.9E-324) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_overflow +POSTHOOK: Lineage: test_overflow.cbigint1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cbigint2 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cdouble1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cdouble2 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cfloat1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cfloat2 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cint1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cint2 SCRIPT [] +POSTHOOK: Lineage: test_overflow.csmallint1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.csmallint2 SCRIPT [] +POSTHOOK: Lineage: test_overflow.ctinyint1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.ctinyint2 SCRIPT [] +PREHOOK: query: insert into test_overflow values (64, 65, 32767, -32768, 1073741824, 1073741825, 9223372036854775807, -9223372036854775808, 3.4028234663852886E38, 1.401298464324817E-45, 1.7976931348623157E308, 4.9E-324) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_overflow +POSTHOOK: query: insert into test_overflow values (64, 65, 32767, -32768, 1073741824, 1073741825, 9223372036854775807, -9223372036854775808, 3.4028234663852886E38, 1.401298464324817E-45, 1.7976931348623157E308, 4.9E-324) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_overflow +POSTHOOK: Lineage: test_overflow.cbigint1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cbigint2 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cdouble1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cdouble2 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cfloat1 SCRIPT [] +POSTHOOK: 
Lineage: test_overflow.cfloat2 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cint1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.cint2 SCRIPT [] +POSTHOOK: Lineage: test_overflow.csmallint1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.csmallint2 SCRIPT [] +POSTHOOK: Lineage: test_overflow.ctinyint1 SCRIPT [] +POSTHOOK: Lineage: test_overflow.ctinyint2 SCRIPT [] +PREHOOK: query: select * from test_overflow order by cint1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select * from test_overflow order by cint1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +-128 127 -32768 32767 -2147483648 2147483647 -9223372036854775808 9223372036854775807 1.4E-45 3.4028235E38 4.9E-324 1.7976931348623157E308 +64 65 32767 -32768 1073741824 1073741825 9223372036854775807 -9223372036854775808 3.4028235E38 1.4E-45 1.7976931348623157E308 4.9E-324 +127 -128 32767 -32768 2147483647 -2147483648 9223372036854775807 -9223372036854775808 3.4028235E38 1.4E-45 1.7976931348623157E308 4.9E-324 +PREHOOK: query: explain vectorization expression +select cint1, (cint1-2) from test_overflow where (cint1 - 2) > 0 order by cint1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select cint1, (cint1-2) from test_overflow where (cint1 - 2) > 0 order by cint1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_overflow + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 13:int, val 0)(children: 
LongColSubtractLongScalarChecked(col 4:int, val 2) -> 13:int) + predicate: ((cint1 - 2) > 0) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint1 (type: int), (cint1 - 2) (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [4, 13] + selectExpressions: LongColSubtractLongScalarChecked(col 4:int, val 2) -> 13:int + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select cint1, (cint1-2) from test_overflow where (cint1 - 2) > 0 order by cint1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select cint1, (cint1-2) from test_overflow where (cint1 - 2) > 0 order by cint1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +-2147483648 2147483646 +1073741824 1073741822 +2147483647 2147483645 +PREHOOK: query: select cint1, (cint1-2) from test_overflow where (cint1 - 2) > 0 order by cint1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select cint1, (cint1-2) from test_overflow where (cint1 - 2) > 0 order by cint1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +-2147483648 2147483646 +1073741824 1073741822 +2147483647 2147483645 +PREHOOK: query: explain vectorization expression +select cint2, (cint2+2) from test_overflow where (cint2 + 2) < 0 order by cint2 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select cint2, (cint2+2) from test_overflow where (cint2 + 2) < 0 order by cint2 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_overflow + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter 
Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColLessLongScalar(col 13:int, val 0)(children: LongColAddLongScalarChecked(col 5:int, val 2) -> 13:int) + predicate: ((cint2 + 2) < 0) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint2 (type: int), (cint2 + 2) (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5, 13] + selectExpressions: LongColAddLongScalarChecked(col 5:int, val 2) -> 13:int + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic 
stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select cint2, (cint2+2) from test_overflow where (cint2 + 2) < 0 order by cint2 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select cint2, (cint2+2) from test_overflow where (cint2 + 2) < 0 order by cint2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +-2147483648 -2147483646 +2147483647 -2147483647 +PREHOOK: query: select cint2, (cint2+2) from test_overflow where (cint2 + 2) < 0 order by cint2 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select cint2, (cint2+2) from test_overflow where (cint2 + 2) < 0 order by cint2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +-2147483648 -2147483646 +2147483647 -2147483647 +PREHOOK: query: explain vectorization expression +select cint2, (cint2 * 2) from test_overflow where (cint2 * 2) < 0 order by cint2 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select cint2, (cint2 * 2) from test_overflow where (cint2 * 2) < 0 order by cint2 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_overflow + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE 
Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColLessLongScalar(col 13:int, val 0)(children: LongColMultiplyLongScalarChecked(col 5:int, val 2) -> 13:int) + predicate: ((cint2 * 2) < 0) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint2 (type: int), (cint2 * 2) (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5, 13] + selectExpressions: LongColMultiplyLongScalarChecked(col 5:int, val 2) -> 13:int + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), 
VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select cint2, (cint2 * 2) from test_overflow where (cint2 * 2) < 0 order by cint2 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select cint2, (cint2 * 2) from test_overflow where (cint2 * 2) < 0 order by cint2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +1073741825 -2147483646 +2147483647 -2 +PREHOOK: query: select cint2, (cint2 * 2) from test_overflow where (cint2 * 2) < 0 order by cint2 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select cint2, (cint2 * 2) from test_overflow where (cint2 * 2) < 0 order by cint2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +1073741825 -2147483646 +2147483647 -2 +PREHOOK: query: explain vectorization expression +select ctinyint1, (ctinyint1-2Y) from test_overflow where (ctinyint1 - 2Y) > 0 order by ctinyint1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select ctinyint1, (ctinyint1-2Y) from test_overflow where (ctinyint1 - 2Y) > 0 order by ctinyint1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map 
Reduce + Map Operator Tree: + TableScan + alias: test_overflow + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 13:tinyint, val 0)(children: LongColSubtractLongScalarChecked(col 0:tinyint, val 2) -> 13:tinyint) + predicate: ((ctinyint1 - 2) > 0) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint1 (type: tinyint), (ctinyint1 - 2) (type: tinyint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 13] + selectExpressions: LongColSubtractLongScalarChecked(col 0:tinyint, val 2) -> 13:tinyint + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: tinyint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS 
true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: tinyint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select ctinyint1, (ctinyint1-2Y) from test_overflow where (ctinyint1 - 2Y) > 0 order by ctinyint1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select ctinyint1, (ctinyint1-2Y) from test_overflow where (ctinyint1 - 2Y) > 0 order by ctinyint1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +-128 126 +64 62 +127 125 +PREHOOK: query: select ctinyint1, (ctinyint1-2Y) from test_overflow where (ctinyint1 - 2Y) > 0 order by ctinyint1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select ctinyint1, (ctinyint1-2Y) from test_overflow where (ctinyint1 - 2Y) > 0 order by ctinyint1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +-128 126 +64 62 +127 125 +PREHOOK: query: explain vectorization expression +select ctinyint2, (ctinyint2 + 2) from test_overflow where (ctinyint2 + 2Y) < 0 order by ctinyint2 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select ctinyint2, (ctinyint2 + 2) from test_overflow where (ctinyint2 + 2Y) < 0 order by ctinyint2 +POSTHOOK: type: QUERY +PLAN 
VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_overflow + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColLessLongScalar(col 13:tinyint, val 0)(children: LongColAddLongScalarChecked(col 1:tinyint, val 2) -> 13:tinyint) + predicate: ((ctinyint2 + 2) < 0) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint2 (type: tinyint), (ctinyint2 + 2) (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 13] + selectExpressions: LongColAddLongScalarChecked(col 1:tinyint, val 2) -> 13:int + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: 
org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select ctinyint2, (ctinyint2 + 2) from test_overflow where (ctinyint2 + 2Y) < 0 order by ctinyint2 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select ctinyint2, (ctinyint2 + 2) from test_overflow where (ctinyint2 + 2Y) < 0 order by ctinyint2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +-128 -126 +127 129 +PREHOOK: query: select ctinyint2, (ctinyint2 + 2) from test_overflow where (ctinyint2 + 2Y) < 0 order by ctinyint2 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select ctinyint2, (ctinyint2 + 2) from test_overflow where (ctinyint2 + 2Y) < 0 order by ctinyint2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +-128 -126 +127 129 +PREHOOK: query: explain vectorization expression +select csmallint2, csmallint2 * 2 from test_overflow where (csmallint2 * 2S) < 0 
order by csmallint2 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select csmallint2, csmallint2 * 2 from test_overflow where (csmallint2 * 2S) < 0 order by csmallint2 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_overflow + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColLessLongScalar(col 13:smallint, val 0)(children: LongColMultiplyLongScalarChecked(col 3:smallint, val 2) -> 13:smallint) + predicate: ((csmallint2 * 2) < 0) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: csmallint2 (type: smallint), (csmallint2 * 2) (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3, 13] + selectExpressions: LongColMultiplyLongScalarChecked(col 3:smallint, val 2) -> 13:int + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution 
mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select csmallint2, csmallint2 * 2 from test_overflow where (csmallint2 * 2S) < 0 order by csmallint2 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select csmallint2, csmallint2 * 2 from test_overflow where (csmallint2 * 2S) < 0 order by csmallint2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +32767 65534 +PREHOOK: query: explain vectorization expression +select csmallint2, csmallint2 * 2 from test_overflow where (csmallint2 * 2S) < 0 order by csmallint2 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select csmallint2, csmallint2 * 2 from test_overflow where (csmallint2 * 2S) < 0 order by csmallint2 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + 
enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_overflow + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((csmallint2 * 2) < 0) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: csmallint2 (type: smallint), (csmallint2 * 2) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select csmallint2, csmallint2 * 2 from test_overflow where (csmallint2 * 2S) < 0 order by csmallint2 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select csmallint2, csmallint2 * 2 from test_overflow where (csmallint2 * 2S) < 0 order by csmallint2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### 
+32767 65534 +PREHOOK: query: create table parquettable (t1 tinyint, t2 tinyint, i1 int, i2 int) stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquettable +POSTHOOK: query: create table parquettable (t1 tinyint, t2 tinyint, i1 int, i2 int) stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquettable +PREHOOK: query: insert into parquettable values (-104, 25,2147483647, 10), (-112, 24, -2147483648, 10), (54, 9, 2147483647, -50) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@parquettable +POSTHOOK: query: insert into parquettable values (-104, 25,2147483647, 10), (-112, 24, -2147483648, 10), (54, 9, 2147483647, -50) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@parquettable +POSTHOOK: Lineage: parquettable.i1 SCRIPT [] +POSTHOOK: Lineage: parquettable.i2 SCRIPT [] +POSTHOOK: Lineage: parquettable.t1 SCRIPT [] +POSTHOOK: Lineage: parquettable.t2 SCRIPT [] +PREHOOK: query: explain vectorization expression select t1, t2, (t1-t2) as diff from parquettable where (t1-t2) < 50 order by diff desc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression select t1, t2, (t1-t2) as diff from parquettable where (t1-t2) < 50 order by diff desc +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: parquettable + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((t1 - t2) < 50) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t1 (type: tinyint), t2 (type: 
tinyint), (t1 - t2) (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: tinyint) + sort order: - + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: tinyint) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: tinyint), KEY.reducesinkkey0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select t1, t2, (t1-t2) as diff from parquettable where (t1-t2) < 50 order by diff desc +PREHOOK: type: QUERY +PREHOOK: Input: default@parquettable +#### A masked pattern was here #### +POSTHOOK: query: select t1, t2, (t1-t2) as diff from parquettable where (t1-t2) < 50 order by diff desc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquettable +#### A masked pattern was here #### +54 9 45 +PREHOOK: query: select t1, t2, (t1-t2) as diff from parquettable where (t1-t2) < 50 order by diff desc +PREHOOK: type: QUERY +PREHOOK: Input: default@parquettable +#### A masked pattern was here #### +POSTHOOK: query: select t1, t2, (t1-t2) as diff from parquettable where (t1-t2) < 50 order by diff desc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquettable +#### A masked pattern was here #### +54 9 45 +PREHOOK: query: explain vectorization expression select i1, i2, (i1-i2) as diff from parquettable where 
(i1-i2) < 50 order by diff desc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression select i1, i2, (i1-i2) as diff from parquettable where (i1-i2) < 50 order by diff desc +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: parquettable + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColLessLongScalar(col 5:int, val 50)(children: LongColSubtractLongColumnChecked(col 2:int, col 3:int) -> 5:int) + predicate: ((i1 - i2) < 50) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i1 (type: int), i2 (type: int), (i1 - i2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 3, 5] + selectExpressions: LongColSubtractLongColumnChecked(col 2:int, col 3:int) -> 5:int + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: - + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + Execution mode: 
vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select i1, i2, (i1-i2) as diff from parquettable where (i1-i2) < 50 order by diff desc +PREHOOK: type: QUERY +PREHOOK: Input: default@parquettable +#### A masked pattern was here #### +POSTHOOK: query: select i1, i2, (i1-i2) as diff from parquettable where (i1-i2) < 50 order by diff desc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquettable +#### A masked pattern was here #### +2147483647 -50 -2147483599 +PREHOOK: query: select i1, i2, (i1-i2) as diff from parquettable where (i1-i2) < 50 order by diff desc +PREHOOK: type: QUERY +PREHOOK: Input: default@parquettable +#### A masked pattern was here #### +POSTHOOK: query: select i1, i2, (i1-i2) as diff from parquettable where (i1-i2) < 50 order by diff desc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquettable +#### A 
masked pattern was here #### +2147483647 -50 -2147483599 +PREHOOK: query: select cint1 from test_overflow where -cint1 >= 0 order by cint1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select cint1 from test_overflow where -cint1 >= 0 order by cint1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +PREHOOK: query: select cfloat1 from test_overflow where -cfloat1 >= 0 order by cfloat1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select cfloat1 from test_overflow where -cfloat1 >= 0 order by cfloat1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +PREHOOK: query: select cint1 from test_overflow where -cint1 >= 0 order by cint1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select cint1 from test_overflow where -cint1 >= 0 order by cint1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +PREHOOK: query: select cfloat1 from test_overflow where -cfloat1 >= 0 order by cfloat1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select cfloat1 from test_overflow where -cfloat1 >= 0 order by cfloat1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +PREHOOK: query: select cint1, 2*cint2 from test_overflow where 2*cint2 >= 0 order by cint1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select cint1, 2*cint2 from test_overflow where 2*cint2 >= 0 order by cint1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +2147483647 0 +PREHOOK: query: select cint1, 2*cint2 from test_overflow where 
2*cint2 >= 0 order by cint1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select cint1, 2*cint2 from test_overflow where 2*cint2 >= 0 order by cint1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +2147483647 0 +PREHOOK: query: select 2147483648 from test_overflow +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select 2147483648 from test_overflow +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +2147483648 +2147483648 +2147483648 +PREHOOK: query: select 2147483648 from test_overflow +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select 2147483648 from test_overflow +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +2147483648 +2147483648 +2147483648 +PREHOOK: query: select * from test_overflow where pmod(cint1, 1073741825) > 0 order by cint1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select * from test_overflow where pmod(cint1, 1073741825) > 0 order by cint1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +-128 127 -32768 32767 -2147483648 2147483647 -9223372036854775808 9223372036854775807 1.4E-45 3.4028235E38 4.9E-324 1.7976931348623157E308 +127 -128 32767 -32768 2147483647 -2147483648 9223372036854775807 -9223372036854775808 3.4028235E38 1.4E-45 1.7976931348623157E308 4.9E-324 +PREHOOK: query: select * from test_overflow where pmod(cint1, 1073741825) > 0 order by cint1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select * from test_overflow where pmod(cint1, 1073741825) > 0 order by cint1 +POSTHOOK: type: QUERY 
+POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +-128 127 -32768 32767 -2147483648 2147483647 -9223372036854775808 9223372036854775807 1.4E-45 3.4028235E38 4.9E-324 1.7976931348623157E308 +127 -128 32767 -32768 2147483647 -2147483648 9223372036854775807 -9223372036854775808 3.4028235E38 1.4E-45 1.7976931348623157E308 4.9E-324 +PREHOOK: query: select * from test_overflow where pmod(csmallint1, 16385S) > 0 order by ctinyint1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select * from test_overflow where pmod(csmallint1, 16385S) > 0 order by ctinyint1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +-128 127 -32768 32767 -2147483648 2147483647 -9223372036854775808 9223372036854775807 1.4E-45 3.4028235E38 4.9E-324 1.7976931348623157E308 +64 65 32767 -32768 1073741824 1073741825 9223372036854775807 -9223372036854775808 3.4028235E38 1.4E-45 1.7976931348623157E308 4.9E-324 +127 -128 32767 -32768 2147483647 -2147483648 9223372036854775807 -9223372036854775808 3.4028235E38 1.4E-45 1.7976931348623157E308 4.9E-324 +PREHOOK: query: explain vectorization expression select * from test_overflow where pmod(csmallint1, 16385S) > 0 order by ctinyint1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression select * from test_overflow where pmod(csmallint1, 16385S) > 0 order by ctinyint1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_overflow + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + 
predicateExpression: FilterLongColGreaterLongScalar(col 13:smallint, val 0)(children: PosModLongToLong(col 2, divisor 16385) -> 13:smallint) + predicate: ((csmallint1 pmod 16385) > 0) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint1 (type: tinyint), ctinyint2 (type: tinyint), csmallint1 (type: smallint), csmallint2 (type: smallint), cint1 (type: int), cint2 (type: int), cbigint1 (type: bigint), cbigint2 (type: bigint), cfloat1 (type: float), cfloat2 (type: float), cdouble1 (type: double), cdouble2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: tinyint), _col2 (type: smallint), _col3 (type: smallint), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: float), _col9 (type: float), _col10 (type: double), _col11 (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: 
org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: smallint), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: bigint), VALUE._col6 (type: bigint), VALUE._col7 (type: float), VALUE._col8 (type: float), VALUE._col9 (type: double), VALUE._col10 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from test_overflow where pmod(csmallint1, 16385S) > 0 order by ctinyint1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_overflow +#### A masked pattern was here #### +POSTHOOK: query: select * from test_overflow where pmod(csmallint1, 16385S) > 0 order by ctinyint1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_overflow +#### A masked pattern was here #### +-128 127 -32768 32767 -2147483648 2147483647 -9223372036854775808 9223372036854775807 1.4E-45 3.4028235E38 4.9E-324 1.7976931348623157E308 +64 65 32767 -32768 1073741824 1073741825 9223372036854775807 -9223372036854775808 3.4028235E38 1.4E-45 1.7976931348623157E308 4.9E-324 +127 -128 
32767 -32768 2147483647 -2147483648 9223372036854775807 -9223372036854775808 3.4028235E38 1.4E-45 1.7976931348623157E308 4.9E-324 diff --git a/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java b/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java index 657ea34e11f7465e6c77d45128b298e7326a057b..ffd55828d9549d89da6d268101e845cf134b8a27 100644 --- a/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java +++ b/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java @@ -165,55 +165,92 @@ {"IntervalYearMonthArithmeticTimestamp", "Add","+", "interval_year_month", "Col", "timestamp", "Scalar"}, // Long/double arithmetic + {"ColumnArithmeticScalar", "Add", "long", "long", "+", "CHECKED"}, {"ColumnArithmeticScalar", "Add", "long", "long", "+"}, + {"ColumnArithmeticScalar", "Subtract", "long", "long", "-", "CHECKED"}, {"ColumnArithmeticScalar", "Subtract", "long", "long", "-"}, {"ColumnArithmeticScalar", "Multiply", "long", "long", "*"}, + {"ColumnArithmeticScalar", "Multiply", "long", "long", "*", "CHECKED"}, {"ColumnArithmeticScalar", "Add", "long", "double", "+"}, + {"ColumnArithmeticScalar", "Add", "long", "double", "+", "CHECKED"}, {"ColumnArithmeticScalar", "Subtract", "long", "double", "-"}, + {"ColumnArithmeticScalar", "Subtract", "long", "double", "-", "CHECKED"}, {"ColumnArithmeticScalar", "Multiply", "long", "double", "*"}, + {"ColumnArithmeticScalar", "Multiply", "long", "double", "*", "CHECKED"}, {"ColumnArithmeticScalar", "Add", "double", "long", "+"}, + {"ColumnArithmeticScalar", "Add", "double", "long", "+", "CHECKED"}, {"ColumnArithmeticScalar", "Subtract", "double", "long", "-"}, + {"ColumnArithmeticScalar", "Subtract", "double", "long", "-", "CHECKED"}, {"ColumnArithmeticScalar", "Multiply", "double", "long", "*"}, + {"ColumnArithmeticScalar", "Multiply", "double", "long", "*", "CHECKED"}, - {"ColumnArithmeticScalar", "Add", "double", "double", "+"}, + {"ColumnArithmeticScalar", "Add", "double", 
"double", "+",}, + {"ColumnArithmeticScalar", "Add", "double", "double", "+", "CHECKED"}, {"ColumnArithmeticScalar", "Subtract", "double", "double", "-"}, + {"ColumnArithmeticScalar", "Subtract", "double", "double", "-", "CHECKED"}, {"ColumnArithmeticScalar", "Multiply", "double", "double", "*"}, + {"ColumnArithmeticScalar", "Multiply", "double", "double", "*", "CHECKED"}, {"ScalarArithmeticColumn", "Add", "long", "long", "+"}, + {"ScalarArithmeticColumn", "Add", "long", "long", "+", "CHECKED"}, {"ScalarArithmeticColumn", "Subtract", "long", "long", "-"}, + {"ScalarArithmeticColumn", "Subtract", "long", "long", "-", "CHECKED"}, {"ScalarArithmeticColumn", "Multiply", "long", "long", "*"}, + {"ScalarArithmeticColumn", "Multiply", "long", "long", "*", "CHECKED"}, {"ScalarArithmeticColumn", "Add", "long", "double", "+"}, + {"ScalarArithmeticColumn", "Add", "long", "double", "+", "CHECKED"}, {"ScalarArithmeticColumn", "Subtract", "long", "double", "-"}, + {"ScalarArithmeticColumn", "Subtract", "long", "double", "-", "CHECKED"}, {"ScalarArithmeticColumn", "Multiply", "long", "double", "*"}, + {"ScalarArithmeticColumn", "Multiply", "long", "double", "*", "CHECKED"}, {"ScalarArithmeticColumn", "Add", "double", "long", "+"}, + {"ScalarArithmeticColumn", "Add", "double", "long", "+", "CHECKED"}, {"ScalarArithmeticColumn", "Subtract", "double", "long", "-"}, + {"ScalarArithmeticColumn", "Subtract", "double", "long", "-", "CHECKED"}, {"ScalarArithmeticColumn", "Multiply", "double", "long", "*"}, + {"ScalarArithmeticColumn", "Multiply", "double", "long", "*", "CHECKED"}, {"ScalarArithmeticColumn", "Add", "double", "double", "+"}, + {"ScalarArithmeticColumn", "Add", "double", "double", "+", "CHECKED"}, {"ScalarArithmeticColumn", "Subtract", "double", "double", "-"}, + {"ScalarArithmeticColumn", "Subtract", "double", "double", "-", "CHECKED"}, {"ScalarArithmeticColumn", "Multiply", "double", "double", "*"}, + {"ScalarArithmeticColumn", "Multiply", "double", "double", "*", 
"CHECKED"}, {"ColumnArithmeticColumn", "Add", "long", "long", "+"}, + {"ColumnArithmeticColumn", "Add", "long", "long", "+", "CHECKED"}, {"ColumnArithmeticColumn", "Subtract", "long", "long", "-"}, + {"ColumnArithmeticColumn", "Subtract", "long", "long", "-", "CHECKED"}, {"ColumnArithmeticColumn", "Multiply", "long", "long", "*"}, + {"ColumnArithmeticColumn", "Multiply", "long", "long", "*", "CHECKED"}, {"ColumnArithmeticColumn", "Add", "long", "double", "+"}, + {"ColumnArithmeticColumn", "Add", "long", "double", "+", "CHECKED"}, {"ColumnArithmeticColumn", "Subtract", "long", "double", "-"}, + {"ColumnArithmeticColumn", "Subtract", "long", "double", "-", "CHECKED"}, {"ColumnArithmeticColumn", "Multiply", "long", "double", "*"}, + {"ColumnArithmeticColumn", "Multiply", "long", "double", "*", "CHECKED"}, {"ColumnArithmeticColumn", "Add", "double", "long", "+"}, + {"ColumnArithmeticColumn", "Add", "double", "long", "+", "CHECKED"}, {"ColumnArithmeticColumn", "Subtract", "double", "long", "-"}, + {"ColumnArithmeticColumn", "Subtract", "double", "long", "-", "CHECKED"}, {"ColumnArithmeticColumn", "Multiply", "double", "long", "*"}, + {"ColumnArithmeticColumn", "Multiply", "double", "long", "*", "CHECKED"}, {"ColumnArithmeticColumn", "Add", "double", "double", "+"}, + {"ColumnArithmeticColumn", "Add", "double", "double", "+", "CHECKED"}, {"ColumnArithmeticColumn", "Subtract", "double", "double", "-"}, + {"ColumnArithmeticColumn", "Subtract", "double", "double", "-", "CHECKED"}, {"ColumnArithmeticColumn", "Multiply", "double", "double", "*"}, + {"ColumnArithmeticColumn", "Multiply", "double", "double", "*", "CHECKED"}, + //Divide operations are not CHECKED because the output is always of the type double {"ColumnDivideScalar", "Divide", "long", "double", "/"}, {"ColumnDivideScalar", "Divide", "double", "long", "/"}, {"ColumnDivideScalar", "Divide", "double", "double", "/"}, @@ -225,16 +262,27 @@ {"ColumnDivideColumn", "Divide", "double", "double", "/"}, 
{"ColumnDivideScalar", "Modulo", "long", "long", "%"}, + {"ColumnDivideScalar", "Modulo", "long", "long", "%", "CHECKED"}, {"ColumnDivideScalar", "Modulo", "long", "double", "%"}, + {"ColumnDivideScalar", "Modulo", "long", "double", "%", "CHECKED"}, {"ColumnDivideScalar", "Modulo", "double", "long", "%"}, + {"ColumnDivideScalar", "Modulo", "double", "long", "%", "CHECKED"}, {"ColumnDivideScalar", "Modulo", "double", "double", "%"}, + {"ColumnDivideScalar", "Modulo", "double", "double", "%", "CHECKED"}, {"ScalarDivideColumn", "Modulo", "long", "long", "%"}, + {"ScalarDivideColumn", "Modulo", "long", "long", "%", "CHECKED"}, {"ScalarDivideColumn", "Modulo", "long", "double", "%"}, + {"ScalarDivideColumn", "Modulo", "long", "double", "%", "CHECKED"}, {"ScalarDivideColumn", "Modulo", "double", "long", "%"}, + {"ScalarDivideColumn", "Modulo", "double", "long", "%", "CHECKED"}, {"ScalarDivideColumn", "Modulo", "double", "double", "%"}, + {"ScalarDivideColumn", "Modulo", "double", "double", "%", "CHECKED"}, {"ColumnDivideColumn", "Modulo", "long", "double", "%"}, + {"ColumnDivideColumn", "Modulo", "long", "double", "%", "CHECKED"}, {"ColumnDivideColumn", "Modulo", "double", "long", "%"}, + {"ColumnDivideColumn", "Modulo", "double", "long", "%", "CHECKED"}, {"ColumnDivideColumn", "Modulo", "double", "double", "%"}, + {"ColumnDivideColumn", "Modulo", "double", "double", "%", "CHECKED"}, {"ColumnArithmeticScalarDecimal", "Add"}, {"ColumnArithmeticScalarDecimal", "Subtract"}, @@ -996,7 +1044,9 @@ // classes {"ColumnUnaryMinus", "long"}, + {"ColumnUnaryMinus", "long", "CHECKED"}, {"ColumnUnaryMinus", "double"}, + {"ColumnUnaryMinus", "double", "CHECKED"}, // IF conditional expression // fileHeader, resultType, arg2Type, arg3Type @@ -2087,8 +2137,10 @@ private void generateColumnUnaryMinus(String[] tdesc) throws Exception { String inputColumnVectorType = this.getColumnVectorType(operandType); String outputColumnVectorType = inputColumnVectorType; String returnType = 
operandType; - String className = getCamelCaseType(operandType) + "ColUnaryMinus"; - File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + boolean checked = (tdesc.length == 3 && "CHECKED".equals(tdesc[2])); + String className = getCamelCaseType(operandType) + "ColUnaryMinus" + + (checked ? "Checked" : ""); + File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); String templateString = readFile(templateFile); String vectorExprArgType = operandType; if (operandType.equals("long")) { @@ -2101,7 +2153,10 @@ private void generateColumnUnaryMinus(String[] tdesc) throws Exception { templateString = templateString.replaceAll("", outputColumnVectorType); templateString = templateString.replaceAll("", operandType); templateString = templateString.replaceAll("", returnType); + templateString = templateString.replaceAll("", getCamelCaseType(returnType)); templateString = templateString.replaceAll("", vectorExprArgType); + String ifDefined = checked ? tdesc[2] : ""; + templateString = evaluateIfDefined(templateString, ifDefined); writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, className, templateString); } @@ -2260,8 +2315,10 @@ private void generateColumnArithmeticColumn(String [] tdesc) throws Exception { String operatorName = tdesc[1]; String operandType1 = tdesc[2]; String operandType2 = tdesc[3]; + boolean checked = tdesc.length == 6 && "CHECKED".equals(tdesc[5]); String className = getCamelCaseType(operandType1) - + "Col" + operatorName + getCamelCaseType(operandType2) + "Column"; + + "Col" + operatorName + getCamelCaseType(operandType2) + "Column" + + (checked ? 
"Checked" : ""); String returnType = getArithmeticReturnType(operandType1, operandType2); generateColumnArithmeticOperatorColumn(tdesc, returnType, className); } @@ -2636,6 +2693,7 @@ private void generateColumnArithmeticOperatorColumn(String[] tdesc, String retur String inputColumnVectorType1 = this.getColumnVectorType(operandType1); String inputColumnVectorType2 = this.getColumnVectorType(operandType2); String operatorSymbol = tdesc[4]; + String ifDefined = tdesc.length == 6 ? tdesc[5] : ""; //Read the template into a string; File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); @@ -2650,14 +2708,25 @@ private void generateColumnArithmeticOperatorColumn(String[] tdesc, String retur templateString = templateString.replaceAll("", operandType2); templateString = templateString.replaceAll("", returnType); templateString = templateString.replaceAll("", getCamelCaseType(returnType)); + templateString = evaluateIfDefined(templateString, ifDefined); + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, className, templateString); - testCodeGen.addColumnColumnOperationTestCases( + if (!ifDefined.isEmpty()) { + testCodeGen.addColumnColumnOperationTestCases( className, inputColumnVectorType1, inputColumnVectorType2, outputColumnVectorType); + } else { + testCodeGen.addColumnColumnOperationCheckedTestCases( + className, + inputColumnVectorType1, + inputColumnVectorType2, + outputColumnVectorType, + "long".equalsIgnoreCase(returnType)); + } } private void generateColumnCompareOperatorScalar(String[] tdesc, boolean filter, @@ -2713,6 +2782,8 @@ private void generateColumnArithmeticOperatorScalar(String[] tdesc, String retur String inputColumnVectorType = this.getColumnVectorType(operandType1); String operatorSymbol = tdesc[4]; + String ifDefined = (tdesc.length == 6 ? 
tdesc[5] : ""); + //Read the template into a string; File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); String templateString = readFile(templateFile); @@ -2724,6 +2795,9 @@ private void generateColumnArithmeticOperatorScalar(String[] tdesc, String retur templateString = templateString.replaceAll("", operandType1); templateString = templateString.replaceAll("", operandType2); templateString = templateString.replaceAll("", returnType); + templateString = templateString.replaceAll("", getCamelCaseType(returnType)); + templateString = evaluateIfDefined(templateString, ifDefined); + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, className, templateString); @@ -2732,12 +2806,24 @@ private void generateColumnArithmeticOperatorScalar(String[] tdesc, String retur testScalarType = "long"; } - testCodeGen.addColumnScalarOperationTestCases( + if (ifDefined.isEmpty()) { + testCodeGen.addColumnScalarOperationTestCases( true, className, inputColumnVectorType, outputColumnVectorType, testScalarType); + } else { + //this is a checked expression use a different template for checked expressions + testCodeGen.addColumnScalarOperationCheckedTestCases( + true, + className, + inputColumnVectorType, + outputColumnVectorType, + testScalarType, + "long".equalsIgnoreCase(returnType) + ); + } } private void generateScalarCompareOperatorColumn(String[] tdesc, boolean filter, @@ -2793,6 +2879,7 @@ private void generateScalarArithmeticOperatorColumn(String[] tdesc, String retur returnType == null ? "long" : returnType); String inputColumnVectorType = this.getColumnVectorType(operandType2); String operatorSymbol = tdesc[4]; + String ifDefined = (tdesc.length == 6 ? 
tdesc[5] : ""); //Read the template into a string; File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); @@ -2806,6 +2893,7 @@ private void generateScalarArithmeticOperatorColumn(String[] tdesc, String retur templateString = templateString.replaceAll("", operandType2); templateString = templateString.replaceAll("", returnType); templateString = templateString.replaceAll("", getCamelCaseType(returnType)); + templateString = evaluateIfDefined(templateString, ifDefined); writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, className, templateString); @@ -2814,12 +2902,13 @@ private void generateScalarArithmeticOperatorColumn(String[] tdesc, String retur testScalarType = "long"; } - testCodeGen.addColumnScalarOperationTestCases( - false, - className, - inputColumnVectorType, - outputColumnVectorType, - testScalarType); + if (!ifDefined.isEmpty()) { + testCodeGen.addColumnScalarOperationTestCases(false, className, inputColumnVectorType, + outputColumnVectorType, testScalarType); + } else { + testCodeGen.addColumnScalarOperationCheckedTestCases(false, className, inputColumnVectorType, + outputColumnVectorType, testScalarType, "long".equalsIgnoreCase(returnType)); + } } //Binary arithmetic operator @@ -2827,8 +2916,10 @@ private void generateColumnArithmeticScalar(String[] tdesc) throws Exception { String operatorName = tdesc[1]; String operandType1 = tdesc[2]; String operandType2 = tdesc[3]; + boolean checked = tdesc.length == 6 && "CHECKED".equals(tdesc[5]); String className = getCamelCaseType(operandType1) - + "Col" + operatorName + getCamelCaseType(operandType2) + "Scalar"; + + "Col" + operatorName + getCamelCaseType(operandType2) + "Scalar" + + (checked ? 
"Checked" : ""); String returnType = getArithmeticReturnType(operandType1, operandType2); generateColumnArithmeticOperatorScalar(tdesc, returnType, className); } @@ -2921,8 +3012,10 @@ private void generateScalarArithmeticColumn(String[] tdesc) throws Exception { String operatorName = tdesc[1]; String operandType1 = tdesc[2]; String operandType2 = tdesc[3]; + boolean checked = (tdesc.length == 6 && "CHECKED".equals(tdesc[5])); String className = getCamelCaseType(operandType1) - + "Scalar" + operatorName + getCamelCaseType(operandType2) + "Column"; + + "Scalar" + operatorName + getCamelCaseType(operandType2) + "Column" + + (checked ? "Checked" : ""); String returnType = getArithmeticReturnType(operandType1, operandType2); generateScalarArithmeticOperatorColumn(tdesc, returnType, className); } diff --git a/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorTestCode.java b/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorTestCode.java index d97646f8b1c4a074da59b4685939fc4359c9c30d..8ff7e82a14739ed5bb2ccbf2baff074d44d92b5a 100644 --- a/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorTestCode.java +++ b/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorTestCode.java @@ -38,8 +38,10 @@ public enum TestSuiteClassName{ TestColumnScalarOperationVectorExpressionEvaluation, + TestColumnScalarOperationVectorExpressionCheckedEvaluation, TestColumnScalarFilterVectorExpressionEvaluation, TestColumnColumnOperationVectorExpressionEvaluation, + TestColumnColumnOperationVectorExpressionCheckedEvaluation, TestColumnColumnFilterVectorExpressionEvaluation, } @@ -58,6 +60,56 @@ public GenVectorTestCode(String testOutputDir, String testTemplateDirectory) { } + public void addColumnScalarOperationCheckedTestCases(boolean op1IsCol, String vectorExpClassName, + String inputColumnVectorType, String outputColumnVectorType, String scalarType, + boolean isReturnTypeLong) + throws IOException { + + TestSuiteClassName template = + 
TestSuiteClassName.TestColumnScalarOperationVectorExpressionCheckedEvaluation; + + //Read the template into a string; + String templateFile = GenVectorCode.joinPath(this.testTemplateDirectory,template.toString()+".txt"); + String templateString = removeTemplateComments(GenVectorCode.readFile(templateFile)); + String[] outputTypeInfos = null; + if (isReturnTypeLong) { + outputTypeInfos = new String[] {"tinyint", "smallint", "int", "bigint"}; + } else { + outputTypeInfos = new String[] {"float", "double"}; + } + for (String outputTypeInfo : outputTypeInfos) { + for (Boolean[] testMatrix : new Boolean[][] { + // Pairwise: InitOuputColHasNulls, InitOuputColIsRepeating, ColumnHasNulls, ColumnIsRepeating + { false, true, true, true }, { false, false, false, false }, { true, false, true, false }, + { true, true, false, false }, { true, false, false, true } }) { + String testCase = templateString; + testCase = testCase.replaceAll("", + "test" + vectorExpClassName + + createNullRepeatingNameFragment("Out", testMatrix[0], testMatrix[1]) + + createNullRepeatingNameFragment("Col", testMatrix[2], testMatrix[3]) + + createOutputTypeInfoFragment("Ret", outputTypeInfo)); + testCase = testCase.replaceAll("", outputTypeInfo); + testCase = testCase.replaceAll("", vectorExpClassName); + testCase = testCase.replaceAll("", inputColumnVectorType); + testCase = testCase.replaceAll("", outputColumnVectorType); + testCase = testCase.replaceAll("", scalarType); + testCase = testCase.replaceAll("", GenVectorCode.getCamelCaseType(scalarType)); + testCase = testCase.replaceAll("", testMatrix[0].toString()); + testCase = testCase.replaceAll("", testMatrix[1].toString()); + testCase = testCase.replaceAll("", testMatrix[2].toString()); + testCase = testCase.replaceAll("", testMatrix[3].toString()); + + if (op1IsCol) { + testCase = testCase.replaceAll("", "0, scalarValue"); + } else { + testCase = testCase.replaceAll("", "scalarValue, 0"); + } + + testsuites.get(template).append(testCase); + } + } 
+ } + public void addColumnScalarOperationTestCases(boolean op1IsCol, String vectorExpClassName, String inputColumnVectorType, String outputColumnVectorType, String scalarType) throws IOException { @@ -146,6 +198,59 @@ public void addColumnScalarFilterTestCases(boolean op1IsCol, String vectorExpCla } } + public void addColumnColumnOperationCheckedTestCases(String vectorExpClassName, + String inputColumnVectorType1, String inputColumnVectorType2, String outputColumnVectorType, + boolean isReturnTypeLong) + throws IOException { + + TestSuiteClassName template= + TestSuiteClassName.TestColumnColumnOperationVectorExpressionCheckedEvaluation; + + //Read the template into a string; + String templateFile = GenVectorCode.joinPath(this.testTemplateDirectory,template.toString()+".txt"); + String templateString = removeTemplateComments(GenVectorCode.readFile(templateFile)); + String[] outputTypeInfos = null; + if (isReturnTypeLong) { + outputTypeInfos = new String[] {"tinyint", "smallint", "int", "bigint"}; + } else { + outputTypeInfos = new String[] {"float", "double"}; + } + for (String outputTypeInfo : outputTypeInfos) { + for (Boolean[] testMatrix : new Boolean[][] { + // Pairwise: InitOuputColHasNulls, InitOuputColIsRepeating, Column1HasNulls, + // Column1IsRepeating, Column2HasNulls, Column2IsRepeating + { true, true, false, true, true, true }, + { false, false, true, false, false, false }, + { true, false, true, false, true, true }, + { true, true, true, true, false, false }, + { false, false, false, true, true, false }, + { false, true, false, false, false, true } }) { + String testCase = templateString; + testCase = testCase.replaceAll("", + "test" + + vectorExpClassName + + createNullRepeatingNameFragment("Out", testMatrix[0], + testMatrix[1]) + + createNullRepeatingNameFragment("C1", testMatrix[2], testMatrix[3]) + + createNullRepeatingNameFragment("C2", testMatrix[4], testMatrix[5]) + + createOutputTypeInfoFragment("Ret", outputTypeInfo)); + testCase = 
testCase.replaceAll("", vectorExpClassName); + testCase = testCase.replaceAll("", inputColumnVectorType1); + testCase = testCase.replaceAll("", inputColumnVectorType2); + testCase = testCase.replaceAll("", outputColumnVectorType); + testCase = testCase.replaceAll("", testMatrix[0].toString()); + testCase = testCase.replaceAll("", testMatrix[1].toString()); + testCase = testCase.replaceAll("", testMatrix[2].toString()); + testCase = testCase.replaceAll("", testMatrix[3].toString()); + testCase = testCase.replaceAll("", testMatrix[4].toString()); + testCase = testCase.replaceAll("", testMatrix[5].toString()); + testCase = testCase.replaceAll("", outputTypeInfo); + + testsuites.get(template).append(testCase); + } + } + } + public void addColumnColumnOperationTestCases(String vectorExpClassName, String inputColumnVectorType1, String inputColumnVectorType2, String outputColumnVectorType) throws IOException { @@ -240,21 +345,50 @@ public void generateTestSuites() throws IOException { } } - private static String createNullRepeatingNameFragment(String idenitfier, boolean nulls, boolean repeating) + private static String createNullRepeatingNameFragment(String identifier, boolean nulls, boolean repeating) { if(nulls || repeating){ if(nulls){ - idenitfier+="Nulls"; + identifier+="Nulls"; } if(repeating){ - idenitfier+="Repeats"; + identifier+="Repeats"; } - return idenitfier; + return identifier; } return ""; } + private static String createOutputTypeInfoFragment(String identifier, String outputTypeInfo) { + if (identifier == null) { + throw new RuntimeException("Received null input for the identifier"); + } + switch (outputTypeInfo) { + case "tinyint": { + return identifier + "TinyInt"; + } + case "smallint": { + return identifier + "SmallInt"; + } + case "int": { + return identifier + "Int"; + } + case "bigint": { + return identifier + "BigInt"; + } + case "float": { + return identifier + "Float"; + } + case "double": { + return identifier + "Double"; + } + default: { + 
throw new RuntimeException("Unsupported input typeInfo " + outputTypeInfo); + } + } + } + private static String removeTemplateComments(String templateString){ return templateString.replaceAll("(?s)", ""); }