diff --git a/common/src/java/org/apache/hadoop/hive/common/type/Decimal128.java b/common/src/java/org/apache/hadoop/hive/common/type/Decimal128.java index ec219fd..3939511 100644 --- a/common/src/java/org/apache/hadoop/hive/common/type/Decimal128.java +++ b/common/src/java/org/apache/hadoop/hive/common/type/Decimal128.java @@ -1604,4 +1604,13 @@ public String toString() { + signum + ", BigDecimal.toString=" + toBigDecimal().toString() + ", unscaledValue=[" + unscaledValue.toString() + "])"; } + + /** + * Vectorized execution uses the smallest possible positive non-zero + * value to prevent possible later zero-divide exceptions. Set the field + * to this value (1 in the internal unsigned 128 bit int). Only unscaledValue is written; NOTE(review): the signum shown by toString() is not touched here -- confirm that is intended. + */ + public void setNullDataValue() { + unscaledValue.update(1, 0, 0, 0); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java new file mode 100644 index 0000000..23564bb --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector; +import org.apache.hadoop.hive.common.type.Decimal128; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.io.Writable; + +public class DecimalColumnVector extends ColumnVector { + + /** + * A vector of Decimal128 objects. These are mutable and have fairly + * efficient operations on them. This will make it faster to load + * column vectors and perform decimal vector operations with decimal- + * specific VectorExpressions. + * + * For high performance and easy access to this low-level structure, + * the fields are public by design (as they are in other ColumnVector + * types). + */ + public Decimal128[] vector; + public short scale; + public short precision; + + public DecimalColumnVector(int precision, int scale) { + super(VectorizedRowBatch.DEFAULT_SIZE); + this.precision = (short) precision; + this.scale = (short) scale; + final int len = VectorizedRowBatch.DEFAULT_SIZE; + vector = new Decimal128[len]; + for (int i = 0; i < len; i++) { + vector[i] = new Decimal128(0, this.scale); + } + } + + @Override + public Writable getWritableObject(int index) { + // TODO: not implemented yet; always returns null regardless of index + return null; + } + + @Override + public void flatten(boolean selectedInUse, int[] sel, int size) { + // TODO: not implemented yet; currently a no-op + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColAddDecimalColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColAddDecimalColumn.java new file mode 100644 index 0000000..02dc5a4 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColAddDecimalColumn.java @@ -0,0 +1,183 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.Decimal128; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.*; + +/** + * Generated from template ColumnArithmeticColumn.txt, which covers binary arithmetic + * expressions between columns. 
+ */ +public class DecimalColAddDecimalColumn extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum1; + private int colNum2; + private int outputColumn; + + public DecimalColAddDecimalColumn(int colNum1, int colNum2, int outputColumn) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + this.outputColumn = outputColumn; + } + + public DecimalColAddDecimalColumn() { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + DecimalColumnVector inputColVector1 = (DecimalColumnVector) batch.cols[colNum1]; + DecimalColumnVector inputColVector2 = (DecimalColumnVector) batch.cols[colNum2]; + DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + int n = batch.size; + Decimal128[] vector1 = inputColVector1.vector; + Decimal128[] vector2 = inputColVector2.vector; + Decimal128[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = + inputColVector1.isRepeating && inputColVector2.isRepeating + || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] + || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. 
+ */ + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + addChecked(0, vector1[0], vector2[0], outputColVector); + } else if (inputColVector1.isRepeating) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + addChecked(i, vector1[0], vector2[i], outputColVector); + } + } else { + for(int i = 0; i != n; i++) { + addChecked(i, vector1[0], vector2[i], outputColVector); + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + addChecked(i, vector1[i], vector2[0], outputColVector); + } + } else { + for(int i = 0; i != n; i++) { + addChecked(i, vector1[i], vector2[0], outputColVector); + } + } + } else { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + addChecked(i, vector1[i], vector2[i], outputColVector); + } + } else { + for(int i = 0; i != n; i++) { + addChecked(i, vector1[i], vector2[i], outputColVector); + } + } + } + + /* For the case when the output can have null values, follow + * the convention that the data value of a null decimal entry is the + * smallest positive non-zero value. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. + */ + NullUtil.setNullDataEntriesDecimal(outputColVector, batch.selectedInUse, sel, n); + } + + // Addition with overflow check. Overflow produces NULL output. 
+ private static void addChecked(int i, Decimal128 left, Decimal128 right, + DecimalColumnVector outputColVector) { + try { + Decimal128.add(left, right, outputColVector.vector[i], outputColVector.scale); + outputColVector.vector[i].checkPrecisionOverflow(outputColVector.precision); + } catch (ArithmeticException e) { // overflow: flag entry i as NULL instead of rethrowing + outputColVector.noNulls = false; + outputColVector.isNull[i] = true; + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "decimal"; + } + + public int getColNum1() { + return colNum1; + } + + public void setColNum1(int colNum1) { + this.colNum1 = colNum1; + } + + public int getColNum2() { + return colNum2; + } + + public void setColNum2(int colNum2) { + this.colNum2 = colNum2; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("decimal"), + VectorExpressionDescriptor.ArgumentType.getType("decimal")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java index cd4cf92..18f63f5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import org.apache.hadoop.hive.common.type.Decimal128; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import 
org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; @@ -27,6 +29,7 @@ * Utility functions to handle null propagation. */ public class NullUtil { + /** * Set the data value for all NULL entries to the designated NULL_VALUE. */ @@ -56,6 +59,7 @@ public static void setNullDataEntriesLong( public static void setNullOutputEntriesColScalar( ColumnVector v, boolean selectedInUse, int[] sel, int n) { if (v instanceof DoubleColumnVector) { + // No need to set null data entries because the input NaN values // will automatically propagate to the output. return; @@ -285,4 +289,32 @@ public static void propagateNullsColCol(ColumnVector inputColVector1, } } } + + /** + * Follow the convention that null decimal values are internally set to the smallest + * positive value available. Prevents accidental zero-divide later in expression + * evaluation. Does nothing when v.noNulls is set; otherwise only entries flagged in v.isNull are overwritten. + */ + public static void setNullDataEntriesDecimal( + DecimalColumnVector v, boolean selectedInUse, int[] sel, + int n) { + if (v.noNulls) { + return; + } else if (v.isRepeating && v.isNull[0]) { + v.vector[0].setNullDataValue(); + } else if (selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if(v.isNull[i]) { + v.vector[i].setNullDataValue(); + } + } + } else { + for (int i = 0; i != n; i++) { + if(v.isNull[i]) { + v.vector[i].setNullDataValue(); + } + } + } + } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java index 3ca727f..9086025 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java @@ -23,6 +23,8 @@ import static org.junit.Assert.assertTrue; import 
junit.framework.Assert; +import org.apache.hadoop.hive.common.type.Decimal128; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch; @@ -33,7 +35,7 @@ import org.junit.Test; /** - * Unit tests for vectori arithmetic expressions. + * Unit tests for vectorized arithmetic expressions. */ public class TestVectorArithmeticExpressions { @@ -284,4 +286,60 @@ public void testLongColDivideLongColumn() { assertFalse(out.noNulls); assertFalse(out.isRepeating); } + + @Test + public void testDecimalColAddDecimalColumn() { + VectorizedRowBatch b = getVectorizedRowBatch3DecimalCols(); + VectorExpression expr = new DecimalColAddDecimalColumn(0, 1, 2); + DecimalColumnVector r = (DecimalColumnVector) b.cols[2]; + + // test without nulls + expr.evaluate(b); + assertTrue(r.vector[0].equals(new Decimal128("2.20", (short) 2))); + assertTrue(r.vector[1].equals(new Decimal128("-2.30", (short) 2))); + assertTrue(r.vector[2].equals(new Decimal128("1.00", (short) 2))); + + // test null propagation: entry 0 of input column 0 is NULL, so output entry 0 must be NULL + b = getVectorizedRowBatch3DecimalCols(); + DecimalColumnVector c0 = (DecimalColumnVector) b.cols[0]; + c0.noNulls = false; + c0.isNull[0] = true; + r = (DecimalColumnVector) b.cols[2]; + expr.evaluate(b); + assertTrue(!r.noNulls && r.isNull[0]); + + // Verify null output data entry is not 0, but rather the value specified by design, + // which is the minimum non-0 value, 0.01 in this case. 
+ assertTrue(r.vector[0].equals(new Decimal128("0.01", (short) 2))); + + // test that overflow produces NULL + b = getVectorizedRowBatch3DecimalCols(); + c0 = (DecimalColumnVector) b.cols[0]; + c0.vector[0].update("9999999999999999.99", (short) 2); // set to max possible value + r = (DecimalColumnVector) b.cols[2]; + expr.evaluate(b); // will cause overflow for result at position 0, must yield NULL + assertTrue(!r.noNulls && r.isNull[0]); + + // verify proper null output data value + assertTrue(r.vector[0].equals(new Decimal128("0.01", (short) 2))); + } + + private VectorizedRowBatch getVectorizedRowBatch3DecimalCols() { + VectorizedRowBatch b = new VectorizedRowBatch(3); + DecimalColumnVector v0, v1; + b.cols[0] = v0 = new DecimalColumnVector(18, 2); + b.cols[1] = v1 = new DecimalColumnVector(18, 2); + b.cols[2] = new DecimalColumnVector(18, 2); + v0.vector[0].update("1.20", (short) 2); + v0.vector[1].update("-3.30", (short) 2); + v0.vector[2].update("0", (short) 2); + + v1.vector[0].update("1.00", (short) 2); + v1.vector[1].update("1.00", (short) 2); + v1.vector[2].update("1.00", (short) 2); + + b.size = 3; + + return b; + } }