diff --git a/common/src/java/org/apache/hadoop/hive/common/type/Decimal128.java b/common/src/java/org/apache/hadoop/hive/common/type/Decimal128.java index 3939511..8031ed8 100644 --- a/common/src/java/org/apache/hadoop/hive/common/type/Decimal128.java +++ b/common/src/java/org/apache/hadoop/hive/common/type/Decimal128.java @@ -1422,15 +1422,24 @@ public long longValue() { } long ret; + UnsignedInt128 tmp; if (scale == 0) { - ret = (this.unscaledValue.getV1()) << 32L | this.unscaledValue.getV0(); + ret = this.unscaledValue.getV1(); + ret <<= 32L; + ret |= SqlMathUtil.LONG_MASK & this.unscaledValue.getV0(); } else { - UnsignedInt128 tmp = new UnsignedInt128(this.unscaledValue); + tmp = new UnsignedInt128(this.unscaledValue); tmp.scaleDownTenDestructive(scale); - ret = (tmp.getV1()) << 32L | tmp.getV0(); + ret = tmp.getV1(); + ret <<= 32L; + ret |= SqlMathUtil.LONG_MASK & tmp.getV0(); } - return SqlMathUtil.setSignBitLong(ret, signum > 0); + if (signum >= 0) { + return ret; + } else { + return -ret; + } } /** @@ -1613,4 +1622,25 @@ public String toString() { public void setNullDataValue() { unscaledValue.update(1, 0, 0, 0); } + + /** + * Update the value to a decimal value with the digits equal to + * val but with the decimal point inserted scale + * digits from the right. Behavior is undefined if scale is > 38 or < 0. + * + * For example, updateFixedPoint(123456789L, (short) 3) changes the target + * to the value 123456.789 with scale 3. 
+ */ + public void updateFixedPoint(long val, short scale) { + this.scale = scale; + if (val < 0L) { + this.unscaledValue.update(-val); + this.signum = -1; + } else if (val == 0L) { + zeroClear(); + } else { + this.unscaledValue.update(val); + this.signum = 1; + } + } } diff --git a/common/src/test/org/apache/hadoop/hive/common/type/TestDecimal128.java b/common/src/test/org/apache/hadoop/hive/common/type/TestDecimal128.java index 3c05352..c65d27d 100644 --- a/common/src/test/org/apache/hadoop/hive/common/type/TestDecimal128.java +++ b/common/src/test/org/apache/hadoop/hive/common/type/TestDecimal128.java @@ -411,4 +411,22 @@ public void testPrecisionOverflow() { } catch (ArithmeticException ex) { } } + + @Test + public void testToLong() { + Decimal128 d = new Decimal128("1.25", (short) 2); + assertEquals(1, d.longValue()); + d.update("4294967295", (short) 0); // 2^32-1 + assertEquals(4294967295L, d.longValue()); + d.update("4294967296", (short) 0); // 2^32 -- needs 2 32 bit words + assertEquals(4294967296L, d.longValue()); + d.update("-4294967295", (short) 0); // -(2^32-1) + assertEquals(-4294967295L, d.longValue()); + d.update("-4294967296", (short) 0); // -(2^32) + assertEquals(-4294967296L, d.longValue()); + d.update("4294967295.01", (short) 2); // 2^32-1 + .01 + assertEquals(4294967295L, d.longValue()); + d.update("4294967296.01", (short) 2); // 2^32 + .01 + assertEquals(4294967296L, d.longValue()); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java index 23564bb..2d66b86 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java @@ -58,4 +58,19 @@ public Writable getWritableObject(int index) { public void flatten(boolean selectedInUse, int[] sel, int size) { // TODO Auto-generated method stub } + + /** + * Check if the value at 
position i fits in the available precision, + * and convert the value to NULL if it does not. + */ + public void checkPrecisionOverflow(int i) { + try { + vector[i].checkPrecisionOverflow(precision); + } catch (ArithmeticException e) { + + // If the value won't fit in the available precision, the result is NULL + noNulls = false; + isNull[i] = true; + } + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java index 59758d1..1c70387 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java @@ -34,6 +34,7 @@ LONG(1), DOUBLE(2), STRING(3), + DECIMAL(4), ANY(7); private final int value; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToBoolean.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToBoolean.java new file mode 100644 index 0000000..6a7762d --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToBoolean.java @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; + +/** + * Type cast decimal to boolean + */ +public class CastDecimalToBoolean extends FuncDecimalToLong { + private static final long serialVersionUID = 1L; + + public CastDecimalToBoolean(int inputColumn, int outputColumn) { + super(inputColumn, outputColumn); + } + + @Override + /** + * If the input is 0 (i.e. the signum of the decimal is 0), return 0 for false. + * Otherwise, return 1 for true. + */ + protected void func(LongColumnVector outV, DecimalColumnVector inV, int i) { + outV.vector[i] = inV.vector[i].getSignum() == 0 ? 0 : 1; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java new file mode 100644 index 0000000..14b91e1 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java @@ -0,0 +1,166 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Cast a decimal to a decimal, accounting for precision and scale changes. + * + * If other functions besides cast need to take a decimal in and produce a decimal, + * you can subclass this class or convert it to a superclass, and + * implement different methods for each operation. If that's done, the + * convert() method should be renamed to func() for consistency with other + * similar super classes such as FuncLongToDecimal. + */ +public class CastDecimalToDecimal extends VectorExpression { + private static final long serialVersionUID = 1L; + int inputColumn; + int outputColumn; + + public CastDecimalToDecimal(int inputColumn, int outputColumn) { + this.inputColumn = inputColumn; + this.outputColumn = outputColumn; + } + + public CastDecimalToDecimal() { + super(); + } + + /** + * Convert input decimal value to a decimal with a possibly different precision and scale, + * at position i in the respective vectors. + */ + protected void convert(DecimalColumnVector outV, DecimalColumnVector inV, int i) { + outV.vector[i].update(inV.vector[i]); + outV.vector[i].changeScaleDestructive(outV.scale); + outV.checkPrecisionOverflow(i); + } + + /** + * Cast decimal(p1, s1) to decimal(p2, s2). + * + * The precision and scale are recorded in the input and output vectors, + * respectively. 
+ */ + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + DecimalColumnVector inV = (DecimalColumnVector) batch.cols[inputColumn]; + int[] sel = batch.selected; + int n = batch.size; + DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumn]; + + if (n == 0) { + + // Nothing to do + return; + } + + if (inV.noNulls) { + outV.noNulls = true; + if (inV.isRepeating) { + outV.isRepeating = true; + convert(outV, inV, 0); + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + convert(outV, inV, i); + } + outV.isRepeating = false; + } else { + for(int i = 0; i != n; i++) { + convert(outV, inV, i); + } + outV.isRepeating = false; + } + } else { + + // Handle case with nulls. Don't do function if the value is null, + // because the data may be undefined for a null value. + outV.noNulls = false; + if (inV.isRepeating) { + outV.isRepeating = true; + outV.isNull[0] = inV.isNull[0]; + if (!inV.isNull[0]) { + convert(outV, inV, 0); + } + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outV.isNull[i] = inV.isNull[i]; + if (!inV.isNull[i]) { + convert(outV, inV, i); + } + } + outV.isRepeating = false; + } else { + System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); + for(int i = 0; i != n; i++) { + if (!inV.isNull[i]) { + convert(outV, inV, i); + } + } + outV.isRepeating = false; + } + } + } + + + @Override + public int getOutputColumn() { + return outputColumn; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + public int getInputColumn() { + return inputColumn; + } + + public void setInputColumn(int inputColumn) { + this.inputColumn = inputColumn; + } + + @Override + public String getOutputType() { + return "decimal"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new 
VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.DECIMAL) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN); + return b.build(); + } +} \ No newline at end of file diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDouble.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDouble.java new file mode 100644 index 0000000..2ba1509 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDouble.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; + +public class CastDecimalToDouble extends FuncDecimalToDouble { + + private static final long serialVersionUID = 1L; + + public CastDecimalToDouble(int inputCol, int outputCol) { + super(inputCol, outputCol); + } + + protected void func(DoubleColumnVector outV, DecimalColumnVector inV, int i) { + outV.vector[i] = inV.vector[i].doubleValue(); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java new file mode 100644 index 0000000..65a804d --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; + +/** + * Type cast decimal to long + */ +public class CastDecimalToLong extends FuncDecimalToLong { + private static final long serialVersionUID = 1L; + + public CastDecimalToLong(int inputColumn, int outputColumn) { + super(inputColumn, outputColumn); + } + + @Override + protected void func(LongColumnVector outV, DecimalColumnVector inV, int i) { + outV.vector[i] = inV.vector[i].longValue(); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToString.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToString.java new file mode 100644 index 0000000..5b2a658 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToString.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; + +/** + * To support vectorized cast of decimal to string. + */ +public class CastDecimalToString extends DecimalToStringUnaryUDF { + + private static final long serialVersionUID = 1L; + + public CastDecimalToString(int inputColumn, int outputColumn) { + super(inputColumn, outputColumn); + } + + @Override + protected void func(BytesColumnVector outV, DecimalColumnVector inV, int i) { + String s = inV.vector[i].toFormalString(); + byte[] b = null; + try { + b = s.getBytes("UTF-8"); + } catch (Exception e) { + + // This should never happen. If it does, there is a bug. + throw new RuntimeException("Internal error: unable to convert decimal to string"); + } + outV.setVal(i, b, 0, b.length); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java new file mode 100644 index 0000000..df7e1ee --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.Decimal128; +import org.apache.hadoop.hive.common.type.SqlMathUtil; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; + +/** + * Type cast decimal to timestamp. The decimal value is interpreted + * as NNNN.DDDDDDDDD where NNNN is a number of seconds and DDDDDDDDD + * is a number of nano-seconds. + */ +public class CastDecimalToTimestamp extends FuncDecimalToLong { + private static final long serialVersionUID = 1L; + + /* The field tmp is a scratch variable for this operation. It is + * purposely not made static because if this code is ever made multi-threaded, + * each thread will then have its own VectorExpression tree and thus + * its own copy of the variable. + */ + private transient Decimal128 tmp = null; + private static final Decimal128 tenE9 = new Decimal128(1000000000); // 10^9 multiplier; only passed as the operand to multiplyDestructive below + + public CastDecimalToTimestamp(int inputColumn, int outputColumn) { + super(inputColumn, outputColumn); + tmp = new Decimal128(0); + } + + public CastDecimalToTimestamp() { + + // initialize local field after deserialization + tmp = new Decimal128(0); + } + + @Override + protected void func(LongColumnVector outV, DecimalColumnVector inV, int i) { + tmp.update(inV.vector[i]); + tmp.multiplyDestructive(tenE9, (short) 0); + + // set output + outV.vector[i] = tmp.longValue(); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToDecimal.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToDecimal.java new file mode 100644 index 0000000..14e30c3 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToDecimal.java @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) 
under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; + +/** + * Cast input double to a decimal. Get target value scale from output column vector. + */ +public class CastDoubleToDecimal extends FuncDoubleToDecimal { + + private static final long serialVersionUID = 1L; + + public CastDoubleToDecimal(int inputColumn, int outputColumn) { + super(inputColumn, outputColumn); + } + + @Override + protected void func(DecimalColumnVector outV, DoubleColumnVector inV, int i) { + outV.vector[i].update(inV.vector[i], outV.scale); + outV.checkPrecisionOverflow(i); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDecimal.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDecimal.java new file mode 100644 index 0000000..1d4d84d --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDecimal.java @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; + +/** + * To be used to cast long and boolean to decimal. + * This works for boolean too because boolean is encoded as 0 + * for false and 1 for true. + */ +public class CastLongToDecimal extends FuncLongToDecimal { + + private static final long serialVersionUID = 1L; + + public CastLongToDecimal(int inputColumn, int outputColumn) { + super(inputColumn, outputColumn); + } + + @Override + protected void func(DecimalColumnVector outV, LongColumnVector inV, int i) { + outV.vector[i].update(inV.vector[i], outV.scale); + outV.checkPrecisionOverflow(i); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java new file mode 100644 index 0000000..41762ed --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java @@ -0,0 +1,170 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Cast a string to a decimal. + * + * If other functions besides cast need to take a string in and produce a decimal, + * you can subclass this class or convert it to a superclass, and + * implement different "func()" methods for each operation. + */ +public class CastStringToDecimal extends VectorExpression { + private static final long serialVersionUID = 1L; + int inputColumn; + int outputColumn; + + public CastStringToDecimal(int inputColumn, int outputColumn) { + this.inputColumn = inputColumn; + this.outputColumn = outputColumn; + } + + public CastStringToDecimal() { + super(); + } + + /** + * Convert input string to a decimal, at position i in the respective vectors. + */ + protected void func(DecimalColumnVector outV, BytesColumnVector inV, int i) { + String s; + try { + + /* If this conversion is frequently used, this should be optimized, + * e.g. 
by converting to decimal from the input bytes directly without + * making a new string. + */ + s = new String(inV.vector[i], inV.start[i], inV.length[i], "UTF-8"); + outV.vector[i].update(s, outV.scale); + } catch (Exception e) { + + // for any exception in conversion to decimal, produce NULL + outV.noNulls = false; + outV.isNull[i] = true; + } + outV.checkPrecisionOverflow(i); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn]; + int[] sel = batch.selected; + int n = batch.size; + DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumn]; + + if (n == 0) { + + // Nothing to do + return; + } + + if (inV.noNulls) { + outV.noNulls = true; + if (inV.isRepeating) { + outV.isRepeating = true; + func(outV, inV, 0); + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + func(outV, inV, i); + } + outV.isRepeating = false; + } else { + for(int i = 0; i != n; i++) { + func(outV, inV, i); + } + outV.isRepeating = false; + } + } else { + + // Handle case with nulls. Don't do function if the value is null, + // because the data may be undefined for a null value. 
+ outV.noNulls = false; + if (inV.isRepeating) { + outV.isRepeating = true; + outV.isNull[0] = inV.isNull[0]; + if (!inV.isNull[0]) { + func(outV, inV, 0); + } + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outV.isNull[i] = inV.isNull[i]; + if (!inV.isNull[i]) { + func(outV, inV, i); + } + } + outV.isRepeating = false; + } else { + System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); + for(int i = 0; i != n; i++) { + if (!inV.isNull[i]) { + func(outV, inV, i); + } + } + outV.isRepeating = false; + } + } + } + + + @Override + public int getOutputColumn() { + return outputColumn; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + public int getInputColumn() { + return inputColumn; + } + + public void setInputColumn(int inputColumn) { + this.inputColumn = inputColumn; + } + + @Override + public String getOutputType() { + return "decimal"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.STRING) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN); + return b.build(); + } +} \ No newline at end of file diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDecimal.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDecimal.java new file mode 100644 index 0000000..37e92e1 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDecimal.java @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; + +/** + * To be used to cast timestamp to decimal. + */ +public class CastTimestampToDecimal extends FuncLongToDecimal { + + private static final long serialVersionUID = 1L; + + public CastTimestampToDecimal(int inputColumn, int outputColumn) { + super(inputColumn, outputColumn); + } + + @Override + protected void func(DecimalColumnVector outV, LongColumnVector inV, int i) { + + // the resulting decimal value is 1e-9 (10^-9) * the input long value, i.e. the long's digits with 9 fractional digits. + outV.vector[i].updateFixedPoint(inV.vector[i], (short) 9); + outV.vector[i].changeScaleDestructive(outV.scale); + outV.checkPrecisionOverflow(i); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java new file mode 100644 index 0000000..0e41a7c --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java @@ -0,0 +1,147 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * This is a superclass for unary decimal functions returning strings that operate directly on the + * input and set the output. 
+ */ +abstract public class DecimalToStringUnaryUDF extends VectorExpression { + private static final long serialVersionUID = 1L; + int inputColumn; + int outputColumn; + + public DecimalToStringUnaryUDF(int inputColumn, int outputColumn) { + this.inputColumn = inputColumn; + this.outputColumn = outputColumn; + } + + public DecimalToStringUnaryUDF() { + super(); + } + + abstract protected void func(BytesColumnVector outV, DecimalColumnVector inV, int i); + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + DecimalColumnVector inV = (DecimalColumnVector) batch.cols[inputColumn]; + int[] sel = batch.selected; + int n = batch.size; + BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn]; + outV.initBuffer(); + + if (n == 0) { + //Nothing to do + return; + } + + if (inV.noNulls) { + outV.noNulls = true; + if (inV.isRepeating) { + outV.isRepeating = true; + func(outV, inV, 0); + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + func(outV, inV, i); + } + outV.isRepeating = false; + } else { + for(int i = 0; i != n; i++) { + func(outV, inV, i); + } + outV.isRepeating = false; + } + } else { + + // Handle case with nulls. Don't do function if the value is null, + // because the data may be undefined for a null value. 
+ outV.noNulls = false; + if (inV.isRepeating) { + outV.isRepeating = true; + outV.isNull[0] = inV.isNull[0]; + if (!inV.isNull[0]) { + func(outV, inV, 0); + } + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outV.isNull[i] = inV.isNull[i]; + if (!inV.isNull[i]) { + func(outV, inV, i); + } + } + outV.isRepeating = false; + } else { + System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); + for(int i = 0; i != n; i++) { + if (!inV.isNull[i]) { + func(outV, inV, i); + } + } + outV.isRepeating = false; + } + } + } + + + @Override + public int getOutputColumn() { + return outputColumn; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + public int getInputColumn() { + return inputColumn; + } + + public void setInputColumn(int inputColumn) { + this.inputColumn = inputColumn; + } + + @Override + public String getOutputType() { + return "Decimal"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.DECIMAL) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN); + return b.build(); + } +} \ No newline at end of file diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java new file mode 100644 index 0000000..cc6afa5 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java @@ -0,0 +1,147 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * This is a superclass for unary decimal functions and expressions returning doubles that + * operate directly on the input and set the output. 
+ */ +public abstract class FuncDecimalToDouble extends VectorExpression { + private static final long serialVersionUID = 1L; + int inputColumn; + int outputColumn; + + public FuncDecimalToDouble(int inputColumn, int outputColumn) { + this.inputColumn = inputColumn; + this.outputColumn = outputColumn; + } + + public FuncDecimalToDouble() { + super(); + } + + abstract protected void func(DoubleColumnVector outV, DecimalColumnVector inV, int i); + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + DecimalColumnVector inV = (DecimalColumnVector) batch.cols[inputColumn]; + int[] sel = batch.selected; + int n = batch.size; + DoubleColumnVector outV = (DoubleColumnVector) batch.cols[outputColumn]; + + if (n == 0) { + + // Nothing to do + return; + } + + if (inV.noNulls) { + outV.noNulls = true; + if (inV.isRepeating) { + outV.isRepeating = true; + func(outV, inV, 0); + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + func(outV, inV, i); + } + outV.isRepeating = false; + } else { + for(int i = 0; i != n; i++) { + func(outV, inV, i); + } + outV.isRepeating = false; + } + } else { + + // Handle case with nulls. Don't do function if the value is null, + // because the data may be undefined for a null value. 
+ outV.noNulls = false; + if (inV.isRepeating) { + outV.isRepeating = true; + outV.isNull[0] = inV.isNull[0]; + if (!inV.isNull[0]) { + func(outV, inV, 0); + } + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outV.isNull[i] = inV.isNull[i]; + if (!inV.isNull[i]) { + func(outV, inV, i); + } + } + outV.isRepeating = false; + } else { + System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); + for(int i = 0; i != n; i++) { + if (!inV.isNull[i]) { + func(outV, inV, i); + } + } + outV.isRepeating = false; + } + } + } + + + @Override + public int getOutputColumn() { + return outputColumn; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + public int getInputColumn() { + return inputColumn; + } + + public void setInputColumn(int inputColumn) { + this.inputColumn = inputColumn; + } + + @Override + public String getOutputType() { + return "double"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.DECIMAL) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN); + return b.build(); + } +} \ No newline at end of file diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java new file mode 100644 index 0000000..eeac8f1 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java @@ -0,0 +1,148 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.Decimal128; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * This is a superclass for unary decimal functions and expressions returning integers that + * operate directly on the input and set the output. 
+ */ +public abstract class FuncDecimalToLong extends VectorExpression { + private static final long serialVersionUID = 1L; + int inputColumn; + int outputColumn; + + public FuncDecimalToLong(int inputColumn, int outputColumn) { + this.inputColumn = inputColumn; + this.outputColumn = outputColumn; + } + + public FuncDecimalToLong() { + super(); + } + + abstract protected void func(LongColumnVector outV, DecimalColumnVector inV, int i); + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + DecimalColumnVector inV = (DecimalColumnVector) batch.cols[inputColumn]; + int[] sel = batch.selected; + int n = batch.size; + LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn]; + + if (n == 0) { + + // Nothing to do + return; + } + + if (inV.noNulls) { + outV.noNulls = true; + if (inV.isRepeating) { + outV.isRepeating = true; + func(outV, inV, 0); + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + func(outV, inV, i); + } + outV.isRepeating = false; + } else { + for(int i = 0; i != n; i++) { + func(outV, inV, i); + } + outV.isRepeating = false; + } + } else { + + // Handle case with nulls. Don't do function if the value is null, + // because the data may be undefined for a null value. 
+ outV.noNulls = false; + if (inV.isRepeating) { + outV.isRepeating = true; + outV.isNull[0] = inV.isNull[0]; + if (!inV.isNull[0]) { + func(outV, inV, 0); + } + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outV.isNull[i] = inV.isNull[i]; + if (!inV.isNull[i]) { + func(outV, inV, i); + } + } + outV.isRepeating = false; + } else { + System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); + for(int i = 0; i != n; i++) { + if (!inV.isNull[i]) { + func(outV, inV, i); + } + } + outV.isRepeating = false; + } + } + } + + + @Override + public int getOutputColumn() { + return outputColumn; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + public int getInputColumn() { + return inputColumn; + } + + public void setInputColumn(int inputColumn) { + this.inputColumn = inputColumn; + } + + @Override + public String getOutputType() { + return "long"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.DECIMAL) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN); + return b.build(); + } +} \ No newline at end of file diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java new file mode 100644 index 0000000..8b2a6f0 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java @@ -0,0 +1,148 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.Decimal128; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * This is a superclass for unary double functions and expressions returning decimals that + * operate directly on the input and set the output. 
+ */ +public abstract class FuncDoubleToDecimal extends VectorExpression { + private static final long serialVersionUID = 1L; + int inputColumn; + int outputColumn; + + public FuncDoubleToDecimal(int inputColumn, int outputColumn) { + this.inputColumn = inputColumn; + this.outputColumn = outputColumn; + } + + public FuncDoubleToDecimal() { + super(); + } + + abstract protected void func(DecimalColumnVector outV, DoubleColumnVector inV, int i); + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + DoubleColumnVector inV = (DoubleColumnVector) batch.cols[inputColumn]; + int[] sel = batch.selected; + int n = batch.size; + DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumn]; + + if (n == 0) { + + // Nothing to do + return; + } + + if (inV.noNulls) { + outV.noNulls = true; + if (inV.isRepeating) { + outV.isRepeating = true; + func(outV, inV, 0); + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + func(outV, inV, i); + } + outV.isRepeating = false; + } else { + for(int i = 0; i != n; i++) { + func(outV, inV, i); + } + outV.isRepeating = false; + } + } else { + + // Handle case with nulls. Don't do function if the value is null, + // because the data may be undefined for a null value. 
+ outV.noNulls = false; + if (inV.isRepeating) { + outV.isRepeating = true; + outV.isNull[0] = inV.isNull[0]; + if (!inV.isNull[0]) { + func(outV, inV, 0); + } + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outV.isNull[i] = inV.isNull[i]; + if (!inV.isNull[i]) { + func(outV, inV, i); + } + } + outV.isRepeating = false; + } else { + System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); + for(int i = 0; i != n; i++) { + if (!inV.isNull[i]) { + func(outV, inV, i); + } + } + outV.isRepeating = false; + } + } + } + + + @Override + public int getOutputColumn() { + return outputColumn; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + public int getInputColumn() { + return inputColumn; + } + + public void setInputColumn(int inputColumn) { + this.inputColumn = inputColumn; + } + + @Override + public String getOutputType() { + return "decimal"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.DOUBLE) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN); + return b.build(); + } +} \ No newline at end of file diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java new file mode 100644 index 0000000..18d1dbb --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java @@ -0,0 +1,148 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.Decimal128; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * This is a superclass for unary long functions and expressions returning decimals that + * operate directly on the input and set the output. 
+ */ +public abstract class FuncLongToDecimal extends VectorExpression { + private static final long serialVersionUID = 1L; + int inputColumn; + int outputColumn; + + public FuncLongToDecimal(int inputColumn, int outputColumn) { + this.inputColumn = inputColumn; + this.outputColumn = outputColumn; + } + + public FuncLongToDecimal() { + super(); + } + + abstract protected void func(DecimalColumnVector outV, LongColumnVector inV, int i); + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + LongColumnVector inV = (LongColumnVector) batch.cols[inputColumn]; + int[] sel = batch.selected; + int n = batch.size; + DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumn]; + + if (n == 0) { + + // Nothing to do + return; + } + + if (inV.noNulls) { + outV.noNulls = true; + if (inV.isRepeating) { + outV.isRepeating = true; + func(outV, inV, 0); + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + func(outV, inV, i); + } + outV.isRepeating = false; + } else { + for(int i = 0; i != n; i++) { + func(outV, inV, i); + } + outV.isRepeating = false; + } + } else { + + // Handle case with nulls. Don't do function if the value is null, + // because the data may be undefined for a null value. 
+ outV.noNulls = false; + if (inV.isRepeating) { + outV.isRepeating = true; + outV.isNull[0] = inV.isNull[0]; + if (!inV.isNull[0]) { + func(outV, inV, 0); + } + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outV.isNull[i] = inV.isNull[i]; + if (!inV.isNull[i]) { + func(outV, inV, i); + } + } + outV.isRepeating = false; + } else { + System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); + for(int i = 0; i != n; i++) { + if (!inV.isNull[i]) { + func(outV, inV, i); + } + } + outV.isRepeating = false; + } + } + } + + + @Override + public int getOutputColumn() { + return outputColumn; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + public int getInputColumn() { + return inputColumn; + } + + public void setInputColumn(int inputColumn) { + this.inputColumn = inputColumn; + } + + @Override + public String getOutputType() { + return "decimal"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.LONG) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN); + return b.build(); + } +} \ No newline at end of file diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java index aa2f039..832463d 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java @@ -27,7 +27,9 @@ import junit.framework.Assert; +import org.apache.hadoop.hive.common.type.Decimal128; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import 
org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -178,4 +180,365 @@ public void testCastBooleanToString() { StringExpr.compare(t, 0, t.length, resultV.vector[1], resultV.start[1], resultV.length[1])); } + + @Test + public void testCastDecimalToLong() { + + // test basic case + VectorizedRowBatch b = getBatchDecimalLong(); + VectorExpression expr = new CastDecimalToLong(0, 1); + expr.evaluate(b); + LongColumnVector r = (LongColumnVector) b.cols[1]; + assertEquals(1, r.vector[0]); + assertEquals(-2, r.vector[1]); + assertEquals(9999999999999999L, r.vector[2]); + + // test with nulls in input + b = getBatchDecimalLong(); + b.cols[0].noNulls = false; + b.cols[0].isNull[1] = true; + expr.evaluate(b); + r = (LongColumnVector) b.cols[1]; + assertFalse(r.noNulls); + assertTrue(r.isNull[1]); + assertFalse(r.isNull[0]); + assertEquals(1, r.vector[0]); + + // test repeating case + b = getBatchDecimalLong(); + b.cols[0].isRepeating = true; + expr.evaluate(b); + r = (LongColumnVector) b.cols[1]; + assertTrue(r.isRepeating); + assertEquals(1, r.vector[0]); + + // test repeating nulls case + b = getBatchDecimalLong(); + b.cols[0].isRepeating = true; + b.cols[0].noNulls = false; + b.cols[0].isNull[0] = true; + expr.evaluate(b); + r = (LongColumnVector) b.cols[1]; + assertTrue(r.isRepeating); + assertTrue(r.isNull[0]); + } + + @Test + /* Just spot check the basic case because code path is the same as + * for cast of decimal to long due to inheritance. 
+ */ + public void testCastDecimalToBoolean() { + VectorizedRowBatch b = getBatchDecimalLong(); + VectorExpression expr = new CastDecimalToBoolean(0, 1); + DecimalColumnVector in = (DecimalColumnVector) b.cols[0]; + in.vector[1].update(0); + expr.evaluate(b); + LongColumnVector r = (LongColumnVector) b.cols[1]; + assertEquals(1, r.vector[0]); + assertEquals(0, r.vector[1]); + assertEquals(1, r.vector[2]); + } + + private VectorizedRowBatch getBatchDecimalLong() { + VectorizedRowBatch b = new VectorizedRowBatch(2); + DecimalColumnVector dv; + short scale = 2; + b.cols[0] = dv = new DecimalColumnVector(18, scale); + b.cols[1] = new LongColumnVector(); + + b.size = 3; + + dv.vector[0].update("1.1", scale); + dv.vector[1].update("-2.2", scale); + dv.vector[2].update("9999999999999999.00", scale); + + return b; + } + + @Test + public void testCastDecimalToDouble() { + + final double eps = 0.000001d; // tolerance to check double equality + + // test basic case + VectorizedRowBatch b = getBatchDecimalDouble(); + VectorExpression expr = new CastDecimalToDouble(0, 1); + expr.evaluate(b); + DoubleColumnVector r = (DoubleColumnVector) b.cols[1]; + assertEquals(1.1d, r.vector[0], eps); + assertEquals(-2.2d, r.vector[1], eps); + assertEquals(9999999999999999.0d, r.vector[2], eps); + + // test with nulls in input + b = getBatchDecimalDouble(); + b.cols[0].noNulls = false; + b.cols[0].isNull[1] = true; + expr.evaluate(b); + r = (DoubleColumnVector) b.cols[1]; + assertFalse(r.noNulls); + assertTrue(r.isNull[1]); + assertFalse(r.isNull[0]); + assertEquals(1.1d, r.vector[0], eps); + + // test repeating case + b = getBatchDecimalDouble(); + b.cols[0].isRepeating = true; + expr.evaluate(b); + r = (DoubleColumnVector) b.cols[1]; + assertTrue(r.isRepeating); + assertEquals(1.1d, r.vector[0], eps); + + // test repeating nulls case + b = getBatchDecimalDouble(); + b.cols[0].isRepeating = true; + b.cols[0].noNulls = false; + b.cols[0].isNull[0] = true; + expr.evaluate(b); + r = 
(DoubleColumnVector) b.cols[1]; + assertTrue(r.isRepeating); + assertTrue(r.isNull[0]); + } + + private VectorizedRowBatch getBatchDecimalDouble() { + VectorizedRowBatch b = new VectorizedRowBatch(2); + DecimalColumnVector dv; + short scale = 2; + b.cols[0] = dv = new DecimalColumnVector(18, scale); + b.cols[1] = new DoubleColumnVector(); + + b.size = 3; + + dv.vector[0].update("1.1", scale); + dv.vector[1].update("-2.2", scale); + dv.vector[2].update("9999999999999999.00", scale); + + return b; + } + + @Test + public void testCastDecimalToString() { + VectorizedRowBatch b = getBatchDecimalString(); + VectorExpression expr = new CastDecimalToString(0, 1); + expr.evaluate(b); + BytesColumnVector r = (BytesColumnVector) b.cols[1]; + + byte[] v = toBytes("1.10"); + Assert.assertEquals(0, + StringExpr.compare(v, 0, v.length, + r.vector[0], r.start[0], r.length[0])); + + v = toBytes("-2.20"); + Assert.assertEquals(0, + StringExpr.compare(v, 0, v.length, + r.vector[1], r.start[1], r.length[1])); + + v = toBytes("9999999999999999.00"); + Assert.assertEquals(0, + StringExpr.compare(v, 0, v.length, + r.vector[2], r.start[2], r.length[2])); + } + + private VectorizedRowBatch getBatchDecimalString() { + VectorizedRowBatch b = new VectorizedRowBatch(2); + DecimalColumnVector dv; + short scale = 2; + b.cols[0] = dv = new DecimalColumnVector(18, scale); + b.cols[1] = new BytesColumnVector(); + + b.size = 3; + + dv.vector[0].update("1.1", scale); + dv.vector[1].update("-2.2", scale); + dv.vector[2].update("9999999999999999.00", scale); + + return b; + } + + @Test + public void testCastDecimalToTimestamp() { + VectorizedRowBatch b = getBatchDecimalLong2(); + VectorExpression expr = new CastDecimalToTimestamp(0, 1); + expr.evaluate(b); + LongColumnVector r = (LongColumnVector) b.cols[1]; + assertEquals(1111111111L, r.vector[0]); + assertEquals(-2222222222L, r.vector[1]); + assertEquals(31536000999999999L, r.vector[2]); + } + + private VectorizedRowBatch getBatchDecimalLong2() { + 
VectorizedRowBatch b = new VectorizedRowBatch(2); + DecimalColumnVector dv; + short scale = 9; + b.cols[0] = dv = new DecimalColumnVector(18, scale); + b.cols[1] = new LongColumnVector(); + + b.size = 3; + + dv.vector[0].update("1.111111111", scale); + dv.vector[1].update("-2.222222222", scale); + dv.vector[2].update("31536000.999999999", scale); + + return b; + } + + @Test + public void testCastLongToDecimal() { + VectorizedRowBatch b = getBatchLongDecimal(); + VectorExpression expr = new CastLongToDecimal(0, 1); + expr.evaluate(b); + DecimalColumnVector r = (DecimalColumnVector) b.cols[1]; + assertTrue(r.vector[0].equals(new Decimal128(0, (short) 2))); + assertTrue(r.vector[1].equals(new Decimal128(-1, (short) 2))); + assertTrue(r.vector[2].equals(new Decimal128(99999999999999L, (short) 2))); + } + + private VectorizedRowBatch getBatchLongDecimal() { + VectorizedRowBatch b = new VectorizedRowBatch(2); + LongColumnVector lv; + b.cols[0] = lv = new LongColumnVector(); + b.cols[1] = new DecimalColumnVector(18, 2); + lv.vector[0] = 0; + lv.vector[1] = -1; + lv.vector[2] = 99999999999999L; + return b; + } + + @Test + public void testCastDoubleToDecimal() { + VectorizedRowBatch b = getBatchDoubleDecimal(); + VectorExpression expr = new CastDoubleToDecimal(0, 1); + expr.evaluate(b); + DecimalColumnVector r = (DecimalColumnVector) b.cols[1]; + + assertTrue(r.vector[0].equals(new Decimal128(0, r.scale))); + assertTrue(r.vector[1].equals(new Decimal128(-1, r.scale))); + assertTrue(r.vector[2].equals(new Decimal128("99999999999999.0", r.scale))); + } + + private VectorizedRowBatch getBatchDoubleDecimal() { + VectorizedRowBatch b = new VectorizedRowBatch(2); + DoubleColumnVector dv; + short scale = 2; + b.cols[0] = dv = new DoubleColumnVector(); + b.cols[1] = new DecimalColumnVector(18, scale); + + b.size = 3; + + dv.vector[0] = 0d; + dv.vector[1] = -1d; + dv.vector[2] = 99999999999999.0d; + + return b; + } + + @Test + public void testCastStringToDecimal() { + 
VectorizedRowBatch b = getBatchStringDecimal(); + VectorExpression expr = new CastStringToDecimal(0, 1); + expr.evaluate(b); + DecimalColumnVector r = (DecimalColumnVector) b.cols[1]; + assertTrue(r.vector[0].equals(new Decimal128("1.10", r.scale))); + assertTrue(r.vector[1].equals(new Decimal128("-2.20", r.scale))); + assertTrue(r.vector[2].equals(new Decimal128("99999999999999.0", r.scale))); + } + + private VectorizedRowBatch getBatchStringDecimal() { + VectorizedRowBatch b = new VectorizedRowBatch(2); + BytesColumnVector bv; + b.cols[0] = bv = new BytesColumnVector(); + b.cols[1] = new DecimalColumnVector(18, 2); + + bv.initBuffer(); + + byte[] x0 = toBytes("1.10"); + byte[] x1 = toBytes("-2.20"); + byte[] x2 = toBytes("99999999999999.0"); + + bv.setVal(0, x0, 0, x0.length); + bv.setVal(1, x1, 0, x1.length); + bv.setVal(2, x2, 0, x2.length); + + return b; + } + + @Test + public void testCastTimestampToDecimal() { + + // The input timestamps are stored as long values + // measured in nanoseconds from the epoch. + VectorizedRowBatch b = getBatchLongDecimal(); + VectorExpression expr = new CastTimestampToDecimal(0, 1); + LongColumnVector inL = (LongColumnVector) b.cols[0]; + inL.vector[1] = -1990000000L; + expr.evaluate(b); + DecimalColumnVector r = (DecimalColumnVector) b.cols[1]; + assertTrue(r.vector[0].equals(new Decimal128(0, (short) 2))); + assertTrue(r.vector[1].equals(new Decimal128("-1.99", (short) 2))); + assertTrue(r.vector[2].equals(new Decimal128("100000.00", (short) 2))); + + // Try again with a value that won't fit in 5 digits, to make + // sure that NULL is produced. + b = getBatchLongDecimalPrec5Scale2(); + expr.evaluate(b); + r = (DecimalColumnVector) b.cols[1]; + assertFalse(r.noNulls); + assertFalse(r.isNull[0]); + assertFalse(r.isNull[1]); + assertTrue(r.isNull[2]); + } + + /* This batch has output decimal column precision 5 and scale 2. 
+ * The goal is to allow testing of input long values that, when + * converted to decimal, will not fit in the given precision. + * Then it will be possible to check that the results are NULL. + */ + private VectorizedRowBatch getBatchLongDecimalPrec5Scale2() { + VectorizedRowBatch b = new VectorizedRowBatch(2); + LongColumnVector lv; + b.cols[0] = lv = new LongColumnVector(); + b.cols[1] = new DecimalColumnVector(5, 2); + lv.vector[0] = 0; + lv.vector[1] = -1; + lv.vector[2] = 99999999999999L; + return b; + } + + @Test + public void testCastDecimalToDecimal() { + + // test casting from one precision and scale to another. + VectorizedRowBatch b = getBatchDecimalDecimal(); + VectorExpression expr = new CastDecimalToDecimal(0, 1); + expr.evaluate(b); + DecimalColumnVector r = (DecimalColumnVector) b.cols[1]; + assertTrue(r.vector[0].equals(new Decimal128("10.00", (short) 2))); + assertFalse(r.noNulls); + assertTrue(r.isNull[1]); + + // test an increase in precision/scale + b = getBatchDecimalDecimal(); + expr = new CastDecimalToDecimal(1, 0); + expr.evaluate(b); + r = (DecimalColumnVector) b.cols[0]; + assertTrue(r.vector[0].equals(new Decimal128("100.01", (short) 4))); + assertTrue(r.vector[1].equals(new Decimal128("-200.02", (short) 4))); + assertTrue(r.noNulls); + } + + private VectorizedRowBatch getBatchDecimalDecimal() { + VectorizedRowBatch b = new VectorizedRowBatch(2); + + DecimalColumnVector v0, v1; + b.cols[0] = v0 = new DecimalColumnVector(18, 4); + b.cols[1] = v1 = new DecimalColumnVector(5, 2); + + v0.vector[0].update(new Decimal128("10.0001", (short) 4)); + v0.vector[1].update(new Decimal128("-9999999.9999", (short) 4)); + + v1.vector[0].update(new Decimal128("100.01", (short) 2)); + v1.vector[1].update(new Decimal128("-200.02", (short) 2)); + + b.size = 2; + return b; + } }