From 03833bf6f801a6b676cfaa9ee7f3965e2d871b3a Mon Sep 17 00:00:00 2001 From: Josh Wills Date: Thu, 5 Jan 2012 18:08:35 -0800 Subject: [PATCH 01/12] Query parser changes to add the DECIMAL definition --- ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g | 3 +++ .../hadoop/hive/ql/parse/TypeCheckProcFactory.java | 2 ++ 2 files changed, 5 insertions(+), 0 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g index 888bf47..6193795 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g @@ -103,6 +103,7 @@ TOK_DATETIME; TOK_TIMESTAMP; TOK_STRING; TOK_BINARY; +TOK_DECIMAL; TOK_LIST; TOK_STRUCT; TOK_MAP; @@ -1314,6 +1315,7 @@ primitiveType | KW_TIMESTAMP -> TOK_TIMESTAMP | KW_STRING -> TOK_STRING | KW_BINARY -> TOK_BINARY + | KW_DECIMAL -> TOK_DECIMAL ; listType @@ -2214,6 +2216,7 @@ KW_DOUBLE: 'DOUBLE'; KW_DATE: 'DATE'; KW_DATETIME: 'DATETIME'; KW_TIMESTAMP: 'TIMESTAMP'; +KW_DECIMAL: 'DECIMAL'; KW_STRING: 'STRING'; KW_ARRAY: 'ARRAY'; KW_STRUCT: 'STRUCT'; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java index 59e55ae..14fb75e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java @@ -488,6 +488,8 @@ public final class TypeCheckProcFactory { Constants.BINARY_TYPE_NAME); conversionFunctionTextHashMap.put(HiveParser.TOK_TIMESTAMP, Constants.TIMESTAMP_TYPE_NAME); + conversionFunctionTextHashMap.put(HiveParser.TOK_DECIMAL, + Constants.DECIMAL_TYPE_NAME); } public static boolean isRedundantConversionFunction(ASTNode expr, -- 1.7.3.4 From 343a3fee046595433ded4b232419a5e8308991ce Mon Sep 17 00:00:00 2001 From: Josh Wills Date: Sat, 7 Jan 2012 19:57:07 -0800 Subject: [PATCH 02/12] First checkin of casting functionality for decimal data types and a clientpositive test case --- .../hadoop/hive/metastore/MetaStoreUtils.java | 4 +- .../hadoop/hive/ql/exec/FunctionRegistry.java | 7 +- .../hadoop/hive/ql/parse/DDLSemanticAnalyzer.java | 1 + .../hive/ql/udf/generic/GenericUDFToDecimal.java | 74 ++++++++++++++++++++ ql/src/test/queries/clientpositive/decimal_1.q | 16 ++++ .../primitive/PrimitiveObjectInspectorFactory.java | 23 +++++-- .../WritableConstantBigDecimalObjectInspector.java | 40 +++++++++++ 7 files changed, 155 insertions(+), 10 deletions(-) create mode 100644 ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDecimal.java create mode 100644 ql/src/test/queries/clientpositive/decimal_1.q create mode 100644 serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableConstantBigDecimalObjectInspector.java diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java index c10012d..6dbd519 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java @@ -362,7 +362,7 @@ public class MetaStoreUtils { org.apache.hadoop.hive.serde.Constants.STRING_TYPE_NAME, "string"); typeToThriftTypeMap.put( org.apache.hadoop.hive.serde.Constants.BINARY_TYPE_NAME, "binary"); - // These 3 types are not supported yet. + // These 4 types are not supported yet. 
// We should define a complex type date in thrift that contains a single int // member, and DynamicSerDe // should convert it to date type at runtime. @@ -373,6 +373,8 @@ public class MetaStoreUtils { typeToThriftTypeMap .put(org.apache.hadoop.hive.serde.Constants.TIMESTAMP_TYPE_NAME, "timestamp"); + typeToThriftTypeMap.put( + org.apache.hadoop.hive.serde.Constants.DECIMAL_TYPE_NAME, "decimal"); } /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 475ae7f..590654f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -166,8 +166,8 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFFromUtcTimestamp; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIf; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIndex; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFInFile; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIndex; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFInstr; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLocate; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFMap; @@ -193,6 +193,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStringToMap; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFTimestamp; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToBinary; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDecimal; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUtcTimestamp; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUnion; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen; @@ -220,10 +221,8 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.util.ReflectionUtils; - import org.w3c.dom.Document; import org.w3c.dom.Element; -import org.w3c.dom.Node; import org.w3c.dom.NodeList; /** @@ -393,6 +392,8 @@ public final class FunctionRegistry { GenericUDFTimestamp.class); registerGenericUDF(Constants.BINARY_TYPE_NAME, GenericUDFToBinary.class); + registerGenericUDF(Constants.DECIMAL_TYPE_NAME, + GenericUDFToDecimal.class); // Aggregate functions registerGenericUDAF("max", new GenericUDAFMax()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java index c80403d..da1147e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java @@ -139,6 +139,7 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer { TokenToTypeName.put(HiveParser.TOK_DATE, Constants.DATE_TYPE_NAME); TokenToTypeName.put(HiveParser.TOK_DATETIME, Constants.DATETIME_TYPE_NAME); TokenToTypeName.put(HiveParser.TOK_TIMESTAMP, Constants.TIMESTAMP_TYPE_NAME); + TokenToTypeName.put(HiveParser.TOK_DECIMAL, Constants.DECIMAL_TYPE_NAME); } public static String getTypeName(int token) throws SemanticException { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDecimal.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDecimal.java new file mode 100644 index 
0000000..d6776d1 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDecimal.java @@ -0,0 +1,74 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.BigDecimalConverter; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; + +@Description(name = "decimal", value = "_FUNC_(a) - cast a to decimal") +public class GenericUDFToDecimal extends GenericUDF { + + private PrimitiveObjectInspector argumentOI; + private BigDecimalConverter bdConverter; + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + if (arguments.length < 1) { + throw new UDFArgumentLengthException( + "The function DECIMAL requires at least one argument, got " + + arguments.length); + } + try { + argumentOI = (PrimitiveObjectInspector) arguments[0]; + } catch (ClassCastException e) { + throw new UDFArgumentException( + "The function DECIMAL takes only primitive types"); + } + + bdConverter = new BigDecimalConverter(argumentOI, + PrimitiveObjectInspectorFactory.writableBigDecimalObjectInspector); + return PrimitiveObjectInspectorFactory.writableBigDecimalObjectInspector; + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + Object o0 = arguments[0].get(); + if (o0 == null) { + return null; + } + + return bdConverter.convert(o0); + } + + @Override + public String getDisplayString(String[] children) { + assert (children.length == 1); + StringBuilder sb = new StringBuilder(); + sb.append("CAST( "); + sb.append(children[0]); + sb.append(" AS DECIMAL)"); + return sb.toString(); + } + +} diff --git a/ql/src/test/queries/clientpositive/decimal_1.q b/ql/src/test/queries/clientpositive/decimal_1.q new file mode 100644 index 0000000..084fce1 --- /dev/null +++ b/ql/src/test/queries/clientpositive/decimal_1.q @@ -0,0 +1,16 @@ +drop table decimal_1; + +create table decimal_1 (t decimal); + +insert overwrite table decimal_1 + select cast('17.29' as decimal) from src limit 1; +select cast(t as boolean) from decimal_1 limit 1; +select cast(t as tinyint) from decimal_1 limit 1; +select cast(t as smallint) from decimal_1 limit 1; +select cast(t as int) from decimal_1 limit 1; +select cast(t as 
bigint) from decimal_1 limit 1; +select cast(t as float) from decimal_1 limit 1; +select cast(t as double) from decimal_1 limit 1; +select cast(t as string) from decimal_1 limit 1; + +drop table decimal_1; diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorFactory.java b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorFactory.java index 94849e2..597572f 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorFactory.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorFactory.java @@ -20,20 +20,21 @@ package org.apache.hadoop.hive.serde2.objectinspector.primitive; import java.util.HashMap; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; +import org.apache.hadoop.hive.serde2.io.ByteWritable; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; -import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.BooleanWritable; -import org.apache.hadoop.hive.serde2.io.ByteWritable; -import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.FloatWritable; -import org.apache.hadoop.hive.serde2.io.DoubleWritable; -import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; /** * PrimitiveObjectInspectorFactory is the primary way to create new @@ -68,6 +69,8 @@ public final class PrimitiveObjectInspectorFactory { new JavaTimestampObjectInspector(); public static final JavaBinaryObjectInspector javaByteArrayObjectInspector = new JavaBinaryObjectInspector(); + public static final JavaBigDecimalObjectInspector javaBigDecimalObjectInspector = + new JavaBigDecimalObjectInspector(); public static final WritableBooleanObjectInspector writableBooleanObjectInspector = new WritableBooleanObjectInspector(); @@ -91,6 +94,8 @@ public final class PrimitiveObjectInspectorFactory { new WritableTimestampObjectInspector(); public static final WritableBinaryObjectInspector writableBinaryObjectInspector = new WritableBinaryObjectInspector(); + public static final WritableBigDecimalObjectInspector writableBigDecimalObjectInspector = + new WritableBigDecimalObjectInspector(); private static HashMap cachedPrimitiveWritableInspectorCache = new HashMap(); @@ -117,6 +122,8 @@ public final class PrimitiveObjectInspectorFactory { writableTimestampObjectInspector); cachedPrimitiveWritableInspectorCache.put(PrimitiveCategory.BINARY, writableBinaryObjectInspector); + cachedPrimitiveWritableInspectorCache.put(PrimitiveCategory.DECIMAL, + writableBigDecimalObjectInspector); } private static HashMap cachedPrimitiveJavaInspectorCache = @@ -144,6 +151,8 @@ public final class PrimitiveObjectInspectorFactory { javaTimestampObjectInspector); cachedPrimitiveJavaInspectorCache.put(PrimitiveCategory.BINARY, 
javaByteArrayObjectInspector); + cachedPrimitiveJavaInspectorCache.put(PrimitiveCategory.DECIMAL, + javaBigDecimalObjectInspector); } /** @@ -190,6 +199,8 @@ public final class PrimitiveObjectInspectorFactory { return new WritableConstantStringObjectInspector((Text)value); case TIMESTAMP: return new WritableConstantTimestampObjectInspector((TimestampWritable)value); + case DECIMAL: + return new WritableConstantBigDecimalObjectInspector((BigDecimalWritable)value); case VOID: return new WritableVoidObjectInspector(); default: diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableConstantBigDecimalObjectInspector.java b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableConstantBigDecimalObjectInspector.java new file mode 100644 index 0000000..672b106 --- /dev/null +++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableConstantBigDecimalObjectInspector.java @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.objectinspector.primitive; + +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; + +/** + * A WritableConstantBigDecimalObjectInspector is a WritableBigDecimalObjectInspector + * that implements ConstantObjectInspector. 
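+ * <p> + * A rough usage sketch (illustrative only; the constructor is package-private, so + * callers normally obtain instances through PrimitiveObjectInspectorFactory, as in + * the DECIMAL case added to that factory above): + * <pre> + * BigDecimalWritable w = new BigDecimalWritable(); + * w.set(new BigDecimal("17.29")); + * ConstantObjectInspector oi = new WritableConstantBigDecimalObjectInspector(w); + * oi.getWritableConstantValue(); // returns w, i.e. 17.29 + * </pre>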
+ */ +public class WritableConstantBigDecimalObjectInspector extends WritableBigDecimalObjectInspector + implements ConstantObjectInspector { + + private final BigDecimalWritable value; + + WritableConstantBigDecimalObjectInspector(BigDecimalWritable value) { + this.value = value; + } + + @Override + public BigDecimalWritable getWritableConstantValue() { + return value; + } +} -- 1.7.3.4 From a789827a0bb96c31c4cff5a259a91f3a6890d8f5 Mon Sep 17 00:00:00 2001 From: Josh Wills Date: Sun, 8 Jan 2012 00:07:33 -0800 Subject: [PATCH 03/12] First pass of BigDecimal UDFs --- .../hadoop/hive/ql/udf/UDFBaseNumericOp.java | 3 +++ .../hadoop/hive/ql/udf/UDFBaseNumericUnaryOp.java | 3 +++ .../org/apache/hadoop/hive/ql/udf/UDFOPMinus.java | 12 ++++++++++++ .../org/apache/hadoop/hive/ql/udf/UDFOPMod.java | 11 +++++++++++ .../apache/hadoop/hive/ql/udf/UDFOPMultiply.java | 11 +++++++++++ .../apache/hadoop/hive/ql/udf/UDFOPNegative.java | 10 ++++++++++ .../org/apache/hadoop/hive/ql/udf/UDFOPPlus.java | 13 ++++++++++++- .../apache/hadoop/hive/ql/udf/UDFOPPositive.java | 6 ++++++ .../org/apache/hadoop/hive/ql/udf/UDFPosMod.java | 15 +++++++++++++++ 9 files changed, 83 insertions(+), 1 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFBaseNumericOp.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFBaseNumericOp.java index 63d0255..14c16ec 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFBaseNumericOp.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFBaseNumericOp.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.NumericOpMethodResolver; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -48,6 +49,7 @@ public abstract class UDFBaseNumericOp extends UDF { protected LongWritable longWritable = new LongWritable(); protected FloatWritable floatWritable = new FloatWritable(); protected DoubleWritable doubleWritable = new DoubleWritable(); + protected BigDecimalWritable bigDecimalWritable = new BigDecimalWritable(); public abstract ByteWritable evaluate(ByteWritable a, ByteWritable b); @@ -61,4 +63,5 @@ public abstract class UDFBaseNumericOp extends UDF { public abstract DoubleWritable evaluate(DoubleWritable a, DoubleWritable b); + public abstract BigDecimalWritable evaluate(BigDecimalWritable a, BigDecimalWritable b); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFBaseNumericUnaryOp.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFBaseNumericUnaryOp.java index b220805..cb7dca4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFBaseNumericUnaryOp.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFBaseNumericUnaryOp.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -45,6 +46,7 @@ public abstract class UDFBaseNumericUnaryOp extends UDF { protected LongWritable longWritable = new LongWritable(); protected FloatWritable floatWritable = new FloatWritable(); protected DoubleWritable doubleWritable = new DoubleWritable(); + protected BigDecimalWritable bigDecimalWritable = new BigDecimalWritable(); public abstract 
ByteWritable evaluate(ByteWritable a); @@ -58,4 +60,5 @@ public abstract class UDFBaseNumericUnaryOp extends UDF { public abstract DoubleWritable evaluate(DoubleWritable a); + public abstract BigDecimalWritable evaluate(BigDecimalWritable a); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMinus.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMinus.java index 8ed1cc6..f884b9a 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMinus.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMinus.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -107,4 +108,15 @@ public class UDFOPMinus extends UDFBaseNumericOp { doubleWritable.set(a.get() - b.get()); return doubleWritable; } + + @Override + public BigDecimalWritable evaluate(BigDecimalWritable a, BigDecimalWritable b) { + + if ((a == null) || (b == null)) { + return null; + } + + bigDecimalWritable.set(a.getBigDecimal().subtract(b.getBigDecimal())); + return bigDecimalWritable; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMod.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMod.java index 1935f03..95bbf45 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMod.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMod.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -107,4 +108,14 @@ public class UDFOPMod extends UDFBaseNumericOp { doubleWritable.set(a.get() % b.get()); return doubleWritable; } + + @Override + public BigDecimalWritable evaluate(BigDecimalWritable a, BigDecimalWritable b) { + if ((a == null) || (b == null)) { + return null; + } + + bigDecimalWritable.set(a.getBigDecimal().remainder(b.getBigDecimal())); + return bigDecimalWritable; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMultiply.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMultiply.java index ce2c515..9058651 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMultiply.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMultiply.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -107,4 +108,14 @@ public class UDFOPMultiply extends UDFBaseNumericOp { doubleWritable.set(a.get() * b.get()); return doubleWritable; } + + @Override + public BigDecimalWritable evaluate(BigDecimalWritable a, BigDecimalWritable b) { + if ((a == null) || (b == null)) { + return null; + } + + bigDecimalWritable.set(a.getBigDecimal().multiply(b.getBigDecimal())); + return bigDecimalWritable; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPNegative.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPNegative.java index 2378df2..3c14fef 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPNegative.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPNegative.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -90,4 +91,13 @@ public class UDFOPNegative extends UDFBaseNumericUnaryOp { return doubleWritable; } + @Override + public BigDecimalWritable evaluate(BigDecimalWritable a) { + if (a == null) { + return null; + } + bigDecimalWritable.set(a.getBigDecimal().negate()); + return bigDecimalWritable; + } + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPPlus.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPPlus.java index d46c3c0..6368f4d 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPPlus.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPPlus.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -31,7 +32,7 @@ import org.apache.hadoop.io.LongWritable; * better performance and type checking (so we know int + int is still an int * instead of a double); otherwise a single method that takes (Number a, Number * b) and use a.doubleValue() == b.doubleValue() is enough. - * + * * The case of int + double will be handled by implicit type casting using * UDFRegistry.implicitConvertable method. */ @@ -113,4 +114,14 @@ public class UDFOPPlus extends UDFBaseNumericOp { return doubleWritable; } + @Override + public BigDecimalWritable evaluate(BigDecimalWritable a, BigDecimalWritable b) { + if ((a == null) || (b == null)) { + return null; + } + + bigDecimalWritable.set(a.getBigDecimal().add(b.getBigDecimal())); + return bigDecimalWritable; + } + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPPositive.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPPositive.java index c2c45e4..0711890 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPPositive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPPositive.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -66,4 +67,9 @@ public class UDFOPPositive extends UDFBaseNumericUnaryOp { return a; } + @Override + public BigDecimalWritable evaluate(BigDecimalWritable a) { + return a; + } + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFPosMod.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFPosMod.java index 3b86e9c..4467a90 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFPosMod.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFPosMod.java @@ -18,7 +18,10 @@ package org.apache.hadoop.hive.ql.udf; +import java.math.BigDecimal; + import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -108,4 
+111,16 @@ public class UDFPosMod extends UDFBaseNumericOp { doubleWritable.set(((a.get() % b.get()) + b.get()) % b.get()); return doubleWritable; } + + @Override + public BigDecimalWritable evaluate(BigDecimalWritable a, BigDecimalWritable b) { + if ((a == null) || (b == null)) { + return null; + } + + BigDecimal av = a.getBigDecimal(); + BigDecimal bv = b.getBigDecimal(); + bigDecimalWritable.set(av.remainder(bv).add(bv).remainder(bv)); + return bigDecimalWritable; + } } -- 1.7.3.4 From fce2713ce782d3ff96f2af751b23db39ddc4970e Mon Sep 17 00:00:00 2001 From: Josh Wills Date: Sun, 8 Jan 2012 00:22:08 -0800 Subject: [PATCH 04/12] Add UDFs for casting to/from decimal types --- .../apache/hadoop/hive/ql/udf/UDFToBoolean.java | 14 +++++++++++++- .../org/apache/hadoop/hive/ql/udf/UDFToByte.java | 11 ++++++++++- .../org/apache/hadoop/hive/ql/udf/UDFToDouble.java | 11 ++++++++++- .../org/apache/hadoop/hive/ql/udf/UDFToFloat.java | 12 +++++++++++- .../apache/hadoop/hive/ql/udf/UDFToInteger.java | 12 +++++++++++- .../org/apache/hadoop/hive/ql/udf/UDFToLong.java | 12 +++++++++++- .../org/apache/hadoop/hive/ql/udf/UDFToShort.java | 10 ++++++++++ .../org/apache/hadoop/hive/ql/udf/UDFToString.java | 12 +++++++++++- ql/src/test/queries/clientpositive/decimal_1.q | 1 + 9 files changed, 88 insertions(+), 7 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java index f3afd33..e7001ae 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java @@ -18,7 +18,10 @@ package org.apache.hadoop.hive.ql.udf; +import java.math.BigDecimal; + import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -35,7 +38,7 @@ import org.apache.hadoop.io.Text; * */ public class UDFToBoolean extends UDF { - private BooleanWritable booleanWritable = new BooleanWritable(); + private final BooleanWritable booleanWritable = new BooleanWritable(); public UDFToBoolean() { } @@ -172,4 +175,13 @@ public class UDFToBoolean extends UDF { } } + public BooleanWritable evaluate(BigDecimalWritable i) { + if (i == null) { + return null; + } else { + // compareTo, not equals: BigDecimal.equals is scale-sensitive, so 0.00 would not equal ZERO + booleanWritable.set(i.getBigDecimal().compareTo(BigDecimal.ZERO) != 0); + return booleanWritable; + } + } + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToByte.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToByte.java index 1b3b744..c5830ea 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToByte.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToByte.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -36,7 +37,7 @@ import org.apache.hadoop.io.Text; * */ public class UDFToByte extends UDF { - private ByteWritable byteWritable = new ByteWritable(); + private final ByteWritable byteWritable = new ByteWritable(); public UDFToByte() { } @@ -181,4 +182,12 @@ public class UDFToByte extends UDF { } } + public ByteWritable evaluate(BigDecimalWritable i) { + if (i == null) { + return null; + } else { +
byteWritable.set(i.getBigDecimal().byteValue()); + return byteWritable; + } + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToDouble.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToDouble.java index ce4660c..c57e31e 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToDouble.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToDouble.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -34,7 +35,7 @@ import org.apache.hadoop.io.Text; * */ public class UDFToDouble extends UDF { - private DoubleWritable doubleWritable = new DoubleWritable(); + private final DoubleWritable doubleWritable = new DoubleWritable(); public UDFToDouble() { } @@ -183,4 +184,12 @@ public class UDFToDouble extends UDF { } } + public DoubleWritable evaluate(BigDecimalWritable i) { + if (i == null) { + return null; + } else { + doubleWritable.set(i.getBigDecimal().doubleValue()); + return doubleWritable; + } + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToFloat.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToFloat.java index c6b197e..61591e9 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToFloat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToFloat.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -35,7 +36,7 @@ import org.apache.hadoop.io.Text; * */ public class UDFToFloat extends UDF { - private FloatWritable floatWritable = new FloatWritable(); + private final FloatWritable floatWritable = new FloatWritable(); public UDFToFloat() { } @@ -184,4 +185,13 @@ public class UDFToFloat extends UDF { } } + public FloatWritable evaluate(BigDecimalWritable i) { + if (i == null) { + return null; + } else { + floatWritable.set(i.getBigDecimal().floatValue()); + return floatWritable; + } + } + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToInteger.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToInteger.java index 9b9d7df..018b3de 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToInteger.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToInteger.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -36,7 +37,7 @@ import org.apache.hadoop.io.Text; * */ public class UDFToInteger extends UDF { - private IntWritable intWritable = new IntWritable(); + private final IntWritable intWritable = new IntWritable(); public UDFToInteger() { } @@ -188,4 +189,13 @@ public class UDFToInteger extends UDF { } } + public IntWritable evaluate(BigDecimalWritable i) { + if (i == null) { + return null; + } else { + intWritable.set(i.getBigDecimal().intValue()); + return intWritable; + } + } + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToLong.java 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToLong.java index c7ea66d..426bc64 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToLong.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToLong.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -36,7 +37,7 @@ import org.apache.hadoop.io.Text; * */ public class UDFToLong extends UDF { - private LongWritable longWritable = new LongWritable(); + private final LongWritable longWritable = new LongWritable(); public UDFToLong() { } @@ -192,4 +193,13 @@ public class UDFToLong extends UDF { } } + public LongWritable evaluate(BigDecimalWritable i) { + if (i == null) { + return null; + } else { + longWritable.set(i.getBigDecimal().longValue()); + return longWritable; + } + } + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToShort.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToShort.java index 558d405..5f42865 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToShort.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToShort.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -181,4 +182,13 @@ public class UDFToShort extends UDF { } } + public ShortWritable evaluate(BigDecimalWritable i) { + if (i == null) { + return null; + } else { + shortWritable.set(i.getBigDecimal().shortValue()); + return shortWritable; + } + } + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToString.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToString.java index 4a38f8c..1d06eb3 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToString.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToString.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.serde2.ByteStream; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -141,10 +142,19 @@ public class UDFToString extends UDF { } } + public Text evaluate(BigDecimalWritable i) { + if (i == null) { + return null; + } else { + t.set(i.toString()); + return t; + } + } + public Text evaluate (BytesWritable bw) { if (null == bw) { return null; -} + } t.set(bw.getBytes(),0,bw.getLength()); return t; } diff --git a/ql/src/test/queries/clientpositive/decimal_1.q b/ql/src/test/queries/clientpositive/decimal_1.q index 084fce1..063afa8 100644 --- a/ql/src/test/queries/clientpositive/decimal_1.q +++ b/ql/src/test/queries/clientpositive/decimal_1.q @@ -1,6 +1,7 @@ drop table decimal_1; create table decimal_1 (t decimal); +alter table decimal_1 set serde 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'; insert overwrite table decimal_1 select cast('17.29' as decimal) from src limit 1; -- 1.7.3.4 From d87ae4ccda515f77785c9919e8eca59fe43067ff Mon Sep 17 00:00:00 2001 From: Josh Wills Date: Sun, 8 Jan 2012 00:31:08 -0800 Subject: [PATCH 05/12] Bug fix 
for BigDecimalWritable set method-- correctly set the scale value --- .../hadoop/hive/serde2/io/BigDecimalWritable.java | 13 ++++++++++++- 1 files changed, 12 insertions(+), 1 deletions(-) diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/io/BigDecimalWritable.java b/serde/src/java/org/apache/hadoop/hive/serde2/io/BigDecimalWritable.java index 9c77805..3543224 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/io/BigDecimalWritable.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/io/BigDecimalWritable.java @@ -23,6 +23,7 @@ import java.io.IOException; import java.math.BigDecimal; import java.math.BigInteger; +import org.apache.hadoop.hive.serde2.ByteStream.Output; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.io.WritableUtils; @@ -56,6 +57,7 @@ public class BigDecimalWritable implements WritableComparable Date: Sun, 8 Jan 2012 23:03:24 -0800 Subject: [PATCH 06/12] First working version of decimal table type --- .../hadoop/hive/serde2/io/BigDecimalWritable.java | 28 +++++++++++++- .../serde2/lazybinary/LazyBinaryBigDecimal.java | 42 ++++++++++++++++++++ .../hive/serde2/lazybinary/LazyBinaryFactory.java | 3 + .../hive/serde2/lazybinary/LazyBinarySerDe.java | 10 +++++ .../hive/serde2/lazybinary/LazyBinaryUtils.java | 8 ++++ 5 files changed, 90 insertions(+), 1 deletions(-) create mode 100644 serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryBigDecimal.java diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/io/BigDecimalWritable.java b/serde/src/java/org/apache/hadoop/hive/serde2/io/BigDecimalWritable.java index 3543224..342b8f0 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/io/BigDecimalWritable.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/io/BigDecimalWritable.java @@ -22,13 +22,20 @@ import java.io.DataOutput; import java.io.IOException; import java.math.BigDecimal; import java.math.BigInteger; +import java.util.Arrays; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils; +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.io.WritableUtils; public class BigDecimalWritable implements WritableComparable { + static final private Log LOG = LogFactory.getLog(BigDecimalWritable.class); + private byte[] internalStorage = new byte[0]; private int scale; @@ -60,6 +67,24 @@ public class BigDecimalWritable implements WritableComparable { + + LazyBinaryBigDecimal(WritableBigDecimalObjectInspector oi) { + super(oi); + data = new BigDecimalWritable(); + } + + LazyBinaryBigDecimal(LazyBinaryBigDecimal copy) { + super(copy); + data = new BigDecimalWritable(copy.data); + } + + @Override + public void init(ByteArrayRef bytes, int start, int length) { + data.setFromBytes(bytes.getData(), start, length); + } + +} diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java index 86f098f..3111cbc 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import 
org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBigDecimalObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector; @@ -75,6 +76,8 @@ public final class LazyBinaryFactory { return new LazyBinaryTimestamp((WritableTimestampObjectInspector) oi); case BINARY: return new LazyBinaryBinary((WritableBinaryObjectInspector) oi); + case DECIMAL: + return new LazyBinaryBigDecimal((WritableBigDecimalObjectInspector) oi); default: throw new RuntimeException("Internal error: no LazyBinaryObject for " + p); } diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java index 807a52b..823e3e0 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hive.serde2.ByteStream.Output; import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeStats; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; @@ -42,6 +43,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.BigDecimalObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector; @@ -382,6 +384,14 @@ public class LazyBinarySerDe implements SerDe { t.writeToByteStream(byteStream); return warnedOnceNullMapKey; } + + case DECIMAL: { + BigDecimalObjectInspector bdoi = (BigDecimalObjectInspector) poi; + BigDecimalWritable t = bdoi.getPrimitiveWritableObject(obj); + t.writeToByteStream(byteStream); + return warnedOnceNullMapKey; + } + default: { throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory()); diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java index 4711676..d097355 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java @@ -203,6 +203,14 @@ public final class LazyBinaryUtils { recordInfo.elementSize += (byte) WritableUtils.decodeVIntSize(bytes[offset+4]); } break; + case DECIMAL: + // using vint instead of 4 bytes + LazyBinaryUtils.readVInt(bytes, offset, vInt); + recordInfo.elementOffset = 0; + recordInfo.elementSize = 
vInt.length; + LazyBinaryUtils.readVInt(bytes, offset + vInt.length, vInt); + recordInfo.elementSize += vInt.length + vInt.value; + break; default: { throw new RuntimeException("Unrecognized primitive type: " + primitiveCategory); -- 1.7.3.4 From 72d4cd898e466c89db3d188c2ac35480a76c749d Mon Sep 17 00:00:00 2001 From: Josh Wills Date: Sun, 8 Jan 2012 23:19:02 -0800 Subject: [PATCH 07/12] Check in the output file and cleanup the extra logging a bit --- ql/src/test/results/clientpositive/decimal_1.q.out | 117 ++++++++++++++++++++ .../hadoop/hive/serde2/io/BigDecimalWritable.java | 17 +-- 2 files changed, 123 insertions(+), 11 deletions(-) create mode 100644 ql/src/test/results/clientpositive/decimal_1.q.out diff --git a/ql/src/test/results/clientpositive/decimal_1.q.out b/ql/src/test/results/clientpositive/decimal_1.q.out new file mode 100644 index 0000000..4cf6e0a --- /dev/null +++ b/ql/src/test/results/clientpositive/decimal_1.q.out @@ -0,0 +1,117 @@ +PREHOOK: query: drop table decimal_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table decimal_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table decimal_1 (t decimal) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table decimal_1 (t decimal) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@decimal_1 +PREHOOK: query: alter table decimal_1 set serde 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +PREHOOK: type: ALTERTABLE_SERIALIZER +PREHOOK: Input: default@decimal_1 +PREHOOK: Output: default@decimal_1 +POSTHOOK: query: alter table decimal_1 set serde 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +POSTHOOK: type: ALTERTABLE_SERIALIZER +POSTHOOK: Input: default@decimal_1 +POSTHOOK: Output: default@decimal_1 +PREHOOK: query: insert overwrite table decimal_1 + select cast('17.29' as decimal) from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@decimal_1 +POSTHOOK: query: insert overwrite table decimal_1 + select cast('17.29' as decimal) from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@decimal_1 +POSTHOOK: Lineage: decimal_1.t EXPRESSION [] +PREHOOK: query: select cast(t as boolean) from decimal_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_1 +#### A masked pattern was here #### +POSTHOOK: query: select cast(t as boolean) from decimal_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_1.t EXPRESSION [] +true +PREHOOK: query: select cast(t as tinyint) from decimal_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_1 +#### A masked pattern was here #### +POSTHOOK: query: select cast(t as tinyint) from decimal_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_1.t EXPRESSION [] +17 +PREHOOK: query: select cast(t as smallint) from decimal_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_1 +#### A masked pattern was here #### +POSTHOOK: query: select cast(t as smallint) from decimal_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_1.t EXPRESSION [] +17 +PREHOOK: query: select cast(t as int) from decimal_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_1 +#### A masked pattern was here #### +POSTHOOK: query: select cast(t as int) from decimal_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_1 +#### A 
masked pattern was here #### +POSTHOOK: Lineage: decimal_1.t EXPRESSION [] +17 +PREHOOK: query: select cast(t as bigint) from decimal_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_1 +#### A masked pattern was here #### +POSTHOOK: query: select cast(t as bigint) from decimal_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_1.t EXPRESSION [] +17 +PREHOOK: query: select cast(t as float) from decimal_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_1 +#### A masked pattern was here #### +POSTHOOK: query: select cast(t as float) from decimal_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_1.t EXPRESSION [] +17.29 +PREHOOK: query: select cast(t as double) from decimal_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_1 +#### A masked pattern was here #### +POSTHOOK: query: select cast(t as double) from decimal_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_1.t EXPRESSION [] +17.29 +PREHOOK: query: select cast(t as string) from decimal_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_1 +#### A masked pattern was here #### +POSTHOOK: query: select cast(t as string) from decimal_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_1.t EXPRESSION [] +17.29 +PREHOOK: query: drop table decimal_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@decimal_1 +PREHOOK: Output: default@decimal_1 +POSTHOOK: query: drop table decimal_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@decimal_1 +POSTHOOK: Output: default@decimal_1 +POSTHOOK: Lineage: decimal_1.t EXPRESSION [] diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/io/BigDecimalWritable.java b/serde/src/java/org/apache/hadoop/hive/serde2/io/BigDecimalWritable.java index 342b8f0..62bf975 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/io/BigDecimalWritable.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/io/BigDecimalWritable.java @@ -22,7 +22,6 @@ import java.io.DataOutput; import java.io.IOException; import java.math.BigDecimal; import java.math.BigInteger; -import java.util.Arrays; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -67,22 +66,18 @@ public class BigDecimalWritable implements WritableComparable Date: Sun, 8 Jan 2012 23:44:34 -0800 Subject: [PATCH 08/12] Add the lazybinary analogue of the basic decimal test case --- ql/src/test/queries/clientpositive/decimal_2.q | 17 +++ ql/src/test/results/clientpositive/decimal_2.q.out | 117 ++++++++++++++++++++ 2 files changed, 134 insertions(+), 0 deletions(-) create mode 100644 ql/src/test/queries/clientpositive/decimal_2.q create mode 100644 ql/src/test/results/clientpositive/decimal_2.q.out diff --git a/ql/src/test/queries/clientpositive/decimal_2.q b/ql/src/test/queries/clientpositive/decimal_2.q new file mode 100644 index 0000000..f2d7cb8 --- /dev/null +++ b/ql/src/test/queries/clientpositive/decimal_2.q @@ -0,0 +1,17 @@ +drop table decimal_2; + +create table decimal_2 (t decimal); +alter table decimal_2 set serde 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe'; + +insert overwrite table decimal_2 + select cast('17.29' as decimal) from src limit 1; +select cast(t as boolean) from decimal_2 limit 1; +select cast(t as 
tinyint) from decimal_2 limit 1; +select cast(t as smallint) from decimal_2 limit 1; +select cast(t as int) from decimal_2 limit 1; +select cast(t as bigint) from decimal_2 limit 1; +select cast(t as float) from decimal_2 limit 1; +select cast(t as double) from decimal_2 limit 1; +select cast(t as string) from decimal_2 limit 1; + +drop table decimal_2; diff --git a/ql/src/test/results/clientpositive/decimal_2.q.out b/ql/src/test/results/clientpositive/decimal_2.q.out new file mode 100644 index 0000000..284d9a5 --- /dev/null +++ b/ql/src/test/results/clientpositive/decimal_2.q.out @@ -0,0 +1,117 @@ +PREHOOK: query: drop table decimal_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table decimal_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table decimal_2 (t decimal) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table decimal_2 (t decimal) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@decimal_2 +PREHOOK: query: alter table decimal_2 set serde 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe' +PREHOOK: type: ALTERTABLE_SERIALIZER +PREHOOK: Input: default@decimal_2 +PREHOOK: Output: default@decimal_2 +POSTHOOK: query: alter table decimal_2 set serde 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe' +POSTHOOK: type: ALTERTABLE_SERIALIZER +POSTHOOK: Input: default@decimal_2 +POSTHOOK: Output: default@decimal_2 +PREHOOK: query: insert overwrite table decimal_2 + select cast('17.29' as decimal) from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@decimal_2 +POSTHOOK: query: insert overwrite table decimal_2 + select cast('17.29' as decimal) from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@decimal_2 +POSTHOOK: Lineage: decimal_2.t EXPRESSION [] +PREHOOK: query: select cast(t as boolean) from decimal_2 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: query: select cast(t as boolean) from decimal_2 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_2.t EXPRESSION [] +true +PREHOOK: query: select cast(t as tinyint) from decimal_2 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: query: select cast(t as tinyint) from decimal_2 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_2.t EXPRESSION [] +17 +PREHOOK: query: select cast(t as smallint) from decimal_2 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: query: select cast(t as smallint) from decimal_2 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_2.t EXPRESSION [] +17 +PREHOOK: query: select cast(t as int) from decimal_2 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: query: select cast(t as int) from decimal_2 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_2.t EXPRESSION [] +17 +PREHOOK: query: select cast(t as bigint) from decimal_2 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: query: select cast(t as bigint) from decimal_2 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_2.t EXPRESSION [] +17 +PREHOOK: query: select cast(t as float) from decimal_2 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: query: select cast(t as float) from decimal_2 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_2.t EXPRESSION [] +17.29 +PREHOOK: query: select cast(t as double) from decimal_2 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: query: select cast(t as double) from decimal_2 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_2.t EXPRESSION [] +17.29 +PREHOOK: query: select cast(t as string) from decimal_2 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: query: select cast(t as string) from decimal_2 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_2.t EXPRESSION [] +17.29 +PREHOOK: query: drop table decimal_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@decimal_2 +PREHOOK: Output: default@decimal_2 +POSTHOOK: query: drop table decimal_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@decimal_2 +POSTHOOK: Output: default@decimal_2 +POSTHOOK: Lineage: decimal_2.t EXPRESSION [] -- 1.7.3.4 From 02144728fca6c5f6e767eb0e6909d8ef57bd24c1 Mon Sep 17 00:00:00 2001 From: Josh Wills Date: Mon, 9 Jan 2012 11:14:35 -0800 Subject: [PATCH 09/12] Add the impl of the SUM function for decimal types --- .../hadoop/hive/ql/udf/generic/GenericUDAFSum.java | 88 ++++++++++++++++++++ 1 files changed, 88 insertions(+), 0 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java index 5a20f87..f80491d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java @@ -17,12 +17,15 @@ */ package org.apache.hadoop.hive.ql.udf.generic; +import java.math.BigDecimal; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; @@ -66,6 +69,8 @@ public class GenericUDAFSum extends AbstractGenericUDAFResolver { case DOUBLE: case STRING: return new GenericUDAFSumDouble(); + case DECIMAL: + return new GenericUDAFSumBigDecimal(); case BOOLEAN: default: throw new UDFArgumentTypeException(0, @@ -75,6 +80,89 @@ public class GenericUDAFSum extends AbstractGenericUDAFResolver { } /** + * GenericUDAFSumBigDecimal. 
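+ * Decimal counterpart of GenericUDAFSumDouble: the aggregation buffer carries an + * exact BigDecimal running total instead of a double, so the sum loses no precision. + * A sketch of the evaluator life cycle, stated in terms of the members defined below: + * <pre> + * reset(agg); // agg.empty = true, agg.sum = BigDecimal.ZERO + * iterate(agg, row); // delegates to merge(); agg.sum = agg.sum.add(value) + * terminate(agg); // null while agg.empty, else a BigDecimalWritable of agg.sum + * </pre>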
+ * + */ + public static class GenericUDAFSumBigDecimal extends GenericUDAFEvaluator { + private PrimitiveObjectInspector inputOI; + private BigDecimalWritable result; + + @Override + public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { + assert (parameters.length == 1); + super.init(m, parameters); + result = new BigDecimalWritable(BigDecimal.ZERO); + inputOI = (PrimitiveObjectInspector) parameters[0]; + return PrimitiveObjectInspectorFactory.writableBigDecimalObjectInspector; + } + + /** class for storing double sum value. */ + static class SumBigDecimalAgg implements AggregationBuffer { + boolean empty; + BigDecimal sum; + } + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + SumBigDecimalAgg agg = new SumBigDecimalAgg(); + reset(agg); + return agg; + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + SumBigDecimalAgg bdAgg = (SumBigDecimalAgg) agg; + bdAgg.empty = true; + bdAgg.sum = BigDecimal.ZERO; + } + + boolean warned = false; + + @Override + public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { + assert (parameters.length == 1); + try { + merge(agg, parameters[0]); + } catch (NumberFormatException e) { + if (!warned) { + warned = true; + LOG.warn(getClass().getSimpleName() + " " + + StringUtils.stringifyException(e)); + LOG + .warn(getClass().getSimpleName() + + " ignoring similar exceptions."); + } + } + } + + @Override + public Object terminatePartial(AggregationBuffer agg) throws HiveException { + return terminate(agg); + } + + @Override + public void merge(AggregationBuffer agg, Object partial) throws HiveException { + if (partial != null) { + SumBigDecimalAgg myagg = (SumBigDecimalAgg) agg; + myagg.empty = false; + myagg.sum = myagg.sum.add( + PrimitiveObjectInspectorUtils.getBigDecimal(partial, inputOI)); + } + } + + @Override + public Object terminate(AggregationBuffer agg) throws HiveException { + SumBigDecimalAgg myagg = (SumBigDecimalAgg) agg; + if (myagg.empty) { + return null; + } + result.set(myagg.sum); + return result; + } + + } + + /** * GenericUDAFSumDouble. 
* */ -- 1.7.3.4 From 19f197f2b955b35df6cc21ff7565e7b4774e7325 Mon Sep 17 00:00:00 2001 From: Josh Wills Date: Mon, 9 Jan 2012 12:40:48 -0800 Subject: [PATCH 10/12] Add support for primitive numeric operations involving decimal types --- .../hadoop/hive/ql/exec/FunctionRegistry.java | 3 ++- .../org/apache/hadoop/hive/serde2/SerDeUtils.java | 5 +++++ 2 files changed, 7 insertions(+), 1 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 590654f..33422b9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -622,7 +622,8 @@ public final class FunctionRegistry { registerNumericType(Constants.BIGINT_TYPE_NAME, 4); registerNumericType(Constants.FLOAT_TYPE_NAME, 5); registerNumericType(Constants.DOUBLE_TYPE_NAME, 6); - registerNumericType(Constants.STRING_TYPE_NAME, 7); + registerNumericType(Constants.DECIMAL_TYPE_NAME, 7); + registerNumericType(Constants.STRING_TYPE_NAME, 8); } /** diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java index e39ea3d..392a8d3 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.BigDecimalObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector; @@ -278,6 +279,10 @@ public final class SerDeUtils { sb.append(txt.toString()); break; } + case DECIMAL: { + sb.append(((BigDecimalObjectInspector) oi).getPrimitiveJavaObject(o)); + break; + } default: throw new RuntimeException("Unknown primitive type: " + poi.getPrimitiveCategory()); -- 1.7.3.4 From c22fbc053eab6f256080a185b0cbde500ac05185 Mon Sep 17 00:00:00 2001 From: Josh Wills Date: Mon, 9 Jan 2012 14:04:12 -0800 Subject: [PATCH 11/12] Add default decimal impls for all of the main numeric UDAFs --- .../ql/udf/generic/GenericUDAFCorrelation.java | 2 + .../hive/ql/udf/generic/GenericUDAFCovariance.java | 2 + .../udf/generic/GenericUDAFCovarianceSample.java | 2 + .../udf/generic/GenericUDAFHistogramNumeric.java | 9 ++++--- .../udf/generic/GenericUDAFPercentileApprox.java | 21 ++++++++++--------- .../hadoop/hive/ql/udf/generic/GenericUDAFStd.java | 5 ++- .../hive/ql/udf/generic/GenericUDAFStdSample.java | 3 +- .../hive/ql/udf/generic/GenericUDAFVariance.java | 11 +++++---- .../ql/udf/generic/GenericUDAFVarianceSample.java | 3 +- 9 files changed, 35 insertions(+), 23 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java index 43ee547..d5c8e14 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java @@ -102,6 +102,7 @@ 
public class GenericUDAFCorrelation extends AbstractGenericUDAFResolver { case FLOAT: case DOUBLE: case TIMESTAMP: + case DECIMAL: switch (((PrimitiveTypeInfo) parameters[1]).getPrimitiveCategory()) { case BYTE: case SHORT: @@ -110,6 +111,7 @@ public class GenericUDAFCorrelation extends AbstractGenericUDAFResolver { case FLOAT: case DOUBLE: case TIMESTAMP: + case DECIMAL: return new GenericUDAFCorrelationEvaluator(); case STRING: case BOOLEAN: diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java index fdcedfb..f7f24f5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java @@ -93,6 +93,7 @@ public class GenericUDAFCovariance extends AbstractGenericUDAFResolver { case FLOAT: case DOUBLE: case TIMESTAMP: + case DECIMAL: switch (((PrimitiveTypeInfo) parameters[1]).getPrimitiveCategory()) { case BYTE: case SHORT: @@ -101,6 +102,7 @@ public class GenericUDAFCovariance extends AbstractGenericUDAFResolver { case FLOAT: case DOUBLE: case TIMESTAMP: + case DECIMAL: return new GenericUDAFCovarianceEvaluator(); case STRING: case BOOLEAN: diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovarianceSample.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovarianceSample.java index ef3023e..ecf7151 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovarianceSample.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovarianceSample.java @@ -67,6 +67,7 @@ public class GenericUDAFCovarianceSample extends GenericUDAFCovariance { case FLOAT: case DOUBLE: case TIMESTAMP: + case DECIMAL: switch (((PrimitiveTypeInfo) parameters[1]).getPrimitiveCategory()) { case BYTE: case SHORT: @@ -75,6 +76,7 @@ public class GenericUDAFCovarianceSample extends GenericUDAFCovariance { case FLOAT: case DOUBLE: case TIMESTAMP: + case DECIMAL: return new GenericUDAFCovarianceSampleEvaluator(); case STRING: case BOOLEAN: diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFHistogramNumeric.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFHistogramNumeric.java index e0f81e0..b31f1da 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFHistogramNumeric.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFHistogramNumeric.java @@ -45,7 +45,7 @@ import org.apache.hadoop.util.StringUtils; /** * Computes an approximate histogram of a numerical column using a user-specified number of bins. - * + * * The output is an array of (x,y) pairs as Hive struct objects that represents the histogram's * bin centers and heights. 
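[Reviewer note] This patch only widens the accepted-type switches so DECIMAL reaches these double-based evaluators; the aggregation itself still runs in double (see the PrimitiveObjectInspectorUtils.getDouble call visible in the percentile evaluator below). That is a deliberate trade: decimals wider than a double's 15-17 significant digits lose precision on the way in. A standalone sketch of the narrowing conversion, assuming nothing beyond java.math:

import java.math.BigDecimal;

public class DecimalToDoubleSketch {
  public static void main(String[] args) {
    BigDecimal d = new BigDecimal("1234567890123456789.123456789");
    double v = d.doubleValue(); // the narrowing these evaluators rely on
    System.out.println(d);                 // 1234567890123456789.123456789
    System.out.println(new BigDecimal(v)); // 1234567890123456768, the nearest double
  }
}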
*/ @@ -72,7 +72,7 @@ public class GenericUDAFHistogramNumeric extends AbstractGenericUDAFResolver { throw new UDFArgumentTypeException(parameters.length - 1, "Please specify exactly two arguments."); } - + // validate the first parameter, which is the expression to compute over if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { throw new UDFArgumentTypeException(0, @@ -87,6 +87,7 @@ public class GenericUDAFHistogramNumeric extends AbstractGenericUDAFResolver { case FLOAT: case DOUBLE: case TIMESTAMP: + case DECIMAL: break; case STRING: case BOOLEAN: @@ -170,7 +171,7 @@ public class GenericUDAFHistogramNumeric extends AbstractGenericUDAFResolver { @Override public Object terminatePartial(AggregationBuffer agg) throws HiveException { - // Return a single ArrayList where the first element is the number of histogram bins, + // Return a single ArrayList where the first element is the number of histogram bins, // and subsequent elements represent histogram (x,y) pairs. StdAgg myagg = (StdAgg) agg; return myagg.histogram.serialize(); @@ -233,7 +234,7 @@ public class GenericUDAFHistogramNumeric extends AbstractGenericUDAFResolver { } - // Aggregation buffer definition and manipulation methods + // Aggregation buffer definition and manipulation methods static class StdAgg implements AggregationBuffer { NumericHistogram histogram; // the histogram object }; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileApprox.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileApprox.java index 4193a97..eab6e1e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileApprox.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileApprox.java @@ -46,7 +46,7 @@ import org.apache.hadoop.util.StringUtils; /** * Computes an approximate percentile (quantile) from an approximate histogram, for very * large numbers of rows where the regular percentile() UDAF might run out of memory. - * + * * The input is a single double value or an array of double values representing the quantiles * requested. The output, corresponding to the input, is either an single double value or an * array of doubles that are the quantile values. @@ -59,7 +59,7 @@ import org.apache.hadoop.util.StringUtils; extended = "'expr' can be any numeric column, including doubles and floats, and 'pc' is " + "either a single double/float with a requested percentile, or an array of double/" + "float with multiple percentiles. If 'nb' is not specified, the default " + - "approximation is done with 10,000 histogram bins, which means that if there are " + + "approximation is done with 10,000 histogram bins, which means that if there are " + "10,000 or fewer unique values in 'expr', you can expect an exact result. The " + "percentile() function always computes an exact percentile and can run out of " + "memory if there are too many unique values in a column, which necessitates " + @@ -77,7 +77,7 @@ public class GenericUDAFPercentileApprox extends AbstractGenericUDAFResolver { throw new UDFArgumentTypeException(parameters.length - 1, "Please specify either two or three arguments."); } - + // Validate the first parameter, which is the expression to compute over. This should be a // numeric primitive type. 
if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { @@ -93,6 +93,7 @@ public class GenericUDAFPercentileApprox extends AbstractGenericUDAFResolver { case FLOAT: case DOUBLE: case TIMESTAMP: + case DECIMAL: break; default: throw new UDFArgumentTypeException(0, @@ -147,7 +148,7 @@ public class GenericUDAFPercentileApprox extends AbstractGenericUDAFResolver { // Also make sure it is a constant. if (!ObjectInspectorUtils.isConstantObjectInspector(parameters[1])) { throw new UDFArgumentTypeException(1, - "The second argument must be a constant, but " + parameters[1].getTypeName() + + "The second argument must be a constant, but " + parameters[1].getTypeName() + " was passed instead."); } @@ -172,7 +173,7 @@ public class GenericUDAFPercentileApprox extends AbstractGenericUDAFResolver { // Also make sure it is a constant. if (!ObjectInspectorUtils.isConstantObjectInspector(parameters[2])) { throw new UDFArgumentTypeException(2, - "The third argument must be a constant, but " + parameters[2].getTypeName() + + "The third argument must be a constant, but " + parameters[2].getTypeName() + " was passed instead."); } } @@ -184,7 +185,7 @@ public class GenericUDAFPercentileApprox extends AbstractGenericUDAFResolver { return new GenericUDAFSinglePercentileApproxEvaluator(); } } - + public static class GenericUDAFSinglePercentileApproxEvaluator extends GenericUDAFPercentileApproxEvaluator { @@ -234,7 +235,7 @@ public class GenericUDAFPercentileApprox extends AbstractGenericUDAFResolver { } } - + public static class GenericUDAFMultiplePercentileApproxEvaluator extends GenericUDAFPercentileApproxEvaluator { @@ -299,7 +300,7 @@ public class GenericUDAFPercentileApprox extends AbstractGenericUDAFResolver { @Override public void merge(AggregationBuffer agg, Object partial) throws HiveException { - if(partial == null) { + if(partial == null) { return; } PercentileAggBuf myagg = (PercentileAggBuf) agg; @@ -316,7 +317,7 @@ public class GenericUDAFPercentileApprox extends AbstractGenericUDAFResolver { } // merge histograms - myagg.histogram.merge(partialHistogram); + myagg.histogram.merge(partialHistogram); } @Override @@ -382,7 +383,7 @@ public class GenericUDAFPercentileApprox extends AbstractGenericUDAFResolver { } else { result = new double[1]; result[0] = PrimitiveObjectInspectorUtils.getDouble( - quantileObj, + quantileObj, (PrimitiveObjectInspector)quantileOI); } for(int ii = 0; ii < result.length; ++ii) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStd.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStd.java index 2a1a617..5d0aa50 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStd.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStd.java @@ -28,7 +28,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; /** * Compute the standard deviation by extending GenericUDAFVariance and * overriding the terminate() method of the evaluator. - * + * */ @Description(name = "std,stddev,stddev_pop", value = "_FUNC_(x) - Returns the standard deviation of a set of numbers") @@ -56,6 +56,7 @@ public class GenericUDAFStd extends GenericUDAFVariance { case DOUBLE: case STRING: case TIMESTAMP: + case DECIMAL: return new GenericUDAFStdEvaluator(); case BOOLEAN: default: @@ -68,7 +69,7 @@ public class GenericUDAFStd extends GenericUDAFVariance { /** * Compute the standard deviation by extending GenericUDAFVarianceEvaluator * and overriding the terminate() method of the evaluator. 
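[Reviewer note] The extend-and-override pattern named above keeps all accumulation logic in the variance evaluator and post-processes only in terminate(). A compact sketch of the shape (class and field names invented for illustration; the GenericUDAFVariance javadoc further below notes the buffer actually stores n times the variance):

// Aggregation state: row count plus the running sum of squared deviations.
final class VarState {
  long count;
  double nVariance; // n times the population variance
}

class VarianceSketch {
  Double terminate(VarState s) {
    if (s.count == 0) {
      return null; // no rows -> SQL NULL
    }
    return s.nVariance / s.count; // population variance
  }
}

class StdSketch extends VarianceSketch {
  @Override
  Double terminate(VarState s) {
    Double variance = super.terminate(s); // reuse the parent's computation
    return variance == null ? null : Math.sqrt(variance);
  }
}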
- * + * */ public static class GenericUDAFStdEvaluator extends GenericUDAFVarianceEvaluator { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStdSample.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStdSample.java index d5791ed..cde947c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStdSample.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStdSample.java @@ -28,7 +28,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; /** * Compute the sample standard deviation by extending GenericUDAFVariance and * overriding the terminate() method of the evaluator. - * + * */ @Description(name = "stddev_samp", value = "_FUNC_(x) - Returns the sample standard deviation of a set of numbers") @@ -55,6 +55,7 @@ public class GenericUDAFStdSample extends GenericUDAFVariance { case DOUBLE: case STRING: case TIMESTAMP: + case DECIMAL: return new GenericUDAFStdSampleEvaluator(); case BOOLEAN: default: diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java index 0b40d5c..7bba95c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java @@ -43,7 +43,7 @@ import org.apache.hadoop.util.StringUtils; /** * Compute the variance. This class is extended by: GenericUDAFVarianceSample * GenericUDAFStd GenericUDAFStdSample - * + * */ @Description(name = "variance,var_pop", value = "_FUNC_(x) - Returns the variance of a set of numbers") @@ -72,6 +72,7 @@ public class GenericUDAFVariance extends AbstractGenericUDAFResolver { case DOUBLE: case STRING: case TIMESTAMP: + case DECIMAL: return new GenericUDAFVarianceEvaluator(); case BOOLEAN: default: @@ -85,18 +86,18 @@ public class GenericUDAFVariance extends AbstractGenericUDAFResolver { * Evaluate the variance using the algorithm described by Chan, Golub, and LeVeque in * "Algorithms for computing the sample variance: analysis and recommendations" * The American Statistician, 37 (1983) pp. 242--247. - * + * * variance = variance1 + variance2 + n/(m*(m+n)) * pow(((m/n)*t1 - t2),2) - * + * * where: - variance is sum[x-avg^2] (this is actually n times the variance) * and is updated at every step. - n is the count of elements in chunk1 - m is - * the count of elements in chunk2 - t1 = sum of elements in chunk1, t2 = + * the count of elements in chunk2 - t1 = sum of elements in chunk1, t2 = * sum of elements in chunk2. * * This algorithm was proven to be numerically stable by J.L. Barlow in * "Error analysis of a pairwise summation algorithm to compute sample variance" * Numer. Math, 58 (1991) pp. 583--590 - * + * */ public static class GenericUDAFVarianceEvaluator extends GenericUDAFEvaluator { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVarianceSample.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVarianceSample.java index 65d860d..fa549e1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVarianceSample.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVarianceSample.java @@ -28,7 +28,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; /** * Compute the sample variance by extending GenericUDAFVariance and overriding * the terminate() method of the evaluator. 
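[Reviewer note] The merge formula quoted in the GenericUDAFVariance javadoc above is easy to sanity-check by hand. A standalone sketch (plain Java; the numbers are an arbitrary worked example) combining partial states for the chunks {0, 2} and {4}:

public class VarianceMergeSketch {
  public static void main(String[] args) {
    // chunk1 = {0, 2}: n = 2, t1 = 0 + 2 = 2, var1 = (0-1)^2 + (2-1)^2 = 2
    double n = 2, t1 = 2, var1 = 2;
    // chunk2 = {4}: m = 1, t2 = 4, var2 = 0
    double m = 1, t2 = 4, var2 = 0;

    // merged sum of squared deviations, per the formula in the javadoc
    double merged = var1 + var2 + n / (m * (m + n)) * Math.pow((m / n) * t1 - t2, 2);

    // direct computation over {0, 2, 4}: mean = 2, so 4 + 0 + 4 = 8
    System.out.println(merged); // 8.0
  }
}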
- * + * */ @Description(name = "var_samp", value = "_FUNC_(x) - Returns the sample variance of a set of numbers") @@ -56,6 +56,7 @@ public class GenericUDAFVarianceSample extends GenericUDAFVariance { case DOUBLE: case STRING: case TIMESTAMP: + case DECIMAL: return new GenericUDAFVarianceSampleEvaluator(); case BOOLEAN: default: -- 1.7.3.4 From 813849f773ec2d1d55b21b1cffe931b611f37ab1 Mon Sep 17 00:00:00 2001 From: Josh Wills Date: Mon, 9 Jan 2012 15:13:00 -0800 Subject: [PATCH 12/12] HIVE-2693 [jira] Add DECIMAL data type Summary: Fix a bunch of classic UDFs to work with decimal types Add support for the DECIMAL data type. HIVE-2272 (TIMESTAMP) provides a nice template for how to do this. Test Plan: EMPTY Reviewers: JIRA --- .../java/org/apache/hadoop/hive/ql/udf/UDFAbs.java | 11 +++++++ .../org/apache/hadoop/hive/ql/udf/UDFCeil.java | 19 +++++++++++- .../java/org/apache/hadoop/hive/ql/udf/UDFExp.java | 11 ++++++- .../org/apache/hadoop/hive/ql/udf/UDFFloor.java | 19 +++++++++++- .../java/org/apache/hadoop/hive/ql/udf/UDFLog.java | 31 +++++++++++++++++++- .../org/apache/hadoop/hive/ql/udf/UDFLog10.java | 19 +++++++++++- .../org/apache/hadoop/hive/ql/udf/UDFLog2.java | 16 +++++++++- .../org/apache/hadoop/hive/ql/udf/UDFOPDivide.java | 15 ++++++++- .../org/apache/hadoop/hive/ql/udf/UDFPower.java | 9 +++-- .../org/apache/hadoop/hive/ql/udf/UDFRound.java | 17 +++++++++- .../org/apache/hadoop/hive/ql/udf/UDFSqrt.java | 16 +++++++++- 11 files changed, 168 insertions(+), 15 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFAbs.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFAbs.java index 41043bc..bfce482 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFAbs.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFAbs.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; @@ -38,6 +39,7 @@ public class UDFAbs extends UDF { private final DoubleWritable resultDouble = new DoubleWritable(); private final LongWritable resultLong = new LongWritable(); private final IntWritable resultInt = new IntWritable(); + private final BigDecimalWritable resultBigDecimal = new BigDecimalWritable(); public DoubleWritable evaluate(DoubleWritable n) { if (n == null) { @@ -68,4 +70,13 @@ public class UDFAbs extends UDF { return resultInt; } + + public BigDecimalWritable evaluate(BigDecimalWritable n) { + if (n == null) { + return null; + } + + resultBigDecimal.set(n.getBigDecimal().abs()); + return resultBigDecimal; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFCeil.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFCeil.java index 01dd4d6..d40a1eb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFCeil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFCeil.java @@ -18,8 +18,13 @@ package org.apache.hadoop.hive.ql.udf; +import java.math.BigDecimal; +import java.math.MathContext; +import java.math.RoundingMode; + import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.io.LongWritable; @@ -34,7 +39,8 @@ import org.apache.hadoop.io.LongWritable; + " 0\n" + " > SELECT _FUNC_(5) FROM src 
LIMIT 1;\n" + " 5") public class UDFCeil extends UDF { - private LongWritable longWritable = new LongWritable(); + private final LongWritable longWritable = new LongWritable(); + private final BigDecimalWritable bigDecimalWritable = new BigDecimalWritable(); public UDFCeil() { } @@ -48,4 +54,15 @@ public class UDFCeil extends UDF { } } + public BigDecimalWritable evaluate(BigDecimalWritable i) { + if (i == null) { + return null; + } else { + BigDecimal bd = i.getBigDecimal(); + MathContext mc = new MathContext( + bd.precision() - bd.scale(), RoundingMode.CEILING); + bigDecimalWritable.set(bd.round(mc)); + return bigDecimalWritable; + } + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFExp.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFExp.java index 35b0987..10ae445 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFExp.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFExp.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; /** @@ -31,7 +32,7 @@ import org.apache.hadoop.hive.serde2.io.DoubleWritable; extended = "Example:\n " + " > SELECT _FUNC_(0) FROM src LIMIT 1;\n" + " 1") public class UDFExp extends UDF { - private DoubleWritable result = new DoubleWritable(); + private final DoubleWritable result = new DoubleWritable(); public UDFExp() { } @@ -48,4 +49,12 @@ public class UDFExp extends UDF { } } + public DoubleWritable evaluate(BigDecimalWritable a) { + if (a == null) { + return null; + } else { + result.set(Math.exp(a.getBigDecimal().doubleValue())); + return result; + } + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFloor.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFloor.java index 510a161..397a86e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFloor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFloor.java @@ -18,8 +18,13 @@ package org.apache.hadoop.hive.ql.udf; +import java.math.BigDecimal; +import java.math.MathContext; +import java.math.RoundingMode; + import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.io.LongWritable; @@ -34,7 +39,8 @@ import org.apache.hadoop.io.LongWritable; + " -1\n" + " > SELECT _FUNC_(5) FROM src LIMIT 1;\n" + " 5") public class UDFFloor extends UDF { - private LongWritable result = new LongWritable(); + private final LongWritable result = new LongWritable(); + private final BigDecimalWritable bdResult = new BigDecimalWritable(); public UDFFloor() { } @@ -48,4 +54,15 @@ public class UDFFloor extends UDF { } } + public BigDecimalWritable evaluate(BigDecimalWritable i) { + if (i == null) { + return null; + } else { + BigDecimal bd = i.getBigDecimal(); + MathContext mc = new MathContext( + bd.precision() - bd.scale(), RoundingMode.FLOOR); + bdResult.set(bd.round(mc)); + return bdResult; + } + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog.java index 6aa7f6a..b153fbf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.Description; import 
org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; /** @@ -31,7 +32,7 @@ import org.apache.hadoop.hive.serde2.io.DoubleWritable; extended = "Example:\n" + " > SELECT _FUNC_(13, 13) FROM src LIMIT 1;\n" + " 1") public class UDFLog extends UDF { - private DoubleWritable result = new DoubleWritable(); + private final DoubleWritable result = new DoubleWritable(); public UDFLog() { } @@ -48,6 +49,19 @@ public class UDFLog extends UDF { } } + public DoubleWritable evaluate(BigDecimalWritable a) { + if (a == null) { + return null; + } else { + double v = a.getBigDecimal().doubleValue(); + if (v < 0) { + return null; + } + result.set(Math.log(v)); + return result; + } + } + /** * Returns the logarithm of "a" with base "base". */ @@ -60,4 +74,19 @@ public class UDFLog extends UDF { } } + /** + * Returns the logarithm of "a" with base "base". + */ + public DoubleWritable evaluate(DoubleWritable base, BigDecimalWritable a) { + if (a == null || base == null || base.get() <= 1.0) { + return null; + } else { + double v = a.getBigDecimal().doubleValue(); + if (v <= 0.0) { + return null; + } + result.set(Math.log(v) / Math.log(base.get())); + return result; + } + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog10.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog10.java index a6e7c5e..d49384e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog10.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog10.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; /** @@ -33,7 +34,7 @@ import org.apache.hadoop.hive.serde2.io.DoubleWritable; public class UDFLog10 extends UDF { private static double log10 = Math.log(10.0); - private DoubleWritable result = new DoubleWritable(); + private final DoubleWritable result = new DoubleWritable(); public UDFLog10() { } @@ -50,4 +51,20 @@ public class UDFLog10 extends UDF { } } + /** + * Returns the logarithm of "a" with base 10. 
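[Reviewer note] The domain guards in these decimal overloads are not quite consistent: the two-argument UDFLog overload above rejects v <= 0.0, while the single-argument overloads here and in UDFLog2 below only reject v < 0, so a decimal zero reaches Math.log and produces -Infinity instead of NULL. A standalone check of the edge case:

public class LogZeroSketch {
  public static void main(String[] args) {
    // A guard of v < 0 lets zero through to Math.log:
    System.out.println(Math.log(0.0));                  // -Infinity
    System.out.println(Math.log(0.0) / Math.log(10.0)); // -Infinity (the log10 path)
    // Tightening the guard to v <= 0 would return SQL NULL instead,
    // matching the two-argument log(base, x) behaviour.
  }
}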
+ */ + public DoubleWritable evaluate(BigDecimalWritable a) { + if (a == null) { + return null; + } else { + double v = a.getBigDecimal().doubleValue(); + if (v < 0) { + return null; + } + result.set(Math.log(v) / log10); + return result; + } + } + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog2.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog2.java index 87c6d22..1d3af4b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog2.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog2.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; /** @@ -33,7 +34,7 @@ import org.apache.hadoop.hive.serde2.io.DoubleWritable; public class UDFLog2 extends UDF { private static double log2 = Math.log(2.0); - private DoubleWritable result = new DoubleWritable(); + private final DoubleWritable result = new DoubleWritable(); public UDFLog2() { } @@ -50,4 +51,17 @@ public class UDFLog2 extends UDF { } } + public DoubleWritable evaluate(BigDecimalWritable a) { + if (a == null) { + return null; + } else { + double v = a.getBigDecimal().doubleValue(); + if (v < 0) { + return null; + } + result.set(Math.log(v) / log2); + return result; + } + } + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPDivide.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPDivide.java index 0455aa9..a63ca55 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPDivide.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPDivide.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; /** @@ -29,12 +30,13 @@ import org.apache.hadoop.hive.serde2.io.DoubleWritable; @Description(name = "/", value = "a _FUNC_ b - Divide a by b", extended = "Example:\n" + " > SELECT 3 _FUNC_ 2 FROM src LIMIT 1;\n" + " 1.5") /** - * Note that in SQL, the return type of divide is not necessarily the same + * Note that in SQL, the return type of divide is not necessarily the same * as the parameters. For example, 3 / 2 = 1.5, not 1. To follow SQL, we always * return a double for divide. 
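[Reviewer note] The decimal overload added below calls BigDecimal.divide with no rounding context. That throws ArithmeticException both for non-terminating quotients such as 1/3 and for a zero divisor, where the double path returns 0.333... and Infinity respectively. A standalone sketch of both hazards and one possible mitigation (MathContext.DECIMAL128 is an arbitrary choice here, not something this patch series settles on):

import java.math.BigDecimal;
import java.math.MathContext;

public class DecimalDivideSketch {
  public static void main(String[] args) {
    BigDecimal one = BigDecimal.ONE;
    BigDecimal three = new BigDecimal("3");

    try {
      one.divide(three); // 1/3 has no exact decimal representation
    } catch (ArithmeticException e) {
      System.out.println("no rounding context: " + e.getMessage());
    }

    // Supplying a MathContext bounds the quotient at 34 significant digits:
    System.out.println(one.divide(three, MathContext.DECIMAL128));

    // A zero divisor still throws, so the UDF needs an explicit guard:
    BigDecimal divisor = BigDecimal.ZERO;
    if (divisor.signum() == 0) {
      System.out.println("zero divisor -> return null from evaluate()");
    }
  }
}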
*/ public class UDFOPDivide extends UDF { - private DoubleWritable doubleWritable = new DoubleWritable(); + private final DoubleWritable doubleWritable = new DoubleWritable(); + private final BigDecimalWritable bigDecimalWritable = new BigDecimalWritable(); public DoubleWritable evaluate(DoubleWritable a, DoubleWritable b) { // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" @@ -46,4 +48,13 @@ public class UDFOPDivide extends UDF { doubleWritable.set(a.get() / b.get()); return doubleWritable; } + + public BigDecimalWritable evaluate(BigDecimalWritable a, BigDecimalWritable b) { + if ((a == null) || (b == null)) { + return null; + } + + bigDecimalWritable.set(a.getBigDecimal().divide(b.getBigDecimal())); + return bigDecimalWritable; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFPower.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFPower.java index 197adbb..9e211a3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFPower.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFPower.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; /** @@ -31,7 +32,8 @@ import org.apache.hadoop.hive.serde2.io.DoubleWritable; extended = "Example:\n" + " > SELECT _FUNC_(2, 3) FROM src LIMIT 1;\n" + " 8") public class UDFPower extends UDF { - private DoubleWritable result = new DoubleWritable(); + private final DoubleWritable resultDouble = new DoubleWritable(); + private final BigDecimalWritable resultBigDecimal = new BigDecimalWritable(); public UDFPower() { } @@ -43,9 +45,8 @@ public class UDFPower extends UDF { if (a == null || b == null) { return null; } else { - result.set(Math.pow(a.get(), b.get())); - return result; + resultDouble.set(Math.pow(a.get(), b.get())); + return resultDouble; } } - } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRound.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRound.java index 1841739..9957c15 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRound.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRound.java @@ -19,10 +19,12 @@ package org.apache.hadoop.hive.ql.udf; import java.math.BigDecimal; +import java.math.MathContext; import java.math.RoundingMode; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; @@ -36,8 +38,9 @@ import org.apache.hadoop.io.LongWritable; extended = "Example:\n" + " > SELECT _FUNC_(12.3456, 1) FROM src LIMIT 1;\n" + " 12.3'") public class UDFRound extends UDF { - private DoubleWritable doubleWritable = new DoubleWritable(); - private LongWritable longWritable = new LongWritable(); + private final DoubleWritable doubleWritable = new DoubleWritable(); + private final LongWritable longWritable = new LongWritable(); + private final BigDecimalWritable bigDecimalWritable = new BigDecimalWritable(); public UDFRound() { } @@ -65,4 +68,14 @@ public class UDFRound extends UDF { return doubleWritable; } + public BigDecimalWritable evaluate(BigDecimalWritable n, IntWritable i) { + if ((n == null) || (i == null)) { + return null; + } + BigDecimal bd = n.getBigDecimal(); + MathContext mc = new MathContext( + 
        bd.precision() - (bd.scale() - i.get()), RoundingMode.HALF_UP);
+    bigDecimalWritable.set(bd.round(mc));
+    return bigDecimalWritable;
+  }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSqrt.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSqrt.java
index 6717e9d..67343ae 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSqrt.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSqrt.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.udf;
 
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.hive.serde2.io.BigDecimalWritable;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 
 /**
@@ -30,7 +31,7 @@ import org.apache.hadoop.hive.serde2.io.DoubleWritable;
     extended = "Example:\n "
     + "  > SELECT _FUNC_(4) FROM src LIMIT 1;\n" + "  2")
 public class UDFSqrt extends UDF {
-  private DoubleWritable result = new DoubleWritable();
+  private final DoubleWritable result = new DoubleWritable();
 
   public UDFSqrt() {
   }
@@ -49,4 +50,17 @@ public class UDFSqrt extends UDF {
     }
   }
 
+  public DoubleWritable evaluate(BigDecimalWritable i) {
+    if (i == null) {
+      return null;
+    } else {
+      double v = i.getBigDecimal().doubleValue();
+      if (v < 0) {
+        return null;
+      } else {
+        result.set(Math.sqrt(v));
+        return result;
+      }
+    }
+  }
 }
-- 1.7.3.4
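[Reviewer note on the series] Two small follow-ups for a later revision. First, UDFPower now declares a resultBigDecimal field but adds no BigDecimal overload that uses it, so the field is dead weight. Second, the ceil/floor/round overloads build a MathContext with precision() - scale() digits (adjusted by the round target), which is zero or negative for inputs below 1: a precision of 0 means unlimited, so no rounding happens at all, and a negative value throws IllegalArgumentException. BigDecimal.setScale sidesteps that arithmetic entirely. A standalone check of both failure modes (plain Java; the setScale fix is one option, not what the patch does):

import java.math.BigDecimal;
import java.math.MathContext;
import java.math.RoundingMode;

public class DecimalCeilSketch {
  public static void main(String[] args) {
    // ceil(0.5): precision 1, scale 1 -> MathContext precision 0 = unlimited,
    // so round() is a no-op and the ceiling never happens.
    BigDecimal half = new BigDecimal("0.5");
    MathContext mc = new MathContext(half.precision() - half.scale(),
        RoundingMode.CEILING);
    System.out.println(half.round(mc));                         // 0.5
    System.out.println(half.setScale(0, RoundingMode.CEILING)); // 1

    // round(0.05, 0): precision 1, scale 2 -> 1 - (2 - 0) = -1, which
    // MathContext rejects outright.
    BigDecimal cents = new BigDecimal("0.05");
    try {
      new MathContext(cents.precision() - cents.scale(), RoundingMode.HALF_UP);
    } catch (IllegalArgumentException e) {
      System.out.println("negative precision: " + e.getMessage());
    }
    System.out.println(cents.setScale(0, RoundingMode.HALF_UP)); // 0
  }
}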