From a00c5926292d9cfe252f55ae2169b89e518c5e86 Mon Sep 17 00:00:00 2001 From: Xiaobing Zhou Date: Thu, 29 Jan 2015 18:02:04 -0800 Subject: [PATCH] HIVE-9472: Implement 7 simple UDFs added to Hive --- .../hadoop/hive/ql/exec/FunctionRegistry.java | 1 + .../hive/ql/udf/generic/GenericUDFDayBase.java | 144 +++++++++++++++++++++ .../hive/ql/udf/generic/GenericUDFFirstDay.java | 58 +++++++++ .../hive/ql/udf/generic/GenericUDFLastDay.java | 114 +--------------- .../ql/udf/generic/TestGenericUDFFirstDay.java | 64 +++++++++ 5 files changed, 272 insertions(+), 109 deletions(-) create mode 100644 ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDayBase.java create mode 100644 ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFFirstDay.java create mode 100644 ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFFirstDay.java diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 23d77ca..2e722da 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -272,6 +272,7 @@ registerUDF("from_unixtime", UDFFromUnixTime.class, false); registerGenericUDF("to_date", GenericUDFDate.class); registerUDF("weekofyear", UDFWeekOfYear.class, false); + registerGenericUDF("first_day", GenericUDFFirstDay.class); registerGenericUDF("last_day", GenericUDFLastDay.class); registerGenericUDF("date_add", GenericUDFDateAdd.class); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDayBase.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDayBase.java new file mode 100644 index 0000000..3eb3ed9 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDayBase.java @@ -0,0 +1,144 @@ +package org.apache.hadoop.hive.ql.udf.generic; + +import java.sql.Timestamp; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import 
java.util.Calendar; +import java.util.Date; + +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.TimestampConverter; +import org.apache.hadoop.io.Text; + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.udf.generic;

import java.sql.Timestamp;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.TimestampConverter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Text;

/**
 * GenericUDFDayBase.
 *
 * Shared base for UDFs that map a date to a particular day of its month
 * (e.g. first_day / last_day). Accepts a STRING/VARCHAR/CHAR in
 * 'yyyy-MM-dd[ HH:mm:ss]' format, a TIMESTAMP, or a DATE argument and
 * returns the computed day formatted as 'yyyy-MM-dd'.
 *
 * Subclasses set {@link #udfName} in their constructor and implement
 * {@link #getThatDay(Date)}.
 */
public abstract class GenericUDFDayBase extends GenericUDF {

  // NOTE(review): SimpleDateFormat is not thread-safe; this is safe only
  // because each UDF instance is evaluated by a single thread — confirm.
  private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
  private transient TimestampConverter timestampConverter;
  private transient Converter textConverter;
  private transient Converter dateWritableConverter;
  // Resolved primitive category of the argument; STRING also covers VARCHAR/CHAR.
  private transient PrimitiveCategory inputType1;
  // Scratch calendar shared with subclasses; getThatDay() mutates and returns it.
  protected final Calendar calendar = Calendar.getInstance();
  // Reused output holder to avoid per-row allocation.
  private final Text output = new Text();
  // Lower-case function name used in error and display strings.
  protected String udfName = "";

  /**
   * Validates the single primitive argument and sets up the appropriate
   * converter for its category.
   *
   * @param arguments exactly one primitive ObjectInspector
   * @return a writable string ObjectInspector for the 'yyyy-MM-dd' result
   * @throws UDFArgumentException if the argument count or type is wrong
   */
  @Override
  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    if (arguments.length != 1) {
      throw new UDFArgumentLengthException(udfName.toUpperCase()
          + "() requires 1 argument, got " + arguments.length);
    }
    if (arguments[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
      throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but "
          + arguments[0].getTypeName() + " is passed as the first argument");
    }
    inputType1 = ((PrimitiveObjectInspector) arguments[0]).getPrimitiveCategory();
    ObjectInspector outputOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
    switch (inputType1) {
    case STRING:
    case VARCHAR:
    case CHAR:
      // Collapse all character types into STRING so evaluate() has one path.
      inputType1 = PrimitiveCategory.STRING;
      textConverter = ObjectInspectorConverters.getConverter(
          (PrimitiveObjectInspector) arguments[0],
          PrimitiveObjectInspectorFactory.writableStringObjectInspector);
      break;
    case TIMESTAMP:
      timestampConverter = new TimestampConverter((PrimitiveObjectInspector) arguments[0],
          PrimitiveObjectInspectorFactory.writableTimestampObjectInspector);
      break;
    case DATE:
      dateWritableConverter = ObjectInspectorConverters.getConverter(
          (PrimitiveObjectInspector) arguments[0],
          PrimitiveObjectInspectorFactory.writableDateObjectInspector);
      break;
    default:
      throw new UDFArgumentException(udfName.toUpperCase()
          + "() only takes STRING/TIMESTAMP/DATEWRITABLE types as first argument, got "
          + inputType1);
    }
    return outputOI;
  }

  /**
   * Converts the argument to a {@link Date}, delegates to
   * {@link #getThatDay(Date)}, and formats the result as 'yyyy-MM-dd'.
   *
   * @return a reused {@link Text} with the formatted day, or null for a null
   *         argument or an unparseable date string
   */
  @Override
  public Object evaluate(DeferredObject[] arguments) throws HiveException {
    if (arguments[0].get() == null) {
      return null;
    }
    Date date;
    switch (inputType1) {
    case STRING:
      String dateString = textConverter.convert(arguments[0].get()).toString();
      try {
        date = formatter.parse(dateString);
      } catch (ParseException e) {
        // Unparseable input yields SQL NULL rather than a query failure.
        return null;
      }
      break;
    case TIMESTAMP:
      date = ((TimestampWritable) timestampConverter.convert(arguments[0].get())).getTimestamp();
      break;
    case DATE:
      date = ((DateWritable) dateWritableConverter.convert(arguments[0].get())).get();
      break;
    default:
      throw new UDFArgumentException(udfName.toUpperCase()
          + "() only takes STRING/TIMESTAMP/DATEWRITABLE types, got " + inputType1);
    }
    // Use the Calendar returned by the subclass instead of relying on the
    // side effect on the shared field.
    Date newDate = getThatDay(date).getTime();
    output.set(formatter.format(newDate));
    return output;
  }

  @Override
  public String getDisplayString(String[] children) {
    StringBuilder sb = new StringBuilder();
    sb.append(udfName).append("(");
    if (children.length > 0) {
      sb.append(children[0]);
      for (int i = 1; i < children.length; i++) {
        sb.append(", ").append(children[i]);
      }
    }
    sb.append(")");
    return sb.toString();
  }

  /**
   * Computes the target day of the month for the given date.
   *
   * @param d the input date (time-of-day is ignored by callers)
   * @return a Calendar positioned on the target day (implementations may
   *         reuse {@link #calendar})
   */
  protected abstract Calendar getThatDay(Date d);
}
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.udf.generic;

import java.util.Calendar;
import java.util.Date;

import org.apache.hadoop.hive.ql.exec.Description;

/**
 * GenericUDFFirstDay.
 *
 * Returns the first day of the month which the date belongs to.
 * The time part of the date will be ignored.
 *
 */
@Description(name = "first_day",
value = "_FUNC_(date) - Returns the first day of the month which the date belongs to.",
extended = "date is a string in the format 'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd'."
    + " The time part of date is ignored.\n"
    + "Example:\n " + " > SELECT _FUNC_('2009-01-12') FROM src LIMIT 1;\n" + " '2009-01-01'")
public class GenericUDFFirstDay extends GenericUDFDayBase {

  public GenericUDFFirstDay() {
    udfName = "first_day";
  }

  /**
   * Rewinds the shared calendar to the first day of the month containing
   * {@code d} and returns it.
   */
  @Override
  protected Calendar getThatDay(Date d) {
    calendar.setTime(d);
    int firstDayOfMonth = calendar.getActualMinimum(Calendar.DAY_OF_MONTH);
    calendar.set(Calendar.DAY_OF_MONTH, firstDayOfMonth);
    return calendar;
  }
}
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.TimestampConverter; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.io.Text; /** * GenericUDFLastDay. @@ -51,102 +34,15 @@ extended = "date is a string in the format 'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd'." + " The time part of date is ignored.\n" + "Example:\n " + " > SELECT _FUNC_('2009-01-12') FROM src LIMIT 1;\n" + " '2009-01-31'") -public class GenericUDFLastDay extends GenericUDF { - private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); - private transient TimestampConverter timestampConverter; - private transient Converter textConverter; - private transient Converter dateWritableConverter; - private transient PrimitiveCategory inputType1; - private final Calendar calendar = Calendar.getInstance(); - private final Text output = new Text(); +public class GenericUDFLastDay extends GenericUDFDayBase { - @Override - public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { - if (arguments.length != 1) { - throw new UDFArgumentLengthException("last_day() requires 1 argument, got " - + arguments.length); - } - if (arguments[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { - throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but " - + arguments[0].getTypeName() + " is passed. 
as first arguments"); - } - inputType1 = ((PrimitiveObjectInspector) arguments[0]).getPrimitiveCategory(); - ObjectInspector outputOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector; - switch (inputType1) { - case STRING: - case VARCHAR: - case CHAR: - inputType1 = PrimitiveCategory.STRING; - textConverter = ObjectInspectorConverters.getConverter( - (PrimitiveObjectInspector) arguments[0], - PrimitiveObjectInspectorFactory.writableStringObjectInspector); - break; - case TIMESTAMP: - timestampConverter = new TimestampConverter((PrimitiveObjectInspector) arguments[0], - PrimitiveObjectInspectorFactory.writableTimestampObjectInspector); - break; - case DATE: - dateWritableConverter = ObjectInspectorConverters.getConverter( - (PrimitiveObjectInspector) arguments[0], - PrimitiveObjectInspectorFactory.writableDateObjectInspector); - break; - default: - throw new UDFArgumentException( - " LAST_DAY() only takes STRING/TIMESTAMP/DATEWRITABLE types as first argument, got " - + inputType1); - } - return outputOI; - } - - @Override - public Object evaluate(DeferredObject[] arguments) throws HiveException { - if (arguments[0].get() == null) { - return null; - } - Date date; - switch (inputType1) { - case STRING: - String dateString = textConverter.convert(arguments[0].get()).toString(); - try { - date = formatter.parse(dateString.toString()); - } catch (ParseException e) { - return null; - } - lastDay(date); - break; - case TIMESTAMP: - Timestamp ts = ((TimestampWritable) timestampConverter.convert(arguments[0].get())) - .getTimestamp(); - date = ts; - lastDay(date); - break; - case DATE: - DateWritable dw = (DateWritable) dateWritableConverter.convert(arguments[0].get()); - date = dw.get(); - lastDay(date); - break; - default: - throw new UDFArgumentException( - "LAST_DAY() only takes STRING/TIMESTAMP/DATEWRITABLE types, got " + inputType1); - } - Date newDate = calendar.getTime(); - output.set(formatter.format(newDate)); - return output; + public 
GenericUDFLastDay() { + udfName = "last_day"; } @Override - public String getDisplayString(String[] children) { - StringBuilder sb = new StringBuilder(); - sb.append("last_day("); - if (children.length > 0) { - sb.append(children[0]); - for (int i = 1; i < children.length; i++) { - sb.append(", "); - sb.append(children[i]); - } - } - sb.append(")"); - return sb.toString(); + protected Calendar getThatDay(Date d) { + return lastDay(d); } protected Calendar lastDay(Date d) { diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFFirstDay.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFFirstDay.java new file mode 100644 index 0000000..a4a946e --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFFirstDay.java @@ -0,0 +1,64 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.Text; + +import junit.framework.TestCase; + +public class TestGenericUDFFirstDay extends TestCase { + + public void testFirstDay() throws HiveException { + GenericUDFFirstDay udf = new GenericUDFFirstDay(); + ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + ObjectInspector[] arguments = { valueOI0 }; + + udf.initialize(arguments); + runAndVerify("2014-01-01", "2014-01-01", udf); + runAndVerify("2014-01-14", "2014-01-01", udf); + runAndVerify("2014-01-31", "2014-01-01", udf); + runAndVerify("2014-02-02", "2014-02-01", udf); + runAndVerify("2014-02-28", "2014-02-01", udf); + runAndVerify("2016-02-03", "2016-02-01", udf); + runAndVerify("2016-02-28", "2016-02-01", udf); + runAndVerify("2016-02-29", "2016-02-01", udf); + + runAndVerify("2014-01-01 10:30:45", "2014-01-01", udf); + runAndVerify("2014-01-14 10:30:45", "2014-01-01", udf); + runAndVerify("2014-01-31 10:30:45", "2014-01-01", udf); + runAndVerify("2014-02-02 10:30:45", "2014-02-01", udf); + runAndVerify("2014-02-28 10:30:45", "2014-02-01", udf); + runAndVerify("2016-02-03 10:30:45", "2016-02-01", udf); + runAndVerify("2016-02-28 10:30:45", "2016-02-01", udf); + runAndVerify("2016-02-29 10:30:45", "2016-02-01", udf); + } + + private void runAndVerify(String str, String expResult, GenericUDF udf) + throws HiveException { + DeferredObject valueObj0 = new DeferredJavaObject(new Text(str)); + DeferredObject[] args = { valueObj0 }; + Text output = (Text) udf.evaluate(args); + assertEquals("frist_day() test ", expResult, 
output.toString()); + } +} -- 1.9.3 (Apple Git-50)