From 4e5e3cdd5ac465a60e8cf1a9f95923fca831177f Mon Sep 17 00:00:00 2001 From: Xiaobing Zhou Date: Tue, 27 Jan 2015 14:31:13 -0800 Subject: [PATCH] HIVE-9480: Implement FIRST_DAY as compared with LAST_DAY already supported in HIVE --- .../hadoop/hive/ql/exec/FunctionRegistry.java | 1 + .../hive/ql/udf/generic/GenericUDFDayBase.java | 126 +++++++++++++++++++++ .../hive/ql/udf/generic/GenericUDFFirstDay.java | 40 +++++++ .../hive/ql/udf/generic/GenericUDFLastDay.java | 99 +--------------- .../ql/udf/generic/TestGenericUDFFirstDay.java | 46 ++++++++ 5 files changed, 219 insertions(+), 93 deletions(-) create mode 100644 ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDayBase.java create mode 100644 ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFFirstDay.java create mode 100644 ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFFirstDay.java diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 23d77ca..2e722da 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -272,6 +272,7 @@ registerUDF("from_unixtime", UDFFromUnixTime.class, false); registerGenericUDF("to_date", GenericUDFDate.class); registerUDF("weekofyear", UDFWeekOfYear.class, false); + registerGenericUDF("first_day", GenericUDFFirstDay.class); registerGenericUDF("last_day", GenericUDFLastDay.class); registerGenericUDF("date_add", GenericUDFDateAdd.class); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDayBase.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDayBase.java new file mode 100644 index 0000000..90726be --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDayBase.java @@ -0,0 +1,126 @@ +package org.apache.hadoop.hive.ql.udf.generic; + +import java.sql.Timestamp; +import java.text.ParseException; +import 
java.text.SimpleDateFormat; +import java.util.Calendar; +import java.util.Date; + +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.TimestampConverter; +import org.apache.hadoop.io.Text; + +public abstract class GenericUDFDayBase extends GenericUDF { + + private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); + private transient TimestampConverter timestampConverter; + private transient Converter textConverter; + private transient Converter dateWritableConverter; + private transient PrimitiveCategory inputType1; + protected final Calendar calendar = Calendar.getInstance(); + private final Text output = new Text(); + protected String udfName = ""; + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + if (arguments.length != 1) { + throw new UDFArgumentLengthException(" " + udfName.toUpperCase() + "() requires 1 argument, got " + + arguments.length); + } + if (arguments[0].getCategory() != 
ObjectInspector.Category.PRIMITIVE) { + throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but " + + arguments[0].getTypeName() + " is passed as first arguments"); + } + inputType1 = ((PrimitiveObjectInspector) arguments[0]).getPrimitiveCategory(); + ObjectInspector outputOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + switch (inputType1) { + case STRING: + case VARCHAR: + case CHAR: + inputType1 = PrimitiveCategory.STRING; + textConverter = ObjectInspectorConverters.getConverter( + (PrimitiveObjectInspector) arguments[0], + PrimitiveObjectInspectorFactory.writableStringObjectInspector); + break; + case TIMESTAMP: + timestampConverter = new TimestampConverter((PrimitiveObjectInspector) arguments[0], + PrimitiveObjectInspectorFactory.writableTimestampObjectInspector); + break; + case DATE: + dateWritableConverter = ObjectInspectorConverters.getConverter( + (PrimitiveObjectInspector) arguments[0], + PrimitiveObjectInspectorFactory.writableDateObjectInspector); + break; + default: + throw new UDFArgumentException( + " " + udfName.toUpperCase() + "() only takes STRING/TIMESTAMP/DATEWRITABLE types as first argument, got " + + inputType1); + } + return outputOI; + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + if (arguments[0].get() == null) { + return null; + } + Date date; + switch (inputType1) { + case STRING: + String dateString = textConverter.convert(arguments[0].get()).toString(); + try { + date = formatter.parse(dateString.toString()); + } catch (ParseException e) { + return null; + } + getThatDay(date); + break; + case TIMESTAMP: + Timestamp ts = ((TimestampWritable) timestampConverter.convert(arguments[0].get())) + .getTimestamp(); + date = ts; + getThatDay(date); + break; + case DATE: + DateWritable dw = (DateWritable) dateWritableConverter.convert(arguments[0].get()); + date = dw.get(); + getThatDay(date); + break; + default: + throw new UDFArgumentException( + 
" " + udfName.toUpperCase() + "() only takes STRING/TIMESTAMP/DATEWRITABLE types, got " + inputType1); + } + Date newDate = calendar.getTime(); + output.set(formatter.format(newDate)); + return output; + } + + @Override + public String getDisplayString(String[] children) { + StringBuilder sb = new StringBuilder(); + sb.append(udfName + "("); + if (children.length > 0) { + sb.append(children[0]); + for (int i = 1; i < children.length; i++) { + sb.append(", "); + sb.append(children[i]); + } + } + sb.append(")"); + return sb.toString(); + } + + protected abstract Calendar getThatDay(Date d); +} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFFirstDay.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFFirstDay.java new file mode 100644 index 0000000..f271b27 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFFirstDay.java @@ -0,0 +1,40 @@ +package org.apache.hadoop.hive.ql.udf.generic; + +import java.util.Calendar; +import java.util.Date; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; + +/** + * GenericUDFFirstDay. + * + * Returns the first day of the month which the date belongs to. + * The time part of the date will be ignored. + * + */ +@Description(name = "first_day", +value = "_FUNC_(date) - Returns the first day of the month which the date belongs to.", +extended = "date is a string in the format 'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd'." 
+ + " The time part of date is ignored.\n" + + "Example:\n " + " > SELECT _FUNC_('2009-01-12') FROM src LIMIT 1;\n" + " '2009-01-01'") +public class GenericUDFFirstDay extends GenericUDFDayBase { + + public GenericUDFFirstDay() { + udfName = "first_day"; + } + + @Override + protected Calendar getThatDay(Date d) { + return firstDay(d); + } + + private Calendar firstDay(Date d) { + calendar.setTime(d); + int minDd = calendar.getActualMinimum(Calendar.DAY_OF_MONTH); + calendar.set(Calendar.DAY_OF_MONTH, minDd); + return calendar; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLastDay.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLastDay.java index 911ee46..6269d09 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLastDay.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLastDay.java @@ -51,105 +51,18 @@ extended = "date is a string in the format 'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd'." + " The time part of date is ignored.\n" + "Example:\n " + " > SELECT _FUNC_('2009-01-12') FROM src LIMIT 1;\n" + " '2009-01-31'") -public class GenericUDFLastDay extends GenericUDF { - private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); - private transient TimestampConverter timestampConverter; - private transient Converter textConverter; - private transient Converter dateWritableConverter; - private transient PrimitiveCategory inputType1; - private final Calendar calendar = Calendar.getInstance(); - private final Text output = new Text(); +public class GenericUDFLastDay extends GenericUDFDayBase { - @Override - public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { - if (arguments.length != 1) { - throw new UDFArgumentLengthException("last_day() requires 1 argument, got " - + arguments.length); - } - if (arguments[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { - throw new UDFArgumentTypeException(0, "Only primitive 
type arguments are accepted but " - + arguments[0].getTypeName() + " is passed. as first arguments"); - } - inputType1 = ((PrimitiveObjectInspector) arguments[0]).getPrimitiveCategory(); - ObjectInspector outputOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector; - switch (inputType1) { - case STRING: - case VARCHAR: - case CHAR: - inputType1 = PrimitiveCategory.STRING; - textConverter = ObjectInspectorConverters.getConverter( - (PrimitiveObjectInspector) arguments[0], - PrimitiveObjectInspectorFactory.writableStringObjectInspector); - break; - case TIMESTAMP: - timestampConverter = new TimestampConverter((PrimitiveObjectInspector) arguments[0], - PrimitiveObjectInspectorFactory.writableTimestampObjectInspector); - break; - case DATE: - dateWritableConverter = ObjectInspectorConverters.getConverter( - (PrimitiveObjectInspector) arguments[0], - PrimitiveObjectInspectorFactory.writableDateObjectInspector); - break; - default: - throw new UDFArgumentException( - " LAST_DAY() only takes STRING/TIMESTAMP/DATEWRITABLE types as first argument, got " - + inputType1); - } - return outputOI; - } - - @Override - public Object evaluate(DeferredObject[] arguments) throws HiveException { - if (arguments[0].get() == null) { - return null; - } - Date date; - switch (inputType1) { - case STRING: - String dateString = textConverter.convert(arguments[0].get()).toString(); - try { - date = formatter.parse(dateString.toString()); - } catch (ParseException e) { - return null; - } - lastDay(date); - break; - case TIMESTAMP: - Timestamp ts = ((TimestampWritable) timestampConverter.convert(arguments[0].get())) - .getTimestamp(); - date = ts; - lastDay(date); - break; - case DATE: - DateWritable dw = (DateWritable) dateWritableConverter.convert(arguments[0].get()); - date = dw.get(); - lastDay(date); - break; - default: - throw new UDFArgumentException( - "LAST_DAY() only takes STRING/TIMESTAMP/DATEWRITABLE types, got " + inputType1); - } - Date newDate = calendar.getTime(); 
- output.set(formatter.format(newDate)); - return output; + public GenericUDFLastDay() { + udfName = "last_day"; } @Override - public String getDisplayString(String[] children) { - StringBuilder sb = new StringBuilder(); - sb.append("last_day("); - if (children.length > 0) { - sb.append(children[0]); - for (int i = 1; i < children.length; i++) { - sb.append(", "); - sb.append(children[i]); - } - } - sb.append(")"); - return sb.toString(); + protected Calendar getThatDay(Date d) { + return lastDay(d); } - protected Calendar lastDay(Date d) { + private Calendar lastDay(Date d) { calendar.setTime(d); int maxDd = calendar.getActualMaximum(Calendar.DAY_OF_MONTH); calendar.set(Calendar.DAY_OF_MONTH, maxDd); diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFFirstDay.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFFirstDay.java new file mode 100644 index 0000000..a145e65 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFFirstDay.java @@ -0,0 +1,46 @@ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.Text; + +import junit.framework.TestCase; + +public class TestGenericUDFFirstDay extends TestCase { + + public void testFirstDay() throws HiveException { + GenericUDFFirstDay udf = new GenericUDFFirstDay(); + ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + ObjectInspector[] arguments = { valueOI0 }; + + udf.initialize(arguments); + runAndVerify("2014-01-01", "2014-01-01", udf); + runAndVerify("2014-01-14", "2014-01-01", udf); + runAndVerify("2014-01-31", 
"2014-01-01", udf); +    runAndVerify("2014-02-02", "2014-02-01", udf); +    runAndVerify("2014-02-28", "2014-02-01", udf); +    runAndVerify("2016-02-03", "2016-02-01", udf); +    runAndVerify("2016-02-28", "2016-02-01", udf); +    runAndVerify("2016-02-29", "2016-02-01", udf); + +    runAndVerify("2014-01-01 10:30:45", "2014-01-01", udf); +    runAndVerify("2014-01-14 10:30:45", "2014-01-01", udf); +    runAndVerify("2014-01-31 10:30:45", "2014-01-01", udf); +    runAndVerify("2014-02-02 10:30:45", "2014-02-01", udf); +    runAndVerify("2014-02-28 10:30:45", "2014-02-01", udf); +    runAndVerify("2016-02-03 10:30:45", "2016-02-01", udf); +    runAndVerify("2016-02-28 10:30:45", "2016-02-01", udf); +    runAndVerify("2016-02-29 10:30:45", "2016-02-01", udf); +  } + +  private void runAndVerify(String str, String expResult, GenericUDF udf) +      throws HiveException { +    DeferredObject valueObj0 = new DeferredJavaObject(new Text(str)); +    DeferredObject[] args = { valueObj0 }; +    Text output = (Text) udf.evaluate(args); +    assertEquals("first_day() test ", expResult, output.toString()); +  } +} -- 1.9.3 (Apple Git-50)