commit d15f621d8b69115721ab779f9950bd34ef8efeee Author: Bharath Krishna Date: Tue May 15 15:18:45 2018 -0700 HIVE-19370 : Support add_months function to retain time part in hive diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java index dae4b97b4a17e98122431e5fda655fd9f873fdb5..ea1544f55225907a44c989acff0c22f9c08eba47 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java @@ -22,11 +22,13 @@ import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.STRING_GROUP; import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.VOID_GROUP; +import java.text.SimpleDateFormat; import java.util.Calendar; import java.util.Date; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -44,32 +46,61 @@ * */ @Description(name = "add_months", - value = "_FUNC_(start_date, num_months) - Returns the date that is num_months after start_date.", - extended = "start_date is a string in the format 'yyyy-MM-dd HH:mm:ss' or" - + " 'yyyy-MM-dd'. num_months is a number. The time part of start_date is " - + "ignored.\n" - + "Example:\n " + " > SELECT _FUNC_('2009-08-31', 1) FROM src LIMIT 1;\n" + " '2009-09-30'") + value = "_FUNC_(start_date, num_months, output_date_format) - " + + "Returns the date that is num_months after start_date.", + extended = "start_date is a string or timestamp indicating a valid date. " + + "num_months is a number. output_date_format is an optional String which specifies the format for output.\n" + + "The default output format is 'YYYY-MM-dd'.\n" + + "Example:\n > SELECT _FUNC_('2009-08-31', 1) FROM src LIMIT 1;\n" + " '2009-09-30'." + + "\n > SELECT _FUNC_('2017-12-31 14:15:16', 2, 'YYYY-MM-dd HH:mm:ss') LIMIT 1;\n" + + "'2018-02-28 14:15:16'.\n") @NDV(maxNdv = 250) // 250 seems to be reasonable upper limit for this public class GenericUDFAddMonths extends GenericUDF { - private transient Converter[] converters = new Converter[2]; - private transient PrimitiveCategory[] inputTypes = new PrimitiveCategory[2]; - private final Calendar calendar = Calendar.getInstance(); + private transient Converter[] tsConverters = new Converter[3]; + private transient PrimitiveCategory[] tsInputTypes = new PrimitiveCategory[3]; + private transient Converter[] dtConverters = new Converter[3]; + private transient PrimitiveCategory[] dtInputTypes = new PrimitiveCategory[3]; private final Text output = new Text(); + private transient SimpleDateFormat formatter = null; + private final Calendar calendar = Calendar.getInstance(); private transient Integer numMonthsConst; private transient boolean isNumMonthsConst; @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { - checkArgsSize(arguments, 2, 2); + checkArgsSize(arguments, 2, 3); checkArgPrimitive(arguments, 0); checkArgPrimitive(arguments, 1); - checkArgGroups(arguments, 0, inputTypes, STRING_GROUP, DATE_GROUP, VOID_GROUP); - checkArgGroups(arguments, 1, inputTypes, NUMERIC_GROUP, VOID_GROUP); + if (arguments.length == 3) { + if (arguments[2] instanceof ConstantObjectInspector) { + checkArgPrimitive(arguments, 2); + checkArgGroups(arguments, 2, tsInputTypes, STRING_GROUP); + String fmtStr = getConstantStringValue(arguments, 2); + if (fmtStr != null) { + formatter = new SimpleDateFormat(fmtStr); + } + } else { + throw new UDFArgumentTypeException(2, getFuncName() + " only takes constant as " + + getArgOrder(2) + " argument"); + } + } + if (formatter == null) { + //If the DateFormat is not provided by the user or is invalid, use the default format YYYY-MM-dd + formatter = DateUtils.getDateFormat(); + } + + // the function should support both short date and full timestamp format + // time part of the timestamp should not be skipped + checkArgGroups(arguments, 0, tsInputTypes, STRING_GROUP, DATE_GROUP, VOID_GROUP); + checkArgGroups(arguments, 0, dtInputTypes, STRING_GROUP, DATE_GROUP, VOID_GROUP); - obtainDateConverter(arguments, 0, inputTypes, converters); - obtainIntConverter(arguments, 1, inputTypes, converters); + obtainTimestampConverter(arguments, 0, tsInputTypes, tsConverters); + obtainDateConverter(arguments, 0, dtInputTypes, dtConverters); + + checkArgGroups(arguments, 1, tsInputTypes, NUMERIC_GROUP, VOID_GROUP); + obtainIntConverter(arguments, 1, tsInputTypes, tsConverters); if (arguments[1] instanceof ConstantObjectInspector) { numMonthsConst = getConstantIntValue(arguments, 1); @@ -86,7 +117,7 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { if (isNumMonthsConst) { numMonthV = numMonthsConst; } else { - numMonthV = getIntValue(arguments, 1, converters); + numMonthV = getIntValue(arguments, 1, tsConverters); } if (numMonthV == null) { @@ -94,14 +125,22 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { } int numMonthInt = numMonthV.intValue(); - Date date = getDateValue(arguments, 0, inputTypes, converters); + + // the function should support both short date and full timestamp format + // time part of the timestamp should not be skipped + Date date = getTimestampValue(arguments, 0, tsConverters); if (date == null) { - return null; + date = getDateValue(arguments, 0, dtInputTypes, dtConverters); + if (date == null) { + return null; + } } addMonth(date, numMonthInt); Date newDate = calendar.getTime(); - output.set(DateUtils.getDateFormat().format(newDate)); + String res = formatter.format(newDate); + + output.set(res); return output; } diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java index af9b6c43c7dafc69c4944eab02894786af306f35..0db9370ef746eca40a4c989bcebf7fe7c819ed7f 100644 --- ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java +++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java @@ -25,13 +25,21 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; +import java.sql.Timestamp; public class TestGenericUDFAddMonths extends TestCase { + private final Text fmtTextWithTime = new Text("YYYY-MM-dd HH:mm:ss"); + private final Text fmtTextWithTimeAndms = new Text("YYYY-MM-dd HH:mm:ss.SSS"); + private final Text fmtTextWithoutTime = new Text("YYYY-MM-dd"); + private final Text fmtTextInvalid = new Text("YYYY-abcdz"); + public void testAddMonthsInt() throws HiveException { GenericUDFAddMonths udf = new GenericUDFAddMonths(); ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableStringObjectInspector; @@ -61,6 +69,85 @@ public void testAddMonthsInt() throws HiveException { runAndVerify("2016-02-29 10:30:00", -12, "2015-02-28", udf); runAndVerify("2016-01-29 10:30:00", 1, "2016-02-29", udf); runAndVerify("2016-02-29 10:30:00", -1, "2016-01-31", udf); + runAndVerify("2016-02-29 10:30:00", -1, fmtTextWithoutTime, "2016-01-31", udf); + } + + public void testAddMonthsStringWithTime() throws HiveException { + GenericUDFAddMonths udf = new GenericUDFAddMonths(); + ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableIntObjectInspector; + ObjectInspector valueOI2 = PrimitiveObjectInspectorFactory + .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, + fmtTextWithTime); + + ObjectInspector[] arguments = {valueOI0, valueOI1, valueOI2}; + udf.initialize(arguments); + runAndVerify("2018-05-10 08:15:12", -1, fmtTextWithTime, "2018-04-10 08:15:12", udf); + runAndVerify("2017-12-31 14:15:16", 2, fmtTextWithTime, "2018-02-28 14:15:16", udf); + runAndVerify("2017-12-31 14:15:16.001", 2, fmtTextWithTime, "2018-02-28 14:15:16", udf); + } + + public void testAddMonthsInvalidFormatter() throws HiveException { + GenericUDFAddMonths udf = new GenericUDFAddMonths(); + ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableIntObjectInspector; + ObjectInspector valueOI2 = PrimitiveObjectInspectorFactory + .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, + fmtTextInvalid); + + ObjectInspector[] arguments = {valueOI0, valueOI1, valueOI2}; + try { + udf.initialize(arguments); + fail("Expected to throw an exception for invalid DateFormat"); + } catch (IllegalArgumentException e) { + //test success if exception caught + } + } + public void testAddMonthsStringWithTimeWithms() throws HiveException { + GenericUDFAddMonths udf = new GenericUDFAddMonths(); + ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableIntObjectInspector; + ObjectInspector valueOI2 = PrimitiveObjectInspectorFactory + .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, + fmtTextWithTimeAndms); + + ObjectInspector[] arguments = {valueOI0, valueOI1, valueOI2}; + udf.initialize(arguments); + runAndVerify("2017-12-31 14:15:16.350", 2, fmtTextWithTimeAndms, "2018-02-28 14:15:16.350", + udf); + runAndVerify("2017-12-31 14:15:16.001", 2, fmtTextWithTimeAndms, "2018-02-28 14:15:16.001", + udf); + //Try to parse ms where there is no millisecond part in input, expected to return .000 as ms + runAndVerify("2017-12-31 14:15:16", 2, fmtTextWithTimeAndms, "2018-02-28 14:15:16.000", udf); + } + + public void testAddMonthsWithNullFormatter() throws HiveException { + GenericUDFAddMonths udf = new GenericUDFAddMonths(); + ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableIntObjectInspector; + ObjectInspector valueOI2 = PrimitiveObjectInspectorFactory + .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, + null); + + ObjectInspector[] arguments = {valueOI0, valueOI1, valueOI2}; + udf.initialize(arguments); + runAndVerify("2017-12-31 14:15:16.350", 2, null, "2018-02-28", + udf); + runAndVerify("2017-12-31", 2, null, "2018-02-28", + udf); + } + public void testAddMonthsTimestamp() throws HiveException { + GenericUDFAddMonths udf = new GenericUDFAddMonths(); + ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector; + ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableIntObjectInspector; + + ObjectInspector valueOI2 = PrimitiveObjectInspectorFactory + .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtTextWithTime); + ObjectInspector[] arguments = {valueOI0, valueOI1, valueOI2}; + + udf.initialize(arguments); + runAndVerify(Timestamp.valueOf("2018-05-10 08:15:12"), 1, fmtTextWithTime, "2018-06-10 08:15:12", udf); + runAndVerify(Timestamp.valueOf("2017-12-31 14:15:16"), 2, fmtTextWithTime, "2018-02-28 14:15:16", udf); } public void testWrongDateStr() throws HiveException { @@ -150,6 +237,28 @@ private void runAndVerify(String str, int months, String expResult, GenericUDF u assertEquals("add_months() test ", expResult, output != null ? output.toString() : null); } + private void runAndVerify(String str, int months, Text dateFormat, String expResult, + GenericUDF udf) throws HiveException { + DeferredObject valueObj0 = new DeferredJavaObject(new Text(str)); + DeferredObject valueObj1 = new DeferredJavaObject(new IntWritable(months)); + DeferredObject valueObj2 = new DeferredJavaObject(dateFormat); + DeferredObject[] args = {valueObj0, valueObj1, valueObj2}; + Text output = (Text) udf.evaluate(args); + assertEquals("add_months() test with time part", expResult, + output != null ? output.toString() : null); + } + + private void runAndVerify(Timestamp ts, int months, Text dateFormat, String expResult, GenericUDF udf) + throws HiveException { + DeferredObject valueObj0 = new DeferredJavaObject(new TimestampWritable(ts)); + DeferredObject valueObj1 = new DeferredJavaObject(new IntWritable(months)); + DeferredObject valueObj2 = new DeferredJavaObject(dateFormat); + DeferredObject[] args = {valueObj0, valueObj1, valueObj2}; + Text output = (Text) udf.evaluate(args); + assertEquals("add_months() test for timestamp", expResult, output != null ? output.toString() : null); + } + + private void runAndVerify(String str, short months, String expResult, GenericUDF udf) throws HiveException { DeferredObject valueObj0 = new DeferredJavaObject(new Text(str)); diff --git ql/src/test/queries/clientpositive/udf_add_months.q ql/src/test/queries/clientpositive/udf_add_months.q index 0b8eeeeb444fd1657117dec18c2b4e7173767617..0ef7c3e0aa08bf43ec296f502443645e0867ccc4 100644 --- ql/src/test/queries/clientpositive/udf_add_months.q +++ ql/src/test/queries/clientpositive/udf_add_months.q @@ -25,6 +25,7 @@ add_months('2014-04-30 10:30:00', -2), add_months('2015-02-28 10:30:00', 12), add_months('2016-02-29 10:30:00', -12), add_months('2016-01-29 10:30:00', 1), +add_months('2016-02-29 10:30:12', -12, 'YYYY-MM-dd HH:mm:ss'), add_months('2016-02-29 10:30:00', -1); select @@ -37,4 +38,5 @@ add_months(cast('2015-02-28 10:30:00' as timestamp), 12), add_months(cast('2016-02-29 10:30:00' as timestamp), -12), add_months(cast('2016-01-29 10:30:00' as timestamp), 1), add_months(cast('2016-02-29 10:30:00' as timestamp), -1), +add_months(cast('2016-02-29 10:30:12' as timestamp), 2, 'YYYY-MM-dd HH:mm:ss'), add_months(cast(null as timestamp), 1); diff --git ql/src/test/results/clientpositive/udf_add_months.q.out ql/src/test/results/clientpositive/udf_add_months.q.out index 5ba720ae85d30c0da7f94f377d6b324bce850907..69b0c4f541c093721cf3ab04018c360fb8b4bb31 100644 --- ql/src/test/results/clientpositive/udf_add_months.q.out +++ ql/src/test/results/clientpositive/udf_add_months.q.out @@ -2,16 +2,20 @@ PREHOOK: query: DESCRIBE FUNCTION add_months PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION add_months POSTHOOK: type: DESCFUNCTION -add_months(start_date, num_months) - Returns the date that is num_months after start_date. +add_months(start_date, num_months, output_date_format) - Returns the date that is num_months after start_date. PREHOOK: query: DESCRIBE FUNCTION EXTENDED add_months PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION EXTENDED add_months POSTHOOK: type: DESCFUNCTION -add_months(start_date, num_months) - Returns the date that is num_months after start_date. -start_date is a string in the format 'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd'. num_months is a number. The time part of start_date is ignored. +add_months(start_date, num_months, output_date_format) - Returns the date that is num_months after start_date. +start_date is a string or timestamp indicating a valid date. num_months is a number. output_date_format is an optional String which specifies the format for output. +The default output format is 'YYYY-MM-dd'. Example: > SELECT add_months('2009-08-31', 1) FROM src LIMIT 1; - '2009-09-30' + '2009-09-30'. + > SELECT add_months('2017-12-31 14:15:16', 2, 'YYYY-MM-dd HH:mm:ss') LIMIT 1; +'2018-02-28 14:15:16'. + Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDFAddMonths Function type:BUILTIN PREHOOK: query: explain select add_months('2014-01-14', 1) @@ -76,6 +80,7 @@ add_months('2014-04-30 10:30:00', -2), add_months('2015-02-28 10:30:00', 12), add_months('2016-02-29 10:30:00', -12), add_months('2016-01-29 10:30:00', 1), +add_months('2016-02-29 10:30:12', -12, 'YYYY-MM-dd HH:mm:ss'), add_months('2016-02-29 10:30:00', -1) PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table @@ -89,11 +94,12 @@ add_months('2014-04-30 10:30:00', -2), add_months('2015-02-28 10:30:00', 12), add_months('2016-02-29 10:30:00', -12), add_months('2016-01-29 10:30:00', 1), +add_months('2016-02-29 10:30:12', -12, 'YYYY-MM-dd HH:mm:ss'), add_months('2016-02-29 10:30:00', -1) POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### -2014-02-14 2014-02-28 2014-01-31 2014-04-30 2014-02-28 2016-02-29 2015-02-28 2016-02-29 2016-01-31 +2014-02-14 2014-02-28 2014-01-31 2014-04-30 2014-02-28 2016-02-29 2015-02-28 2016-02-29 2015-02-28 10:30:12 2016-01-31 PREHOOK: query: select add_months(cast('2014-01-14 10:30:00' as timestamp), 1), add_months(cast('2014-01-31 10:30:00' as timestamp), 1), @@ -104,6 +110,7 @@ add_months(cast('2015-02-28 10:30:00' as timestamp), 12), add_months(cast('2016-02-29 10:30:00' as timestamp), -12), add_months(cast('2016-01-29 10:30:00' as timestamp), 1), add_months(cast('2016-02-29 10:30:00' as timestamp), -1), +add_months(cast('2016-02-29 10:30:12' as timestamp), 2, 'YYYY-MM-dd HH:mm:ss'), add_months(cast(null as timestamp), 1) PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table @@ -118,8 +125,9 @@ add_months(cast('2015-02-28 10:30:00' as timestamp), 12), add_months(cast('2016-02-29 10:30:00' as timestamp), -12), add_months(cast('2016-01-29 10:30:00' as timestamp), 1), add_months(cast('2016-02-29 10:30:00' as timestamp), -1), +add_months(cast('2016-02-29 10:30:12' as timestamp), 2, 'YYYY-MM-dd HH:mm:ss'), add_months(cast(null as timestamp), 1) POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### -2014-02-14 2014-02-28 2014-01-31 2014-04-30 2014-02-28 2016-02-29 2015-02-28 2016-02-29 2016-01-31 NULL +2014-02-14 2014-02-28 2014-01-31 2014-04-30 2014-02-28 2016-02-29 2015-02-28 2016-02-29 2016-01-31 2016-04-30 10:30:12 NULL