commit 1e0897a61ba7be849759ec4fa816be95cf0a7114 Author: Bharath Krishna Date: Tue May 15 15:18:45 2018 -0700 HIVE-19370 : Support add_months function to retain time part in hive diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java index dae4b97b4a17e98122431e5fda655fd9f873fdb5..9fc6c944fa4f3c1fb04cceeab7cd0c9857bff70a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java @@ -22,15 +22,18 @@ import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.STRING_GROUP; import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.VOID_GROUP; +import java.text.SimpleDateFormat; import java.util.Calendar; import java.util.Date; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.io.Text; @@ -44,32 +47,61 @@ * */ @Description(name = "add_months", - value = "_FUNC_(start_date, num_months) - Returns the date that is num_months after start_date.", - extended = "start_date is a string in the format 'yyyy-MM-dd HH:mm:ss' or" - + " 'yyyy-MM-dd'. 
num_months is a number. The time part of start_date is " - + "ignored.\n" - + "Example:\n " + " > SELECT _FUNC_('2009-08-31', 1) FROM src LIMIT 1;\n" + " '2009-09-30'") + value = "_FUNC_(start_date, num_months, output_date_format) - Returns the date that is num_months after start_date.", + extended = "start_date is a string or timestamp indicating a valid date. " + + "num_months is a number. output_date_format is an optional String which specifies the format for output.\n" + + "The default output format is 'yyyy-MM-dd'.\n" + + "Example:\n > SELECT _FUNC_('2009-08-31', 1) FROM src LIMIT 1;\n" + " '2009-09-30'." + + "\n > SELECT _FUNC_('2017-12-31 14:15:16', 2, 'yyyy-MM-dd HH:mm:ss') LIMIT 1;\n" + + "'2018-02-28 14:15:16'.\n") @NDV(maxNdv = 250) // 250 seems to be reasonable upper limit for this public class GenericUDFAddMonths extends GenericUDF { - private transient Converter[] converters = new Converter[2]; - private transient PrimitiveCategory[] inputTypes = new PrimitiveCategory[2]; - private final Calendar calendar = Calendar.getInstance(); + private transient Converter[] tsConverters = new Converter[3]; + private transient PrimitiveCategory[] tsInputTypes = new PrimitiveCategory[3]; + private transient Converter[] dtConverters = new Converter[3]; + private transient PrimitiveCategory[] dtInputTypes = new PrimitiveCategory[3]; private final Text output = new Text(); + private transient SimpleDateFormat formatter = null; + private final Calendar calendar = Calendar.getInstance(); private transient Integer numMonthsConst; private transient boolean isNumMonthsConst; @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { - checkArgsSize(arguments, 2, 2); + checkArgsSize(arguments, 2, 3); checkArgPrimitive(arguments, 0); checkArgPrimitive(arguments, 1); - checkArgGroups(arguments, 0, inputTypes, STRING_GROUP, DATE_GROUP, VOID_GROUP); - checkArgGroups(arguments, 1, inputTypes, NUMERIC_GROUP, VOID_GROUP); + if
(arguments.length == 3) { + if (arguments[2] instanceof ConstantObjectInspector) { + checkArgPrimitive(arguments, 2); + checkArgGroups(arguments, 2, tsInputTypes, STRING_GROUP); + String fmtStr = getConstantStringValue(arguments, 2); + if (fmtStr != null) { + try { + formatter = new SimpleDateFormat(fmtStr); + } catch (IllegalArgumentException e) { + // invalid pattern: leave formatter null so the default format is used below + } + } + } + } + if (formatter == null) { + // If the date format is not provided by the user or is invalid, use the default format yyyy-MM-dd + formatter = DateUtils.getDateFormat(); + } + + // the function should support both short date and full timestamp format + // time part of the timestamp should not be skipped + checkArgGroups(arguments, 0, tsInputTypes, STRING_GROUP, DATE_GROUP, VOID_GROUP); + checkArgGroups(arguments, 0, dtInputTypes, STRING_GROUP, DATE_GROUP, VOID_GROUP); - obtainDateConverter(arguments, 0, inputTypes, converters); - obtainIntConverter(arguments, 1, inputTypes, converters); + obtainTimestampConverter(arguments, 0, tsInputTypes, tsConverters); + obtainDateConverter(arguments, 0, dtInputTypes, dtConverters); + + checkArgGroups(arguments, 1, tsInputTypes, NUMERIC_GROUP, VOID_GROUP); + obtainIntConverter(arguments, 1, tsInputTypes, tsConverters); if (arguments[1] instanceof ConstantObjectInspector) { numMonthsConst = getConstantIntValue(arguments, 1); @@ -86,7 +118,7 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { if (isNumMonthsConst) { numMonthV = numMonthsConst; } else { - numMonthV = getIntValue(arguments, 1, converters); + numMonthV = getIntValue(arguments, 1, tsConverters); } if (numMonthV == null) { @@ -94,14 +126,24 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { } int numMonthInt = numMonthV.intValue(); - Date date = getDateValue(arguments, 0, inputTypes, converters); + + // the function should support both short date and full timestamp format + // time part of the timestamp should not be skipped + Date date =
getTimestampValue(arguments, 0, tsConverters); if (date == null) { - return null; + date = getDateValue(arguments, 0, dtInputTypes, dtConverters); + if (date == null) { + return null; + } } addMonth(date, numMonthInt); Date newDate = calendar.getTime(); - output.set(DateUtils.getDateFormat().format(newDate)); + String res = formatter.format(newDate); + if (res == null) { + return null; + } + output.set(res); return output; } diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java index af9b6c43c7dafc69c4944eab02894786af306f35..b3917f43d16a9e9d0b1f54c7f9cdb0cfe87dcf22 100644 --- ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java +++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java @@ -19,19 +19,27 @@ import junit.framework.TestCase; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; +import java.sql.Timestamp; public class TestGenericUDFAddMonths extends TestCase { + private final Text fmtTextWithTime = new Text("YYYY-MM-dd HH:mm:ss"); + private final Text fmtTextWithTimeAndms = new Text("YYYY-MM-dd HH:mm:ss.SSS"); + private final Text fmtTextWithoutTime = new 
Text("YYYY-MM-dd"); + public void testAddMonthsInt() throws HiveException { GenericUDFAddMonths udf = new GenericUDFAddMonths(); ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableStringObjectInspector; @@ -61,6 +69,71 @@ public void testAddMonthsInt() throws HiveException { runAndVerify("2016-02-29 10:30:00", -12, "2015-02-28", udf); runAndVerify("2016-01-29 10:30:00", 1, "2016-02-29", udf); runAndVerify("2016-02-29 10:30:00", -1, "2016-01-31", udf); + runAndVerify("2016-02-29 10:30:00", -1, fmtTextWithoutTime, "2016-01-31", udf); + } + + public void testAddMonthsStringWithTime() throws HiveException { + GenericUDFAddMonths udf = new GenericUDFAddMonths(); + ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableIntObjectInspector; + ObjectInspector valueOI2 = PrimitiveObjectInspectorFactory + .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, + fmtTextWithTime); + + ObjectInspector[] arguments = { valueOI0, valueOI1, valueOI2 }; + udf.initialize(arguments); + runAndVerify("2018-05-10 08:15:12", -1, fmtTextWithTime, "2018-04-10 08:15:12", udf); + runAndVerify("2017-12-31 14:15:16", 2, fmtTextWithTime, "2018-02-28 14:15:16", udf); + runAndVerify("2017-12-31 14:15:16.001", 2, fmtTextWithTime, "2018-02-28 14:15:16", udf); + } + + public void testAddMonthsStringWithTimeWithms() throws HiveException { + GenericUDFAddMonths udf = new GenericUDFAddMonths(); + ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableIntObjectInspector; + ObjectInspector valueOI2 = PrimitiveObjectInspectorFactory + .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, + fmtTextWithTimeAndms); + + ObjectInspector[] arguments = { valueOI0, valueOI1, valueOI2 }; + udf.initialize(arguments); + runAndVerify("2017-12-31 
14:15:16.350", 2, fmtTextWithTimeAndms, "2018-02-28 14:15:16.350", + udf); + runAndVerify("2017-12-31 14:15:16.001", 2, fmtTextWithTimeAndms, "2018-02-28 14:15:16.001", + udf); + //Try to parse ms where there is no millisecond part in input, expected to return .000 as ms + runAndVerify("2017-12-31 14:15:16", 2, fmtTextWithTimeAndms, "2018-02-28 14:15:16.000", udf); + } + + public void testAddMonthsWithNullFormatter() throws HiveException { + GenericUDFAddMonths udf = new GenericUDFAddMonths(); + ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableIntObjectInspector; + ObjectInspector valueOI2 = PrimitiveObjectInspectorFactory + .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, + null); + + ObjectInspector[] arguments = { valueOI0, valueOI1, valueOI2 }; + udf.initialize(arguments); + runAndVerify("2017-12-31 14:15:16.350", 2, null, "2018-02-28", + udf); + runAndVerify("2017-12-31 14:15:16", 2, null, "2018-02-28", + udf); + runAndVerify("2017-12-31", 2, null, "2018-02-28", + udf); + } + public void testAddMonthsTimestamp() throws HiveException { + GenericUDFAddMonths udf = new GenericUDFAddMonths(); + ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector; + ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableIntObjectInspector; + + ObjectInspector valueOI2 = PrimitiveObjectInspectorFactory + .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtTextWithTime); + ObjectInspector[] arguments = {valueOI0, valueOI1, valueOI2}; + + udf.initialize(arguments); + runAndVerify(Timestamp.valueOf("2018-05-10 08:15:12"), 1, fmtTextWithTime, "2018-06-10 08:15:12", udf); + runAndVerify(Timestamp.valueOf("2017-12-31 14:15:16"), 2, fmtTextWithTime, "2018-02-28 14:15:16", udf); } public void testWrongDateStr() throws HiveException { @@ -150,6 +223,28 @@ private void 
runAndVerify(String str, int months, String expResult, GenericUDF u assertEquals("add_months() test ", expResult, output != null ? output.toString() : null); } + private void runAndVerify(String str, int months, Text dateFormat, String expResult, + GenericUDF udf) throws HiveException { + DeferredObject valueObj0 = new DeferredJavaObject(new Text(str)); + DeferredObject valueObj1 = new DeferredJavaObject(new IntWritable(months)); + DeferredObject valueObj2 = new DeferredJavaObject(dateFormat); + DeferredObject[] args = {valueObj0, valueObj1, valueObj2}; + Text output = (Text) udf.evaluate(args); + assertEquals("add_months() test with time part", expResult, + output != null ? output.toString() : null); + } + + private void runAndVerify(Timestamp ts, int months, Text dateFormat, String expResult, GenericUDF udf) + throws HiveException { + DeferredObject valueObj0 = new DeferredJavaObject(new TimestampWritable(ts)); + DeferredObject valueObj1 = new DeferredJavaObject(new IntWritable(months)); + DeferredObject valueObj2 = new DeferredJavaObject(dateFormat); + DeferredObject[] args = {valueObj0, valueObj1, valueObj2}; + Text output = (Text) udf.evaluate(args); + assertEquals("add_months() test for timestamp", expResult, output != null ? output.toString() : null); + } + + private void runAndVerify(String str, short months, String expResult, GenericUDF udf) throws HiveException { DeferredObject valueObj0 = new DeferredJavaObject(new Text(str));