diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java
index 259fde8..00a4f38 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java
@@ -43,9 +43,11 @@
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping;
+import org.apache.hadoop.io.BooleanWritable;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hive.common.util.DateUtils;
@@ -530,6 +532,29 @@ protected String getConstantStringValue(ObjectInspector[] arguments, int i) {
     return str;
   }
 
+  /**
+   * Reads the i-th argument as a constant BOOLEAN.
+   *
+   * @param arguments object inspectors of the UDF arguments; arguments[i] is cast to
+   *          ConstantObjectInspector without a prior check
+   * @param i index of the argument to read
+   * @return the constant's value, or false when the constant value is null
+   * @throws UDFArgumentTypeException if the constant is not a BooleanWritable
+   */
+  protected Boolean getConstantBooleanValue(ObjectInspector[] arguments, int i)
+      throws UDFArgumentTypeException {
+    Object constValue = ((ConstantObjectInspector) arguments[i]).getWritableConstantValue();
+    if (constValue == null) {
+      return false;
+    }
+    if (constValue instanceof BooleanWritable) {
+      return ((BooleanWritable) constValue).get();
+    } else {
+      throw new UDFArgumentTypeException(i, getFuncName() + " only takes BOOLEAN types as "
+          + getArgOrder(i) + " argument, got " + constValue.getClass());
+    }
+  }
+
   protected Integer getConstantIntValue(ObjectInspector[] arguments, int i)
       throws UDFArgumentTypeException {
     Object constValue = ((ConstantObjectInspector) arguments[i]).getWritableConstantValue();
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMonthsBetween.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMonthsBetween.java
index 35dc51a..be9127a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMonthsBetween.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMonthsBetween.java
@@ -35,6 +35,7 @@
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
@@ -44,7 +45,8 @@
  * UDFMonthsBetween.
  *
  */
-@Description(name = "months_between", value = "_FUNC_(date1, date2) - returns number of months between dates date1 and date2",
+@Description(name = "months_between", value = "_FUNC_(date1, date2, roundOff) "
+    + "- returns number of months between dates date1 and date2",
     extended = "If date1 is later than date2, then the result is positive. "
     + "If date1 is earlier than date2, then the result is negative. "
     + "If date1 and date2 are either the same days of the month or both last days of months, "
@@ -53,7 +55,7 @@
     + "month and considers the difference in time components date1 and date2.\n"
     + "date1 and date2 type can be date, timestamp or string in the format "
     + "'yyyy-MM-dd' or 'yyyy-MM-dd HH:mm:ss'. "
-    + "The result is rounded to 8 decimal places.\n"
+    + "The result is rounded to 8 decimal places by default. Set roundOff=false otherwise. \n"
     + " Example:\n"
     + " > SELECT _FUNC_('1997-02-28 10:30:00', '1996-10-30');\n 3.94959677")
 public class GenericUDFMonthsBetween extends GenericUDF {
@@ -64,14 +66,25 @@
   private final Calendar cal1 = Calendar.getInstance();
   private final Calendar cal2 = Calendar.getInstance();
   private final DoubleWritable output = new DoubleWritable();
+  // set once in initialize(); when false, evaluate() skips the 8-decimal rounding
+  private boolean isRoundOffNeeded = true;
 
   @Override
   public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
-    checkArgsSize(arguments, 2, 2);
+    checkArgsSize(arguments, 2, 3);
 
     checkArgPrimitive(arguments, 0);
     checkArgPrimitive(arguments, 1);
 
+    if (arguments.length == 3) {
+      // roundOff is read once here, so a non-constant value must be rejected, not ignored
+      if (!(arguments[2] instanceof ConstantObjectInspector)) {
+        throw new UDFArgumentException(
+            "months_between only takes a constant BOOLEAN as roundOff (3rd) argument");
+      }
+      isRoundOffNeeded = getConstantBooleanValue(arguments, 2);
+    }
+
     // the function should support both short date and full timestamp format
     // time part of the timestamp should not be skipped
     checkArgGroups(arguments, 0, tsInputTypes, STRING_GROUP, DATE_GROUP);
@@ -129,9 +142,11 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException {
     // 1 sec is 0.000000373 months (1/2678400). 1 month is 31 days.
     // there should be no adjustments for leap seconds
     double monBtwDbl = monDiffInt + (sec1 - sec2) / 2678400D;
-    // Round a double to 8 decimal places.
-    double result = BigDecimal.valueOf(monBtwDbl).setScale(8, ROUND_HALF_UP).doubleValue();
-    output.set(result);
+    if (isRoundOffNeeded) {
+      // Round a double to 8 decimal places.
+      monBtwDbl = BigDecimal.valueOf(monBtwDbl).setScale(8, ROUND_HALF_UP).doubleValue();
+    }
+    output.set(monBtwDbl);
     return output;
   }
 
diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFMonthsBetween.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFMonthsBetween.java
index 4e650b5..224047d 100644
--- ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFMonthsBetween.java
+++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFMonthsBetween.java
@@ -23,11 +23,12 @@
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFMonthsBetween;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.io.BooleanWritable;
 import org.apache.hadoop.io.DoubleWritable;
 import org.apache.hadoop.io.Text;
 
@@ -36,13 +37,29 @@ public class TestGenericUDFMonthsBetween extends TestCase {
 
 
   public void testMonthsBetweenForString() throws HiveException {
+    // Default run
     GenericUDFMonthsBetween udf = new GenericUDFMonthsBetween();
     ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
     ObjectInspector valueOI2 = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
     ObjectInspector[] arguments = { valueOI1, valueOI2 };
-
     udf.initialize(arguments);
 
+    testMonthsBetweenForString(udf);
+
+    // Run without round-off
+    GenericUDFMonthsBetween udfWithoutRoundOff = new GenericUDFMonthsBetween();
+    ObjectInspector vOI1 = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+    ObjectInspector vOI2 = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+    ObjectInspector vOI3 = PrimitiveObjectInspectorFactory
+        .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.booleanTypeInfo,
+            new BooleanWritable(false));
+    ObjectInspector[] args = { vOI1, vOI2, vOI3 };
+    udfWithoutRoundOff.initialize(args);
+
+    testMonthsBetweenForString(udfWithoutRoundOff);
+  }
+
+  public void testMonthsBetweenForString(GenericUDFMonthsBetween udf) throws HiveException {
     // test month diff with fraction considering time components
     runTestStr("1995-02-02", "1995-01-01", 1.03225806, udf);
     runTestStr("2003-07-17", "2005-07-06", -23.64516129, udf);
@@ -97,6 +114,8 @@ public void testMonthsBetweenForString() throws HiveException {
     // string dates without day should be parsed to null
     runTestStr("2002-03", "2002-02-24", null, udf);
     runTestStr("2002-03-24", "2002-02", null, udf);
+
+    runTestStr("2003-04-23", "2002-04-24", 11.96774194, udf);
   }
 
   public void testMonthsBetweenForTimestamp() throws HiveException {
@@ -104,9 +123,24 @@ public void testMonthsBetweenForTimestamp() throws HiveException {
     ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
     ObjectInspector valueOI2 = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
     ObjectInspector[] arguments = { valueOI1, valueOI2 };
-
     udf.initialize(arguments);
 
+    testMonthsBetweenForTimestamp(udf);
+
+    // Run without round-off
+    GenericUDFMonthsBetween udfWithoutRoundOff = new GenericUDFMonthsBetween();
+    ObjectInspector vOI1 = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
+    ObjectInspector vOI2 = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
+    ObjectInspector vOI3 = PrimitiveObjectInspectorFactory
+        .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.booleanTypeInfo,
+            new BooleanWritable(false));
+    ObjectInspector[] args = { vOI1, vOI2, vOI3 };
+    udfWithoutRoundOff.initialize(args);
+
+    testMonthsBetweenForTimestamp(udfWithoutRoundOff);
+  }
+
+  public void testMonthsBetweenForTimestamp(GenericUDFMonthsBetween udf) throws HiveException {
     // test month diff with fraction considering time components
     runTestTs("1995-02-02 00:00:00", "1995-01-01 00:00:00", 1.03225806, udf);
     runTestTs("2003-07-17 00:00:00", "2005-07-06 00:00:00", -23.64516129, udf);
@@ -142,6 +176,8 @@ public void testMonthsBetweenForTimestamp() throws HiveException {
     runTestTs("2002-03-24 00:00:00", "2002-02-24 10:30:00", 1.0, udf);
     runTestTs("2002-03-24 10:30:00", "2002-02-24 00:00:00", 1.0, udf);
 
+    runTestTs("2003-04-23 23:59:59", "2003-03-24 00:0:0", 0.99999963, udf);
+
     // Test with null args
     runTestTs(null, "2002-03-01 00:00:00", null, udf);
     runTestTs("2002-02-28 00:00:00", null, null, udf);
@@ -153,9 +189,24 @@ public void testMonthsBetweenForDate() throws HiveException {
     ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableDateObjectInspector;
     ObjectInspector valueOI2 = PrimitiveObjectInspectorFactory.writableDateObjectInspector;
     ObjectInspector[] arguments = { valueOI1, valueOI2 };
-
     udf.initialize(arguments);
 
+    testMonthsBetweenForDate(udf);
+
+    // Run without round-off
+    GenericUDFMonthsBetween udfWithoutRoundOff = new GenericUDFMonthsBetween();
+    ObjectInspector vOI1 = PrimitiveObjectInspectorFactory.writableDateObjectInspector;
+    ObjectInspector vOI2 = PrimitiveObjectInspectorFactory.writableDateObjectInspector;
+    ObjectInspector vOI3 = PrimitiveObjectInspectorFactory
+        .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.booleanTypeInfo,
+            new BooleanWritable(false));
+    ObjectInspector[] args = { vOI1, vOI2, vOI3 };
+    udfWithoutRoundOff.initialize(args);
+
+    testMonthsBetweenForDate(udfWithoutRoundOff);
+  }
+
+  public void testMonthsBetweenForDate(GenericUDFMonthsBetween udf) throws HiveException {
     // test month diff with fraction considering time components
     runTestDt("1995-02-02", "1995-01-01", 1.03225806, udf);
     runTestDt("2003-07-17", "2005-07-06", -23.64516129, udf);
diff --git ql/src/test/results/clientpositive/udf_months_between.q.out ql/src/test/results/clientpositive/udf_months_between.q.out
index 913cd35..d46d466 100644
--- ql/src/test/results/clientpositive/udf_months_between.q.out
+++ ql/src/test/results/clientpositive/udf_months_between.q.out
@@ -2,14 +2,14 @@ PREHOOK: query: describe function months_between
 PREHOOK: type: DESCFUNCTION
 POSTHOOK: query: describe function months_between
 POSTHOOK: type: DESCFUNCTION
-months_between(date1, date2) - returns number of months between dates date1 and date2
+months_between(date1, date2, roundOff) - returns number of months between dates date1 and date2
 PREHOOK: query: desc function extended months_between
 PREHOOK: type: DESCFUNCTION
 POSTHOOK: query: desc function extended months_between
 POSTHOOK: type: DESCFUNCTION
-months_between(date1, date2) - returns number of months between dates date1 and date2
+months_between(date1, date2, roundOff) - returns number of months between dates date1 and date2
 If date1 is later than date2, then the result is positive. If date1 is earlier than date2, then the result is negative. If date1 and date2 are either the same days of the month or both last days of months, then the result is always an integer. Otherwise the UDF calculates the fractional portion of the result based on a 31-day month and considers the difference in time components date1 and date2.
-date1 and date2 type can be date, timestamp or string in the format 'yyyy-MM-dd' or 'yyyy-MM-dd HH:mm:ss'. The result is rounded to 8 decimal places.
+date1 and date2 type can be date, timestamp or string in the format 'yyyy-MM-dd' or 'yyyy-MM-dd HH:mm:ss'. The result is rounded to 8 decimal places by default. Set roundOff=false otherwise. 
  Example:
   > SELECT months_between('1997-02-28 10:30:00', '1996-10-30');
  3.94959677