From a640e177c3f928121b46f3a4d3e13d1ee76fbb80 Mon Sep 17 00:00:00 2001 From: Slim Bouguerra Date: Thu, 30 Apr 2020 10:50:42 -0700 Subject: [PATCH] HIVE-22476 : Hive datediff function provided inconsistent results when hive.fetch.task.conversion is set to none --- .../apache/hadoop/hive/common/type/Date.java | 5 +-- .../hive/common/type/TimestampTZUtil.java | 1 - .../udf/generic/TestGenericUDFDateDiff.java | 17 ++++++++ .../ql/udf/generic/TestGenericUDFLastDay.java | 2 +- .../ql/udf/generic/TestGenericUDFNextDay.java | 2 +- ql/src/test/queries/clientpositive/date_udf.q | 6 +++ .../clientpositive/llap/date_udf.q.out | 43 +++++++++++++++++++ 7 files changed, 70 insertions(+), 6 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/common/type/Date.java b/common/src/java/org/apache/hadoop/hive/common/type/Date.java index 6ecfcf65c9..3113be8231 100644 --- a/common/src/java/org/apache/hadoop/hive/common/type/Date.java +++ b/common/src/java/org/apache/hadoop/hive/common/type/Date.java @@ -124,9 +124,8 @@ public void setTimeInMillis(long epochMilli) { public static Date valueOf(String s) { s = s.trim(); - int idx = s.indexOf(" "); - if (idx != -1) { - s = s.substring(0, idx); + if (s.length() > 10) { + s = s.substring(0, 10); } LocalDate localDate; try { diff --git a/common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java b/common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java index 4708d35a78..862acb8880 100644 --- a/common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java +++ b/common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java @@ -53,7 +53,6 @@ builder.optionalStart().appendLiteral(" ").append(DateTimeFormatter.ofPattern("HH:mm:ss")). optionalStart().appendFraction(ChronoField.NANO_OF_SECOND, 1, 9, true). 
optionalEnd().optionalEnd(); - // Zone part builder.optionalStart().appendLiteral(" ").optionalEnd(); builder.optionalStart().appendZoneText(TextStyle.NARROW).optionalEnd(); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateDiff.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateDiff.java index 86b914dcef..30a9e74d36 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateDiff.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateDiff.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.udf.generic; +import java.text.ParseException; import java.time.LocalDateTime; import org.apache.hadoop.hive.common.type.Date; @@ -35,12 +36,28 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; + import org.junit.Test; /** * TestGenericUDFDateDiff. */ public class TestGenericUDFDateDiff { + @Test + public void testStringToDateISOFormat() throws HiveException, ParseException { + GenericUDFDateDiff udf = new GenericUDFDateDiff(); + ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.javaStringObjectInspector; + ObjectInspector valueOI2 = PrimitiveObjectInspectorFactory.javaStringObjectInspector; + ObjectInspector[] arguments = {valueOI1, valueOI2}; + + udf.initialize(arguments); + DeferredObject valueObj1 = new DeferredJavaObject(new Text("2019-09-09T10:45:49+02:00")); + DeferredObject valueObj2 = new DeferredJavaObject(new Text("2019-11-07 23:20:39.503")); + DeferredObject[] args = {valueObj1, valueObj2}; + IntWritable output = udf.evaluate(args); + assertEquals("datediff() test for STRING failed ", "-59", output.toString()); + } + @Test public void testStringToDate() throws HiveException { GenericUDFDateDiff udf = new GenericUDFDateDiff(); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFLastDay.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFLastDay.java index 
3cbcbdb7cc..6f4da1867c 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFLastDay.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFLastDay.java @@ -94,7 +94,7 @@ public void testWrongTsStr() throws HiveException { runAndVerify("2016-02-30 10:30:45", "2016-03-31", udf); runAndVerify("2014-01-32 10:30:45", "2014-02-28", udf); runAndVerify("01/14/2014 10:30:45", null, udf); - runAndVerify("2016-02-28T10:30:45", null, udf); + runAndVerify("2016-02-28T10:30:45", "2016-02-29", udf); } @Test diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFNextDay.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFNextDay.java index 4acfe612e4..0358fe23fe 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFNextDay.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFNextDay.java @@ -93,7 +93,7 @@ public void testNotValidValues() throws Exception { runAndVerify("2015-02-30 10:30:00", "WE", "2015-03-04", udf); runAndVerify("2015-02-32 10:30:00", "WE", "2015-03-11", udf); runAndVerify("2015/01/14 14:04:34", "SAT", null, udf); - runAndVerify("2015-01-14T14:04:34", "SAT", null, udf); + runAndVerify("2015-01-14T14:04:34", "SAT", "2015-01-17", udf); } @Test diff --git a/ql/src/test/queries/clientpositive/date_udf.q b/ql/src/test/queries/clientpositive/date_udf.q index aa33b7a524..1b0b7f1030 100644 --- a/ql/src/test/queries/clientpositive/date_udf.q +++ b/ql/src/test/queries/clientpositive/date_udf.q @@ -75,7 +75,13 @@ select select min(fl_date) from date_udf_flight; select max(fl_date) from date_udf_flight; +create external table testdatediff(datetimecol string) stored as orc; +insert into testdatediff values ('2019-09-09T10:45:49+02:00'),('2019-07-24'); +select datetimecol from testdatediff where datediff(cast(current_timestamp as string), datetimecol)<183; +select cast(datetimecol as date), datetimecol from testdatediff; + +drop table testdatediff; 
drop table date_udf; drop table date_udf_string; drop table date_udf_flight; diff --git a/ql/src/test/results/clientpositive/llap/date_udf.q.out b/ql/src/test/results/clientpositive/llap/date_udf.q.out index 5c44e5e64e..02d5482dfd 100644 --- a/ql/src/test/results/clientpositive/llap/date_udf.q.out +++ b/ql/src/test/results/clientpositive/llap/date_udf.q.out @@ -228,6 +228,49 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@date_udf_flight #### A masked pattern was here #### 2010-10-31 +PREHOOK: query: create external table testdatediff(datetimecol string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@testdatediff +POSTHOOK: query: create external table testdatediff(datetimecol string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@testdatediff +PREHOOK: query: insert into testdatediff values ('2019-09-09T10:45:49+02:00'),('2019-07-24') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@testdatediff +POSTHOOK: query: insert into testdatediff values ('2019-09-09T10:45:49+02:00'),('2019-07-24') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@testdatediff +POSTHOOK: Lineage: testdatediff.datetimecol SCRIPT [] +PREHOOK: query: select datetimecol from testdatediff where datediff(cast(current_timestamp as string), datetimecol)<183 +PREHOOK: type: QUERY +PREHOOK: Input: default@testdatediff +#### A masked pattern was here #### +POSTHOOK: query: select datetimecol from testdatediff where datediff(cast(current_timestamp as string), datetimecol)<183 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@testdatediff +#### A masked pattern was here #### +PREHOOK: query: select cast(datetimecol as date), datetimecol from testdatediff +PREHOOK: type: QUERY +PREHOOK: Input: default@testdatediff +#### A masked pattern was here #### +POSTHOOK: query: select cast(datetimecol as 
date), datetimecol from testdatediff +POSTHOOK: type: QUERY +POSTHOOK: Input: default@testdatediff +#### A masked pattern was here #### +2019-09-09 2019-09-09T10:45:49+02:00 +2019-07-24 2019-07-24 +PREHOOK: query: drop table testdatediff +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@testdatediff +PREHOOK: Output: default@testdatediff +POSTHOOK: query: drop table testdatediff +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@testdatediff +POSTHOOK: Output: default@testdatediff PREHOOK: query: drop table date_udf PREHOOK: type: DROPTABLE PREHOOK: Input: default@date_udf -- 2.17.2 (Apple Git-113)