diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index b0d7a4e8f0..95820ed012 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -522,6 +522,8 @@ system.registerGenericUDF("unix_timestamp", GenericUDFUnixTimeStamp.class); system.registerGenericUDF("to_unix_timestamp", GenericUDFToUnixTimeStamp.class); + system.registerGenericUDF("to_proleptic_gregorian", GenericUDFToProlepticGregorian.class); + system.registerGenericUDF("internal_interval", GenericUDFInternalInterval.class); system.registerGenericUDF("to_epoch_milli", GenericUDFEpochMilli.class); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToProlepticGregorian.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToProlepticGregorian.java new file mode 100644 index 0000000000..21689807e2 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToProlepticGregorian.java @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.udf.generic; + +import java.text.SimpleDateFormat; +import java.util.TimeZone; +import org.apache.hadoop.hive.common.type.Date; +import org.apache.hadoop.hive.common.type.Timestamp; +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.io.DateWritableV2; +import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; + + +/** + * GenericUDFToProlepticGregorian. 
+ */
+@Description(name = "to_proleptic_gregorian", value = "_FUNC_(date/timestamp) - Converts a date/timestamp "
+    + "from Gregorian-Julian hybrid calendar to proleptic Gregorian calendar.",
+    extended = "Converts a date/timestamp from Gregorian-Julian hybrid calendar, i.e., calendar \n"
+    + "that supports both the Julian and Gregorian calendar systems with the support of a single discontinuity, \n"
+    + "which corresponds by default to the Gregorian date when the Gregorian calendar was instituted, to \n"
+    + "proleptic Gregorian calendar (ISO 8601 standard), which is produced by extending the Gregorian calendar \n"
+    + "backward to dates preceding its official introduction in 1582.")
+public class GenericUDFToProlepticGregorian extends GenericUDF {
+
+  // How the conversion works: the input's epoch milliseconds are rendered to text with
+  // java.text.SimpleDateFormat and that text is parsed back with Hive's own Date/Timestamp
+  // valueOf(...). The legacy formatter is therefore used on purpose; replacing it with
+  // java.time formatting would turn this function into a no-op.
+  //
+  // NOTE(review): the patterns below carry no era field, so a value whose hybrid-calendar
+  // rendering falls before year 1 would presumably lose its era - confirm whether such
+  // inputs must be supported.
+
+  /** Inspector of the first argument; set by {@link #initialize}. */
+  private transient PrimitiveObjectInspector inputOI;
+  /** Inspector of the returned value (writable date or writable timestamp). */
+  private transient PrimitiveObjectInspector resultOI;
+  /** Converts the raw argument into the writable type described by {@link #resultOI}. */
+  private transient Converter converter;
+  /** UTC formatter whose pattern matches the input category (date or timestamp). */
+  private transient SimpleDateFormat formatter;
+
+  // Output holders, reused across evaluate() calls instead of allocating one per row.
+  private final DateWritableV2 dateWritable = new DateWritableV2();
+  private final TimestampWritableV2 timestampWritable = new TimestampWritableV2();
+
+  /**
+   * Validates the argument list and prepares the converter and formatter.
+   *
+   * <p>Only the first argument is inspected; any additional arguments are accepted and ignored.
+   *
+   * @param arguments inspectors of the call arguments; the first must be DATE or TIMESTAMP
+   * @return the writable date inspector for DATE input, the writable timestamp inspector for
+   *         TIMESTAMP input
+   * @throws UDFArgumentLengthException if no argument is supplied
+   * @throws UDFArgumentException if the first argument is not a primitive, or is a primitive
+   *         other than DATE or TIMESTAMP
+   */
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+    if (arguments.length < 1) {
+      throw new UDFArgumentLengthException(
+          "The function to_proleptic_gregorian requires at least one argument, got "
+              + arguments.length);
+    }
+
+    // Check the type explicitly rather than casting and catching ClassCastException.
+    if (!(arguments[0] instanceof PrimitiveObjectInspector)) {
+      throw new UDFArgumentException(
+          "The function to_proleptic_gregorian takes only primitive types");
+    }
+    inputOI = (PrimitiveObjectInspector) arguments[0];
+
+    switch (inputOI.getPrimitiveCategory()) {
+    case DATE:
+      formatter = newUtcFormatter("yyyy-MM-dd");
+      resultOI = PrimitiveObjectInspectorFactory.writableDateObjectInspector;
+      break;
+    case TIMESTAMP:
+      formatter = newUtcFormatter("yyyy-MM-dd HH:mm:ss");
+      resultOI = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
+      break;
+    default:
+      throw new UDFArgumentException(
+          "to_proleptic_gregorian only allows date or timestamp types");
+    }
+
+    converter = ObjectInspectorConverters.getConverter(inputOI, resultOI);
+    return resultOI;
+  }
+
+  /** Builds a non-lenient, UTC-based formatter for the given pattern. */
+  private static SimpleDateFormat newUtcFormatter(String pattern) {
+    SimpleDateFormat result = new SimpleDateFormat(pattern);
+    result.setLenient(false);
+    result.setTimeZone(TimeZone.getTimeZone("UTC"));
+    return result;
+  }
+
+  /**
+   * Converts one value.
+   *
+   * @param arguments call arguments; only the first is read
+   * @return {@code null} for a null input; otherwise the reused {@link DateWritableV2} or
+   *         {@link TimestampWritableV2} holding the converted value (callers must copy it if
+   *         they need to retain it across calls)
+   * @throws HiveException declared by the overridden method
+   */
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+    Object input = arguments[0].get();
+    if (input == null) {
+      return null;
+    }
+
+    Object converted = converter.convert(input);
+    if (converted == null) {
+      // Defensive: never dereference a value the converter could not produce.
+      return null;
+    }
+
+    switch (resultOI.getPrimitiveCategory()) {
+    case DATE:
+      Date date = ((DateWritableV2) converted).get();
+      // java.sql.Date is only a carrier for the epoch millis handed to the formatter.
+      String dateText = formatter.format(new java.sql.Date(date.toEpochMilli()));
+      dateWritable.set(Date.valueOf(dateText));
+      return dateWritable;
+    case TIMESTAMP:
+      Timestamp timestamp = ((TimestampWritableV2) converted).getTimestamp();
+      String timestampText =
+          formatter.format(new java.sql.Timestamp(timestamp.toEpochMilli()));
+      Timestamp adjustedTimestamp = Timestamp.valueOf(timestampText);
+      // The pattern stops at seconds, so carry the fractional part over from the input.
+      adjustedTimestamp.setNanos(timestamp.getNanos());
+      timestampWritable.set(adjustedTimestamp);
+      return timestampWritable;
+    default:
+      // Should never happen: initialize() rejects every other category.
+      throw new IllegalStateException("Unexpected type in evaluating to_proleptic_gregorian: "
+          + inputOI.getPrimitiveCategory());
+    }
+  }
+
+  /** Renders the call for display using the standard {@code name(children...)} form. */
+  @Override
+  public String getDisplayString(String[] children) {
+    return getStandardDisplayString(getFuncName(), children);
+  }
+
+  /** Returns the SQL name under which this function is registered. */
+  @Override
+  protected String getFuncName() {
+    return "to_proleptic_gregorian";
+  }
+}
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFToProlepticGregorian.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFToProlepticGregorian.java
new file mode 100644
index 0000000000..a9aaf5b56f
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFToProlepticGregorian.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
+import org.apache.hadoop.hive.serde2.io.DateWritableV2;
+import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+
+/**
+ * Unit tests for {@link GenericUDFToProlepticGregorian}: date and timestamp inputs before and
+ * after the calendar switch, nanosecond preservation, and null handling.
+ */
+public class TestGenericUDFToProlepticGregorian {
+
+  /**
+   * Evaluates the UDF on a single argument and checks the outcome.
+   *
+   * @param udf an already initialized UDF
+   * @param arg1 the value passed as the only argument; may be null
+   * @param expected the expected result, compared by its string form, or null when the UDF
+   *        is expected to return null
+   * @throws HiveException if evaluation fails
+   */
+  public static void runAndVerify(GenericUDF udf,
+      Object arg1, Object expected) throws HiveException {
+    DeferredObject[] args = { new DeferredJavaObject(arg1) };
+    Object result = udf.evaluate(args);
+
+    if (expected == null) {
+      assertNull(result);
+    } else {
+      // Fail with an assertion rather than an NPE when the UDF unexpectedly returns null.
+      assertNotNull(result);
+      assertEquals(expected.toString(), result.toString());
+    }
+  }
+
+  /** Creates the UDF and initializes it with the single argument it actually reads. */
+  private static GenericUDFToProlepticGregorian newInitializedUdf(ObjectInspector valueOI)
+      throws Exception {
+    GenericUDFToProlepticGregorian udf = new GenericUDFToProlepticGregorian();
+    udf.initialize(new ObjectInspector[] {valueOI});
+    return udf;
+  }
+
+  @Test
+  public void testToProlepticGregorianDate() throws Exception {
+    GenericUDFToProlepticGregorian udf =
+        newInitializedUdf(PrimitiveObjectInspectorFactory.writableDateObjectInspector);
+
+    runAndVerify(udf,
+        new DateWritableV2(Date.valueOf("0000-12-30")),
+        new DateWritableV2(Date.valueOf("0001-01-01")));
+
+    runAndVerify(udf,
+        new DateWritableV2(Date.valueOf("0601-03-07")),
+        new DateWritableV2(Date.valueOf("0601-03-04")));
+
+    // Dates after the calendar switch are returned unchanged.
+    runAndVerify(udf,
+        new DateWritableV2(Date.valueOf("2015-03-07")),
+        new DateWritableV2(Date.valueOf("2015-03-07")));
+
+    // Null in, null out.
+    runAndVerify(udf, null, null);
+  }
+
+  @Test
+  public void testToProlepticGregorianTimestamp() throws Exception {
+    GenericUDFToProlepticGregorian udf =
+        newInitializedUdf(PrimitiveObjectInspectorFactory.writableTimestampObjectInspector);
+
+    runAndVerify(udf,
+        new TimestampWritableV2(Timestamp.valueOf("0601-03-07 17:00:00")),
+        new TimestampWritableV2(Timestamp.valueOf("0601-03-04 17:00:00")));
+
+    // Timestamps after the calendar switch are returned unchanged.
+    runAndVerify(udf,
+        new TimestampWritableV2(Timestamp.valueOf("2015-03-07 17:00:00")),
+        new TimestampWritableV2(Timestamp.valueOf("2015-03-07 17:00:00")));
+
+    // Make sure nanos are preserved
+    runAndVerify(udf,
+        new TimestampWritableV2(Timestamp.valueOf("0601-03-07 18:00:00.123456789")),
+        new TimestampWritableV2(Timestamp.valueOf("0601-03-04 18:00:00.123456789")));
+
+    runAndVerify(udf,
+        new TimestampWritableV2(Timestamp.valueOf("2018-07-07 18:00:00.123456789")),
+        new TimestampWritableV2(Timestamp.valueOf("2018-07-07 18:00:00.123456789")));
+
+    // Null in, null out.
+    runAndVerify(udf, null, null);
+  }
+}
diff --git a/ql/src/test/queries/clientpositive/udf_to_proleptic_gregorian.q b/ql/src/test/queries/clientpositive/udf_to_proleptic_gregorian.q
new file mode 100644
index 0000000000..66ecdf161e
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udf_to_proleptic_gregorian.q
@@ -0,0 +1,14 @@
+--!
qt:dataset:src + +DESCRIBE FUNCTION to_proleptic_gregorian; +DESCRIBE FUNCTION EXTENDED to_proleptic_gregorian; + +SELECT + '0601-03-07' AS dts, + CAST('0601-03-07' AS DATE) AS dt, + to_proleptic_gregorian(CAST('0601-03-07' AS DATE)) AS dtp; + +SELECT + '0501-03-07 17:03:00.4321' AS tss, + CAST('0501-03-07 17:03:00.4321' AS TIMESTAMP) AS ts, + to_proleptic_gregorian(CAST('0501-03-07 17:03:00.4321' AS TIMESTAMP)) AS tsp; diff --git a/ql/src/test/results/clientpositive/show_functions.q.out b/ql/src/test/results/clientpositive/show_functions.q.out index d88a5f2c67..e63276890f 100644 --- a/ql/src/test/results/clientpositive/show_functions.q.out +++ b/ql/src/test/results/clientpositive/show_functions.q.out @@ -286,6 +286,7 @@ surrogate_key tan to_date to_epoch_milli +to_proleptic_gregorian to_unix_timestamp to_utc_timestamp translate @@ -720,6 +721,7 @@ surrogate_key tan to_date to_epoch_milli +to_proleptic_gregorian to_unix_timestamp to_utc_timestamp translate diff --git a/ql/src/test/results/clientpositive/udf_to_proleptic_gregorian.q.out b/ql/src/test/results/clientpositive/udf_to_proleptic_gregorian.q.out new file mode 100644 index 0000000000..fd734f330e --- /dev/null +++ b/ql/src/test/results/clientpositive/udf_to_proleptic_gregorian.q.out @@ -0,0 +1,47 @@ +PREHOOK: query: DESCRIBE FUNCTION to_proleptic_gregorian +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION to_proleptic_gregorian +POSTHOOK: type: DESCFUNCTION +to_proleptic_gregorian(date/timestamp) - Converts a date/timestamp from Gregorian-Julian hybrid calendar to proleptic Gregorian calendar. +PREHOOK: query: DESCRIBE FUNCTION EXTENDED to_proleptic_gregorian +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED to_proleptic_gregorian +POSTHOOK: type: DESCFUNCTION +to_proleptic_gregorian(date/timestamp) - Converts a date/timestamp from Gregorian-Julian hybrid calendar to proleptic Gregorian calendar. 
+Converts a date/timestamp from Gregorian-Julian hybrid calendar, i.e., calendar +that supports both the Julian and Gregorian calendar systems with the support of a single discontinuity, +which corresponds by default to the Gregorian date when the Gregorian calendar was instituted, to +proleptic Gregorian calendar (ISO 8601 standard), which is produced by extending the Gregorian calendar +backward to dates preceding its official introduction in 1582. +Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDFToProlepticGregorian +Function type:BUILTIN +PREHOOK: query: SELECT + '0601-03-07' AS dts, + CAST('0601-03-07' AS DATE) AS dt, + to_proleptic_gregorian(CAST('0601-03-07' AS DATE)) AS dtp +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: SELECT + '0601-03-07' AS dts, + CAST('0601-03-07' AS DATE) AS dt, + to_proleptic_gregorian(CAST('0601-03-07' AS DATE)) AS dtp +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +0601-03-07 0601-03-07 0601-03-04 +PREHOOK: query: SELECT + '0501-03-07 17:03:00.4321' AS tss, + CAST('0501-03-07 17:03:00.4321' AS TIMESTAMP) AS ts, + to_proleptic_gregorian(CAST('0501-03-07 17:03:00.4321' AS TIMESTAMP)) AS tsp +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: SELECT + '0501-03-07 17:03:00.4321' AS tss, + CAST('0501-03-07 17:03:00.4321' AS TIMESTAMP) AS ts, + to_proleptic_gregorian(CAST('0501-03-07 17:03:00.4321' AS TIMESTAMP)) AS tsp +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +0501-03-07 17:03:00.4321 0501-03-07 17:03:00.4321 0501-03-05 17:03:00.4321