.../hadoop/hive/contrib/udf/UDFParseISO8601.java   | 129 ++++++++++++++++++++++
 .../hive/contrib/udf/TestUDFParseISO8601.java      |  91 ++++++++++++++++
 2 files changed, 220 insertions(+)

diff --git a/contrib/src/java/org/apache/hadoop/hive/contrib/udf/UDFParseISO8601.java b/contrib/src/java/org/apache/hadoop/hive/contrib/udf/UDFParseISO8601.java
new file mode 100644
index 0000000..1013cd9
--- /dev/null
+++ b/contrib/src/java/org/apache/hadoop/hive/contrib/udf/UDFParseISO8601.java
@@ -0,0 +1,129 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.contrib.udf;
+
+import java.sql.Timestamp;
+import java.time.ZonedDateTime;
+import java.time.format.DateTimeFormatter;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.TextConverter;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+
+/**
+ * A UDF for parsing the ISO 8601 date-time format. This UDF parses strings
+ * representing a date-time with an offset, such as '2011-12-03T10:15:30+01:00'.
+ *
+ * <p>A NULL argument yields NULL. A string that is not a valid ISO 8601
+ * offset date-time makes {@code evaluate} fail with a
+ * {@link java.time.format.DateTimeParseException}.
+ *
+ * @see <a href="https://en.wikipedia.org/wiki/ISO_8601">Wikipedia - ISO 8601</a>
+ */
+@Description(name = "parse_iso8601",
+    value = "parse_iso8601(string timestamp) - "
+    + "Parses an ISO 8601 formatted timestamp")
+public class UDFParseISO8601 extends GenericUDF {
+
+  /** Converts the single argument to text; created by {@link #initialize}. */
+  private TextConverter textConverter;
+
+  /**
+   * Validates that exactly one primitive argument was supplied and prepares
+   * the converter used to read it as text.
+   *
+   * @param arguments object inspectors of the call arguments
+   * @return the Java timestamp object inspector describing the result
+   * @throws UDFArgumentException if the argument count is not one or the
+   *         argument is not of a primitive type
+   */
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arguments)
+      throws UDFArgumentException {
+    if (arguments.length != 1) {
+      throw new UDFArgumentLengthException("The function " + getName() + " requires one "
+          + "argument, got " + arguments.length);
+    }
+    // Check the inspector type up front rather than catching ClassCastException.
+    if (!(arguments[0] instanceof PrimitiveObjectInspector)) {
+      throw new UDFArgumentException(
+          "The function " + getName() + " takes only primitive types");
+    }
+    textConverter = new TextConverter((PrimitiveObjectInspector) arguments[0]);
+
+    return PrimitiveObjectInspectorFactory.javaTimestampObjectInspector;
+  }
+
+  /**
+   * Parses the argument as an ISO 8601 date-time with an offset.
+   *
+   * @param arguments deferred call arguments; only the first one is read
+   * @return the parsed instant as a {@link Timestamp}, or {@code null} when
+   *         the argument is {@code null}
+   * @throws HiveException if reading the deferred argument fails
+   * @throws java.time.format.DateTimeParseException if the text cannot be parsed
+   */
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+    Object o0 = arguments[0].get();
+    if (o0 == null) {
+      return null;
+    }
+
+    String timestampStr = textConverter.convert(o0).toString();
+    ZonedDateTime dateTime =
+        ZonedDateTime.parse(timestampStr, DateTimeFormatter.ISO_OFFSET_DATE_TIME);
+
+    return Timestamp.from(dateTime.toInstant());
+  }
+
+  /**
+   * Returns the text shown for this call, built from the first child expression.
+   *
+   * @param children display strings of the argument expressions
+   * @return the display string
+   */
+  @Override
+  public String getDisplayString(String[] children) {
+    return "Converting field " + children[0];
+  }
+
+  /**
+   * Returns the function name used in error messages.
+   *
+   * @return the literal {@code parse_iso8601}
+   */
+  public String getName() {
+    return "parse_iso8601";
+  }
+
+  /**
+   * Always returns {@code false}; nothing in this class calls it.
+   *
+   * @return {@code false}
+   */
+  protected boolean invert() {
+    return false;
+  }
+}
diff --git a/contrib/src/test/org/apache/hadoop/hive/contrib/udf/TestUDFParseISO8601.java b/contrib/src/test/org/apache/hadoop/hive/contrib/udf/TestUDFParseISO8601.java
new file mode 100644
index 0000000..a8afd6a
--- /dev/null
+++ b/contrib/src/test/org/apache/hadoop/hive/contrib/udf/TestUDFParseISO8601.java
@@ -0,0 +1,91 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.contrib.udf;
+
+import java.sql.Timestamp;
+import java.time.format.DateTimeParseException;
+
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.Text;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Unit tests for {@link UDFParseISO8601}.
+ */
+public class TestUDFParseISO8601 {
+
+  /** A well-formed offset date-time is converted to the matching epoch millis. */
+  @Test
+  @SuppressWarnings("resource")
+  public void testUDFParseISO8601() throws Exception {
+    ObjectInspector valueOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+    UDFParseISO8601 udf = new UDFParseISO8601();
+    ObjectInspector[] args2 = { valueOI };
+    udf.initialize(args2);
+
+    String dateStr = "2017-11-19T02:50:35-05:00";
+    DeferredObject[] args = { new DeferredJavaObject(new Text(dateStr)) };
+    Timestamp result = (Timestamp) udf.evaluate(args);
+    // 2017-11-19T07:50:35Z expressed in milliseconds since the epoch.
+    Assert.assertEquals(1511077835000L, result.getTime());
+  }
+
+  /** Passing two arguments is rejected during initialization. */
+  @Test(expected = UDFArgumentException.class)
+  @SuppressWarnings("resource")
+  public void testUDFParseISO8601InvalidInitialize() throws Exception {
+    ObjectInspector valueOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+    UDFParseISO8601 udf = new UDFParseISO8601();
+    ObjectInspector[] args2 = { valueOI, valueOI };
+    udf.initialize(args2);
+  }
+
+  /** A string without the 'T' separator fails to parse. */
+  @Test(expected = DateTimeParseException.class)
+  @SuppressWarnings("resource")
+  public void testUDFParseISO8601Invalid() throws Exception {
+    ObjectInspector valueOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+    UDFParseISO8601 udf = new UDFParseISO8601();
+    ObjectInspector[] args2 = { valueOI };
+    udf.initialize(args2);
+
+    // Missing 'T'
+    String dateStr = "2017-11-19 02:50:35-05:00";
+    DeferredObject[] args = { new DeferredJavaObject(new Text(dateStr)) };
+    udf.evaluate(args);
+  }
+
+  /** A NULL argument produces a NULL result. */
+  @Test
+  @SuppressWarnings("resource")
+  public void testUDFParseISO8601Null() throws Exception {
+    ObjectInspector valueOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+    UDFParseISO8601 udf = new UDFParseISO8601();
+    ObjectInspector[] args2 = { valueOI };
+    udf.initialize(args2);
+
+    DeferredObject[] args = { new DeferredJavaObject(null) };
+    Object result = udf.evaluate(args);
+    Assert.assertNull(result);
+  }
+}