diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 92fee9b..4372433 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -91,6 +91,7 @@ import org.apache.hadoop.hive.ql.udf.UDFSin; import org.apache.hadoop.hive.ql.udf.UDFSpace; import org.apache.hadoop.hive.ql.udf.UDFSqrt; +import org.apache.hadoop.hive.ql.udf.UDFStrToTimestamp; import org.apache.hadoop.hive.ql.udf.UDFSubstr; import org.apache.hadoop.hive.ql.udf.UDFTan; import org.apache.hadoop.hive.ql.udf.UDFToBoolean; @@ -410,7 +411,10 @@ registerGenericUDF("unix_timestamp", GenericUDFUnixTimeStamp.class); registerGenericUDF("to_unix_timestamp", GenericUDFToUnixTimeStamp.class); - // Generic UDTF's + registerGenericUDF("date_format", GenericUDFDateFormat.class); + registerUDF("str_to_date", UDFStrToTimestamp.class, false); + + // Generic UDTF's registerGenericUDTF("explode", GenericUDTFExplode.class); registerGenericUDTF("inline", GenericUDTFInline.class); registerGenericUDTF("json_tuple", GenericUDTFJSONTuple.class); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFStrToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFStrToTimestamp.java new file mode 100644 index 0000000..551c487 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFStrToTimestamp.java @@ -0,0 +1,60 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.io.Text;
+
+import java.text.SimpleDateFormat;
+
+@Description(name = "str_to_date",
+    value = "_FUNC_(date, format) - that converts a given date in the given format to a Timestamp formatted date",
+    extended = "Example:\n"
+    + " > SELECT _FUNC_('2013/12/23', 'yyyy/MM/dd') FROM src LIMIT 1;\n"
+    + " '2013-12-23 00:00:00.0'")
+public class UDFStrToTimestamp extends UDF {
+  static final Log LOG = LogFactory.getLog(UDFStrToTimestamp.class);
+
+  private final Text toFormat = new Text("yyyy-MM-dd HH:mm:ss.S");
+  private final Text result = new Text();
+
+  /**
+   * Parses datetime with the SimpleDateFormat pattern fromFormat and re-renders it as
+   * "yyyy-MM-dd HH:mm:ss.S". Returns null for a null argument or when conversion fails (the
+   * failure is logged with its cause). The returned Text is this instance's reused buffer.
+   */
+  public Text evaluate(Text datetime, Text fromFormat) {
+    if (datetime == null || fromFormat == null) {
+      return null;
+    }
+    try {
+      SimpleDateFormat parser = new SimpleDateFormat(fromFormat.toString());
+      SimpleDateFormat printer = new SimpleDateFormat(toFormat.toString());
+      result.set(printer.format(parser.parse(datetime.toString())));
+      return result;
+    } catch (Exception e) {
+      LOG.info(String.format("Exception during converting %s from format %s", datetime, fromFormat), e);
+      return null;
+    }
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateFormat.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateFormat.java
new file mode 100644
index 0000000..b9e0340
--- /dev/null
+++ 
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateFormat.java
@@ -0,0 +1,185 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.Text;
+
+import java.sql.Timestamp;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+
+/**
+ * GenericUDF behind date_format(value, pattern): renders a STRING, TIMESTAMP or DATE value as
+ * text using a java.text.SimpleDateFormat pattern. STRING values must be written as
+ * "yyyy-MM-dd HH:mm:ss.S". A NULL value or NULL pattern produces NULL.
+ */
+@Description(name = "date_format",
+    value = "_FUNC_(date, format) - Formats the date/timestamp value according to the format string",
+    extended = "Example:\n"
+    + " > SELECT _FUNC_('2013-12-23 00:00:00.0', 'yyyy/MM/dd') FROM src LIMIT 1;\n"
+    + " '2013/12/23'")
+public class GenericUDFDateFormat extends GenericUDF {
+
+  private final static String defaultTimestampFormatStr = "yyyy-MM-dd HH:mm:ss.S";
+  private final SimpleDateFormat defaultTimestampFormat = new SimpleDateFormat(defaultTimestampFormatStr);
+  private transient PrimitiveObjectInspectorConverter.TimestampConverter timestampConverter;
+  private transient ObjectInspectorConverters.Converter textConverter;
+  private transient ObjectInspectorConverters.Converter dateWritableConverter;
+  private transient ObjectInspectorConverters.Converter stringWritableConverter;
+  private transient PrimitiveObjectInspector.PrimitiveCategory inputType1;
+  private transient PrimitiveObjectInspector.PrimitiveCategory inputType2;
+  private final Text output = new Text();
+
+  /**
+   * Validates the two argument inspectors and prepares the converters used by evaluate().
+   *
+   * @param arguments inspectors for (value, pattern); value must be STRING, TIMESTAMP or DATE and
+   *                  pattern must be STRING
+   * @return the writable string inspector describing the Text result
+   * @throws UDFArgumentException if the argument count or either argument type is unsupported
+   */
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+    if (arguments.length != 2) {
+      throw new UDFArgumentLengthException(
+          "date_format() requires 2 argument, got " + arguments.length);
+    }
+
+    if (arguments[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+      throw new UDFArgumentTypeException(0,
+          "Only primitive type arguments are accepted but "
+          + arguments[0].getTypeName() + " is passed. as first arguments");
+    }
+    if (arguments[1].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+      // Report the second argument; index 2 does not exist once the length check has passed.
+      throw new UDFArgumentTypeException(1,
+          "Only primitive type arguments are accepted but "
+          + arguments[1].getTypeName() + " is passed. as second arguments");
+    }
+
+    inputType1 = ((PrimitiveObjectInspector) arguments[0]).getPrimitiveCategory();
+    ObjectInspector outputOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+    switch (inputType1) {
+    case STRING:
+      textConverter = ObjectInspectorConverters.getConverter(
+          (PrimitiveObjectInspector) arguments[0],
+          PrimitiveObjectInspectorFactory.writableStringObjectInspector);
+      break;
+    case TIMESTAMP:
+      timestampConverter = new PrimitiveObjectInspectorConverter.TimestampConverter(
+          (PrimitiveObjectInspector) arguments[0],
+          PrimitiveObjectInspectorFactory.writableTimestampObjectInspector);
+      break;
+    case DATE:
+      dateWritableConverter = ObjectInspectorConverters.getConverter(
+          (PrimitiveObjectInspector) arguments[0],
+          PrimitiveObjectInspectorFactory.writableDateObjectInspector);
+      break;
+    default:
+      throw new UDFArgumentException(
+          " date_format() only takes STRING/TIMESTAMP/DATEWRITABLE types as first argument, got "
+          + inputType1);
+    }
+
+    inputType2 = ((PrimitiveObjectInspector) arguments[1]).getPrimitiveCategory();
+    if (inputType2 != PrimitiveObjectInspector.PrimitiveCategory.STRING) {
+      throw new UDFArgumentException(
+          " date_format() only takes STRING types as second argument, got " + inputType2);
+    }
+    stringWritableConverter = ObjectInspectorConverters.getConverter(
+        (PrimitiveObjectInspector) arguments[1],
+        PrimitiveObjectInspectorFactory.writableStringObjectInspector);
+    return outputOI;
+  }
+
+  /**
+   * Formats one row.
+   *
+   * @param arguments deferred (value, pattern) pair matching the inspectors given to initialize()
+   * @return the reused Text holding the formatted value, or null when either argument is NULL
+   * @throws HiveException if a STRING value is not written as "yyyy-MM-dd HH:mm:ss.S"
+   */
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+    Object value = arguments[0].get();
+    Object pattern = arguments[1].get();
+    if (value == null || pattern == null) {
+      // NULL in, NULL out, instead of a NullPointerException from the conversions below.
+      return null;
+    }
+
+    Text toFormat = (Text) stringWritableConverter.convert(pattern);
+    SimpleDateFormat formatter = new SimpleDateFormat(toFormat.toString());
+    Date date = null;
+    switch (inputType1) {
+    case STRING:
+      String dateString = textConverter.convert(value).toString();
+      try {
+        date = defaultTimestampFormat.parse(dateString);
+      } catch (ParseException e) {
+        throw new UDFArgumentException(
+            "date_format() supports string input only in format " + defaultTimestampFormatStr);
+      }
+      break;
+    case TIMESTAMP:
+      Timestamp ts = ((TimestampWritable) timestampConverter.convert(value)).getTimestamp();
+      date = new Date(ts.getTime());
+      break;
+    case DATE:
+      DateWritable dw = (DateWritable) dateWritableConverter.convert(value);
+      date = dw.get();
+      break;
+    default:
+      throw new UDFArgumentException(
+          "date_format() only takes STRING/TIMESTAMP/DATEWRITABLE types, got " + inputType1);
+    }
+
+    output.set(formatter.format(date));
+    return output;
+  }
+
+  /** Builds the display text "date_format(child0, child1, ...)" from the child expressions. */
+  @Override
+  public String getDisplayString(String[] children) {
+    StringBuilder sb = new StringBuilder();
+    sb.append("date_format(");
+    if (children.length > 0) {
+      sb.append(children[0]);
+      for (int i = 1; i < children.length; i++) {
+        sb.append(", ");
+        sb.append(children[i]);
+      }
+    }
+    sb.append(")");
+    return sb.toString();
+  }
+}
diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFStrToTimestamp.java ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFStrToTimestamp.java
new file mode 100644
index 0000000..204113c
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFStrToTimestamp.java
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf;
+
+import org.apache.hadoop.io.Text;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Arrays;
+import java.util.Collection;
+
+/** Parameterized check that str_to_date re-renders each input as "yyyy-MM-dd HH:mm:ss.S". */
+@RunWith(Parameterized.class)
+public class TestUDFStrToTimestamp {
+
+  private final String datetime;
+  private final String format;
+  private final String expectedResult;
+
+  public TestUDFStrToTimestamp(String datetime, String format, String expectedResult) {
+    this.datetime = datetime;
+    this.format = format;
+    this.expectedResult = expectedResult;
+  }
+
+  /** Rows of {input text, SimpleDateFormat pattern of the input, expected output}. */
+  @Parameterized.Parameters
+  public static Collection<Object[]> data() {
+    return Arrays.asList(new Object[][]{
+        {"23/12/2013", "dd/MM/yyyy", "2013-12-23 00:00:00.0"},
+        {"00:05 PM, 06/12/12", "KK:mm aa, dd/MM/yy", "2012-12-06 12:05:00.0"},
+    });
+  }
+
+  @Test
+  public void test() throws ParseException {
+    Text actual = new UDFStrToTimestamp().evaluate(new Text(datetime), new Text(format));
+    Assert.assertNotNull("no result for " + datetime + " with " + format, actual);
+    Assert.assertEquals(expectedResult, actual.toString());
+  }
+}
diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormat.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormat.java
new file mode 100644
index 0000000..bc6d749
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormat.java
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.Text;
+import org.junit.Test;
+
+import java.sql.Date;
+import java.sql.Timestamp;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Unit tests for GenericUDFDateFormat covering STRING, DATE and TIMESTAMP first arguments.
+ */
+public class TestGenericUDFDateFormat {
+
+  @Test
+  public void testStringFormat() throws HiveException {
+    Text output = format(PrimitiveObjectInspectorFactory.writableStringObjectInspector,
+        new Text("2012-07-20 04:17:52.0"), "dd/MM/yy");
+
+    assertEquals("date_format() test for STRING failed ", "20/07/12", output.toString());
+  }
+
+  @Test
+  public void testDateFormat() throws HiveException {
+    Text output = format(PrimitiveObjectInspectorFactory.writableDateObjectInspector,
+        new DateWritable(Date.valueOf("2012-07-20")), "dd/MM/yy");
+
+    assertEquals("date_format() test for DATE failed ", "20/07/12", output.toString());
+  }
+
+  @Test
+  public void testTimestampFormat() throws HiveException {
+    Text output = format(PrimitiveObjectInspectorFactory.writableTimestampObjectInspector,
+        new TimestampWritable(Timestamp.valueOf("2012-07-20 04:17:52")), "dd/MM/yy HH:mm");
+
+    assertEquals("date_format() test for TIMESTAMP failed ", "20/07/12 04:17", output.toString());
+  }
+
+  /**
+   * Runs date_format(value, pattern) through a freshly initialized UDF.
+   *
+   * @param valueOI inspector matching the runtime class of value
+   * @param value   first argument, already in the form valueOI expects
+   * @param pattern SimpleDateFormat pattern passed as the second argument
+   * @return the Text produced by the UDF
+   * @throws HiveException if initialization or evaluation fails
+   */
+  private static Text format(ObjectInspector valueOI, Object value, String pattern)
+      throws HiveException {
+    GenericUDFDateFormat udf = new GenericUDFDateFormat();
+    // Text arguments are described by the writable string inspector so that the declared
+    // inspector and the object handed to the UDF agree.
+    ObjectInspector patternOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+    udf.initialize(new ObjectInspector[] {valueOI, patternOI});
+
+    GenericUDF.DeferredObject[] args = {
+        new GenericUDF.DeferredJavaObject(value),
+        new GenericUDF.DeferredJavaObject(new Text(pattern)),
+    };
+    return (Text) udf.evaluate(args);
+  }
+}