diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveDateTimeFormatter.java common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveDateTimeFormatter.java new file mode 100644 index 0000000000..e0622473ce --- /dev/null +++ common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveDateTimeFormatter.java @@ -0,0 +1,77 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common.format.datetime; + +import org.apache.hadoop.hive.common.type.Timestamp; + +import java.text.SimpleDateFormat; +import java.time.format.DateTimeFormatter; +import java.util.TimeZone; + +/** + * Interface used for formatting and parsing timestamps. Created so that users can optionally + * format datetime objects into strings and parse strings into datetime objects with + * SQL:2016 semantics, as well as with the legacy (java.text.SimpleDateFormat) format. + */ +public interface HiveDateTimeFormatter { + + /** + * Only used for HiveSimpleDateFormatter, which is a wrapper for the given SimpleDateFormat + * object. + */ + void setFormatter(SimpleDateFormat simpleDateFormat) throws WrongFormatterException; + + /** + * Only used for HiveJavaDateTimeFormatter, which is a wrapper for the given DateTimeFormatter + * object. + */ + void setFormatter(DateTimeFormatter dateTimeFormatter) throws WrongFormatterException; + + /** + * Format the given timestamp into a string. + */ + String format(Timestamp ts); + + /** + * Parse the given string into a timestamp. + * + * @throws ParseException if the string cannot be parsed. + */ + Timestamp parse(String string) throws ParseException; + + /** + * Set the format pattern to be used for formatting timestamps or parsing strings. + * Different HiveDateTimeFormatter implementations interpret some patterns differently. For + * example, HiveSimpleDateFormatter interprets the string "mm" as minute, while + * HiveSqlDateTimeFormatter interprets it as month. + * This method parses the pattern into tokens, so it comes with some performance overhead. + */ + void setPattern(String pattern); + + /** + * Get the format pattern to be used for formatting timestamps or parsing strings. + */ + String getPattern(); + + /** + * Set the time zone of the formatter. Only HiveSimpleDateFormatter uses this. + */ + void setTimeZone(TimeZone timeZone); + +} diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveJavaDateTimeFormatter.java common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveJavaDateTimeFormatter.java new file mode 100644 index 0000000000..147ece5336 --- /dev/null +++ common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveJavaDateTimeFormatter.java @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common.format.datetime; + +import org.apache.hadoop.hive.common.type.Timestamp; + +import java.text.SimpleDateFormat; +import java.time.Instant; +import java.time.LocalDateTime; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; +import java.util.TimeZone; + +/** + * Wrapper for DateTimeFormatter in the java.time package. + */ +public class HiveJavaDateTimeFormatter implements HiveDateTimeFormatter { + + private DateTimeFormatter formatter; + + @Override public void setFormatter(DateTimeFormatter dateTimeFormatter) { + this.formatter = dateTimeFormatter; + } + + @Override public String format(Timestamp ts) { + return formatter.format( + LocalDateTime.ofInstant( + Instant.ofEpochSecond(ts.toEpochSecond(), ts.getNanos()), ZoneId.of("UTC"))); + } + + @Override public Timestamp parse(String string) { + // Parsing delegates to Timestamp.valueOf, which accepts the default timestamp layout. + return Timestamp.valueOf(string); + } + + // unused methods + @Override public void setPattern(String pattern) {} + @Override public String getPattern() { + return null; + } + @Override public void setTimeZone(TimeZone timeZone) {} + @Override public void setFormatter(SimpleDateFormat simpleDateFormat) + throws WrongFormatterException { + throw new WrongFormatterException("HiveJavaDateTimeFormatter wraps an object of type " + + "java.time.format.DateTimeFormatter, formatter cannot be of type " + + "java.text.SimpleDateFormat"); + } +} diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSimpleDateFormatter.java common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSimpleDateFormatter.java new file mode 100644 index 0000000000..db7822b38f --- /dev/null +++ common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSimpleDateFormatter.java @@ -0,0 +1,78 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common.format.datetime; + +import org.apache.hadoop.hive.common.type.Timestamp; + +import java.text.SimpleDateFormat; +import java.time.format.DateTimeFormatter; +import java.util.Date; +import java.util.TimeZone; + +/** + * Wrapper for java.text.SimpleDateFormat. + */ +public class HiveSimpleDateFormatter implements HiveDateTimeFormatter { + + private SimpleDateFormat format = new SimpleDateFormat(); + private String pattern; + + public HiveSimpleDateFormatter() {} + + @Override public void setFormatter(SimpleDateFormat simpleDateFormat) { + this.format = simpleDateFormat; + } + + @Override public String format(Timestamp ts) { + Date date = new Date(ts.toEpochMilli()); + return format.format(date); + } + + @Override public Timestamp parse(String string) throws ParseException { + try { + Date date = format.parse(string); + return Timestamp.ofEpochMilli(date.getTime()); + } catch (java.text.ParseException e) { + throw new ParseException("String " + string + + " could not be parsed by java.text.SimpleDateFormat with pattern " + format.toPattern()); + } + } + + @Override public void setPattern(String pattern) { + format.applyPattern(pattern); + this.pattern = pattern; + } + + @Override public String getPattern() { + return pattern; + } + + @Override public void setTimeZone(TimeZone timeZone) { + format.setTimeZone(timeZone); + } + + // unused methods + @Override public void setFormatter(DateTimeFormatter dateTimeFormatter) + throws WrongFormatterException { + throw new WrongFormatterException( + "HiveSimpleDateFormatter wraps an object of type java.text.SimpleDateFormat, " + + "formatter cannot be of type java.time.format.DateTimeFormatter"); + } + +} diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java new file mode 100644 index 0000000000..1ccaa5cf2b --- /dev/null +++ common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java @@ -0,0 +1,86 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common.format.datetime; + +import org.apache.hadoop.hive.common.type.Timestamp; + +import java.text.SimpleDateFormat; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; +import java.util.TimeZone; + +/** + * Formatter using SQL:2016 datetime patterns. + */ +public class HiveSqlDateTimeFormatter implements HiveDateTimeFormatter { + + private String pattern; + private TimeZone timeZone; + + public HiveSqlDateTimeFormatter() {} + + @Override public void setPattern(String pattern) { + this.pattern = pattern; + } + + @Override public String getPattern() { + return pattern; + } + + @Override public String format(Timestamp ts) { + //TODO replace with actual implementation: + HiveDateTimeFormatter formatter = new HiveSimpleDateFormatter(); + formatter.setPattern(pattern); + if (timeZone != null) { + formatter.setTimeZone(timeZone); + } else { + formatter.setTimeZone(TimeZone.getTimeZone(ZoneOffset.UTC)); + } + return formatter.format(ts); + } + + @Override public Timestamp parse(String string) throws ParseException { + //TODO replace with actual implementation: + //TODO parsing should be able to remove the time zone (city) from timestamps with time zone; + // if it does not, deal with it in TimestampTZUtil#parseOrNull(String, ZoneId, + // HiveDateTimeFormatter) + + HiveDateTimeFormatter formatter = new HiveSimpleDateFormatter(); + formatter.setPattern(pattern); + if (timeZone != null) { + formatter.setTimeZone(timeZone); + } else { + formatter.setTimeZone(TimeZone.getTimeZone(ZoneOffset.UTC)); + } + try { + return formatter.parse(string); + } catch (Exception e) { + throw new ParseException(e); + } + } + + @Override public void setTimeZone(TimeZone timeZone) { + this.timeZone = timeZone; + } + + // unused methods + @Override public void setFormatter(DateTimeFormatter dateTimeFormatter) + throws WrongFormatterException { + throw new WrongFormatterException("HiveSqlDateTimeFormatter is not a wrapper for " + + "java.time.format.DateTimeFormatter, use HiveJavaDateTimeFormatter instead."); + } + @Override public void setFormatter(SimpleDateFormat simpleDateFormat) + throws WrongFormatterException { + throw new WrongFormatterException("HiveSqlDateTimeFormatter is not a wrapper for " + + "java.text.SimpleDateFormat, use HiveSimpleDateFormatter instead."); + } +} diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/ParseException.java common/src/java/org/apache/hadoop/hive/common/format/datetime/ParseException.java new file mode 100644 index 0000000000..a7ac231682 --- /dev/null +++ common/src/java/org/apache/hadoop/hive/common/format/datetime/ParseException.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common.format.datetime; + +/** + * Exception indicating that the given string could not be parsed into a datetime object. + */ +public class ParseException extends Exception { + + public ParseException(String message) { + super(message); + } + + public ParseException(Throwable cause) { + super(cause); + } + + public ParseException(String message, Throwable cause) { + super(message, cause); + } +} diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/WrongFormatterException.java common/src/java/org/apache/hadoop/hive/common/format/datetime/WrongFormatterException.java new file mode 100644 index 0000000000..972e7e934f --- /dev/null +++ common/src/java/org/apache/hadoop/hive/common/format/datetime/WrongFormatterException.java @@ -0,0 +1,31 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common.format.datetime; + +/** + * Exception indicating that a wrapper class was given the wrong type of object to wrap. + * E.g. HiveSimpleDateFormatter wraps a SimpleDateFormat object, and cannot be given a + * java.time.format.DateTimeFormatter. + */ +public class WrongFormatterException extends Exception { + + public WrongFormatterException(String message) { + super(message); + } +} diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/package-info.java common/src/java/org/apache/hadoop/hive/common/format/datetime/package-info.java new file mode 100644 index 0000000000..056b80035b --- /dev/null +++ common/src/java/org/apache/hadoop/hive/common/format/datetime/package-info.java @@ -0,0 +1,19 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common.format.datetime; \ No newline at end of file diff --git common/src/java/org/apache/hadoop/hive/common/type/Date.java common/src/java/org/apache/hadoop/hive/common/type/Date.java index 6ecfcf65c9..feb09dee16 100644 --- common/src/java/org/apache/hadoop/hive/common/type/Date.java +++ common/src/java/org/apache/hadoop/hive/common/type/Date.java @@ -17,6 +17,9 @@ */ package org.apache.hadoop.hive.common.type; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.ParseException; + import java.time.Instant; import java.time.LocalDate; import java.time.LocalDateTime; @@ -72,6 +75,13 @@ public String toString() { return localDate.format(PRINT_FORMATTER); } + public String toStringFormatted(HiveDateTimeFormatter formatter) { + if (formatter == null) { + return toString(); + } + return formatter.format(Timestamp.ofEpochMilli(toEpochMilli())); + } + public int hashCode() { return localDate.hashCode(); } @@ -137,6 +147,14 @@ public static Date valueOf(String s) { return new Date(localDate); } + public static Date valueOf(String s, HiveDateTimeFormatter formatter) throws ParseException { + if (formatter == null) { + return valueOf(s); + } + s = s.trim(); + return Date.ofEpochMilli(formatter.parse(s).toEpochMilli()); + } + public static Date ofEpochDay(int epochDay) { return new Date(LocalDate.ofEpochDay(epochDay)); } diff --git common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java index a8b7b6d186..587d31b494 100644 --- common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java +++ common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java @@ -17,6 +17,9 @@ */ package org.apache.hadoop.hive.common.type; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.ParseException; + import java.time.Instant; import java.time.LocalDateTime; import java.time.ZoneOffset; @@ -101,6 +104,13 @@ public String toString() { return localDateTime.format(PRINT_FORMATTER); } + public String toStringFormatted(HiveDateTimeFormatter formatter) { + if (formatter == null) { + return toString(); + } + return formatter.format(this); + } + public int hashCode() { return localDateTime.hashCode(); } @@ -166,6 +176,16 @@ public static Timestamp valueOf(String s) { return new Timestamp(localDateTime); } + public static Timestamp valueOf(String s, HiveDateTimeFormatter formatter) + throws ParseException { + if (formatter == null) { + return valueOf(s); + } + + s = s.trim(); + return formatter.parse(s); + } + public static Timestamp ofEpochSecond(long epochSecond) { return ofEpochSecond(epochSecond, 0); } diff --git common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java index 4708d35a78..6bfbb8e71a 100644 --- common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java +++ common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java @@ -34,6 +34,8 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +import 
org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.ParseException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -110,6 +112,26 @@ private static String handleSingleDigitHourOffset(String s) { return s; } + public static TimestampTZ parseOrNull( + String s, ZoneId convertToTimeZone, HiveDateTimeFormatter formatter) { + if (formatter == null) { + return parseOrNull(s, convertToTimeZone); + } + + Timestamp ts; + try { + ts = formatter.parse(s); + } catch (ParseException e) { + return null; + } + if (ts == null) { + return null; + } + TimestampTZ tsLTZ = new TimestampTZ(ts.toEpochSecond(), ts.getNanos(), ZoneOffset.UTC); + // convert to the given time zone, retaining the same instant + tsLTZ.setZonedDateTime(tsLTZ.getZonedDateTime().withZoneSameInstant(convertToTimeZone)); + return tsLTZ; + } public static TimestampTZ parseOrNull(String s, ZoneId defaultTimeZone) { try { diff --git common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java index f26f8ae01e..38b00fac2c 100644 --- common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java +++ common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.common.type; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.ParseException; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import java.math.BigDecimal; @@ -171,6 +173,18 @@ public static long millisToSeconds(long millis) { private static final int DATE_LENGTH = "YYYY-MM-DD".length(); + public static Timestamp stringToTimestamp(String s, HiveDateTimeFormatter formatter) { + if (formatter == null) { + return stringToTimestamp(s); + } + + try { + return Timestamp.valueOf(s, formatter); + } catch (ParseException e) { + return null; + } + } + public static Timestamp stringToTimestamp(String s) { s = s.trim(); // Handle simpler cases directly avoiding exceptions diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 0c2bd1e5ad..c753ca06f4 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1543,6 +1543,12 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal "used instead. Time-zone IDs can be specified as region-based zone IDs (based on IANA time-zone data),\n" + "abbreviated zone IDs, or offset IDs."), + HIVE_USE_SQL_DATETIME_FORMAT("hive.use.sql.datetime.formats", false, + "Datetime values in \"CAST ... AS ... FORMAT\" statements will be parsed " + + "and formatted according to SQL:2016 semantics instead of the legacy " + + "java.text.SimpleDateFormat semantics. This also applies to functions that take a " + + "datetime format; e.g.
date_format, add_months."), + HIVE_SESSION_HISTORY_ENABLED("hive.session.history.enabled", false, "Whether to log Hive query, query plan, runtime statistics etc."), diff --git common/src/java/org/apache/hive/common/util/DateParser.java common/src/java/org/apache/hive/common/util/DateParser.java index 5db14f1906..84a8ae81db 100644 --- common/src/java/org/apache/hive/common/util/DateParser.java +++ common/src/java/org/apache/hive/common/util/DateParser.java @@ -17,6 +17,8 @@ */ package org.apache.hive.common.util; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.ParseException; import org.apache.hadoop.hive.common.type.Date; /** @@ -36,10 +38,14 @@ public Date parseDate(String strValue) { } public boolean parseDate(String strValue, Date result) { + return parseDate(strValue, result, null); + } + + public boolean parseDate(String strValue, Date result, HiveDateTimeFormatter formatter) { Date parsedVal; try { - parsedVal = Date.valueOf(strValue); - } catch (IllegalArgumentException e) { + parsedVal = Date.valueOf(strValue, formatter); + } catch (IllegalArgumentException | ParseException e) { parsedVal = null; } if (parsedVal == null) { diff --git common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveJavaDateTimeFormatter.java common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveJavaDateTimeFormatter.java new file mode 100644 index 0000000000..0e5574075a --- /dev/null +++ common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveJavaDateTimeFormatter.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common.format.datetime; + +import org.apache.hadoop.hive.common.type.Timestamp; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeFormatterBuilder; +import java.time.temporal.ChronoField; + +/** + * Test class for HiveJavaDateTimeFormatter. + */ +public class TestHiveJavaDateTimeFormatter { + + private static final DateTimeFormatter DATE_TIME_FORMATTER; + static { + DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder(); + builder.append(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")); + builder.optionalStart().appendFraction(ChronoField.NANO_OF_SECOND, 0, 9, true).optionalEnd(); + DATE_TIME_FORMATTER = builder.toFormatter(); + } + private HiveDateTimeFormatter formatter = new HiveJavaDateTimeFormatter(); + + @Before + public void setUp() throws WrongFormatterException { + formatter.setFormatter(DATE_TIME_FORMATTER); + } + + @Test + public void testFormat() { + Timestamp ts = Timestamp.valueOf("2019-01-01 00:00:00.99999"); + Assert.assertEquals("2019-01-01 00:00:00.99999", formatter.format(ts)); + } + + @Test + public void testParse() throws ParseException { + String s = "2019-01-01 00:00:00.99999"; + Assert.assertEquals(Timestamp.valueOf("2019-01-01 00:00:00.99999"), formatter.parse(s)); + } + +} diff --git common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSimpleDateFormatter.java common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSimpleDateFormatter.java new file mode 100644 index 0000000000..51fd92e37b --- /dev/null +++ common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSimpleDateFormatter.java @@ -0,0 +1,67 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common.format.datetime; + +import org.apache.hadoop.hive.common.type.Timestamp; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.text.SimpleDateFormat; +import java.time.ZoneOffset; +import java.util.TimeZone; + +/** + * Tests HiveSimpleDateFormatter. + */ +public class TestHiveSimpleDateFormatter { + + private HiveDateTimeFormatter formatter = new HiveSimpleDateFormatter(); + + @Before + public void setUp() throws WrongFormatterException { + formatter.setFormatter(new SimpleDateFormat()); + formatter.setPattern("yyyy-MM-dd HH:mm:ss"); + formatter.setTimeZone(TimeZone.getTimeZone(ZoneOffset.UTC)); + } + + @Test + public void testFormat() { + verifyFormat("2019-01-01 01:01:01"); + verifyFormat("2019-01-01 00:00:00"); + verifyFormat("1960-01-01 23:00:00"); + } + + private void verifyFormat(String s) { + Timestamp ts = Timestamp.valueOf(s); + Assert.assertEquals(s, formatter.format(ts)); + } + + @Test + public void testParse() throws ParseException { + verifyParse("2019-01-01 01:10:10"); + verifyParse("1960-01-01 23:00:00"); + } + + private void verifyParse(String s) throws ParseException { + Timestamp ts = Timestamp.valueOf(s); + Assert.assertEquals(ts, formatter.parse(s)); + } +} diff --git common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java new file mode 100644 index 0000000000..312e13cf2a --- /dev/null +++ common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common.format.datetime; + +import org.junit.Test; + +/** + * Test class for HiveSqlDateTimeFormatter. + */ +public class TestHiveSqlDateTimeFormatter { + + private HiveDateTimeFormatter formatter = new HiveSqlDateTimeFormatter(); + + @Test + public void testFormat() { + // TODO: populate once HiveSqlDateTimeFormatter.format is implemented. + } + + @Test + public void testParse() throws ParseException { + // TODO: populate once HiveSqlDateTimeFormatter.parse is implemented. + } +} diff --git common/src/test/org/apache/hadoop/hive/common/format/datetime/package-info.java common/src/test/org/apache/hadoop/hive/common/format/datetime/package-info.java new file mode 100644 index 0000000000..056b80035b --- /dev/null +++ common/src/test/org/apache/hadoop/hive/common/format/datetime/package-info.java @@ -0,0 +1,19 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common.format.datetime; \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 5ff338660f..9c5e18879f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -43,6 +43,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.CastCharToBinary; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToChar; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToString; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToStringWithFormat; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToVarChar; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToChar; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToDecimal; @@ -70,6 +71,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToDecimal; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToDouble; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToString; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToStringWithFormat; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToVarChar; import org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.ConvertDecimal64ToDecimal; @@ -3136,9 +3138,17 @@ private VectorExpression getCastToString(List<ExprNodeDesc> childExpr, TypeInfo } else if (isDecimalFamily(inputType)) { return createVectorExpression(CastDecimalToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); } else if (isDateFamily(inputType)) { - return createVectorExpression(CastDateToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + if (childExpr.size() < 2) { // no second (format string) argument given + return createVectorExpression(CastDateToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + } else { + return createVectorExpression(CastDateToStringWithFormat.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + } } else if (isTimestampFamily(inputType)) { - return createVectorExpression(CastTimestampToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + if (childExpr.size() < 2) { // no second (format string) argument given + return createVectorExpression(CastTimestampToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + } else { + return createVectorExpression(CastTimestampToStringWithFormat.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + } } else if (isStringFamily(inputType)) { // STRING and VARCHAR types require no conversion, so use a no-op.
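(Editor's sketch, not part of the patch: a minimal round trip through the formatter API that the vectorized WithFormat expressions below build on. It mirrors TestHiveSimpleDateFormatter above; all class and method names come from this patch, and the calling code must handle WrongFormatterException and ParseException.)

HiveDateTimeFormatter fmt = new HiveSimpleDateFormatter();
fmt.setFormatter(new SimpleDateFormat());              // wrong wrapper type would throw WrongFormatterException
fmt.setPattern("yyyy-MM-dd HH:mm:ss");                 // legacy SimpleDateFormat pattern semantics
fmt.setTimeZone(TimeZone.getTimeZone(ZoneOffset.UTC));
Timestamp ts = fmt.parse("2019-01-01 01:01:01");       // may throw ParseException
assert "2019-01-01 01:01:01".equals(fmt.format(ts));   // round trip in UTC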
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToStringWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToStringWithFormat.java new file mode 100644 index 0000000000..61b466df3e --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToStringWithFormat.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.type.Date; +import org.apache.hadoop.hive.common.type.Timestamp; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; + +import java.nio.charset.StandardCharsets; + +/** + * Vectorized UDF for CAST (<date> TO STRING WITH FORMAT <pattern>). + */ +public class CastDateToStringWithFormat extends CastDateToString { + private static final long serialVersionUID = 1L; + private transient HiveDateTimeFormatter formatter; + + public CastDateToStringWithFormat() { + super(); + } + + public CastDateToStringWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) { + super(inputColumn, outputColumnNum); + + if (patternBytes == null) { + throw new RuntimeException("Missing format pattern for CAST ... WITH FORMAT"); //TODO use a more specific exception + } + + formatter = GenericUDF.getSqlDateTimeFormatterOrNull(); + if (formatter == null) { + throw new RuntimeException("SQL:2016 datetime formats are disabled " + + "(hive.use.sql.datetime.formats=false), cannot use CAST ... WITH FORMAT"); //TODO use a more specific exception + } + formatter.setPattern(new String(patternBytes, StandardCharsets.UTF_8)); + } + + // The assign method will be overridden for CHAR and VARCHAR.
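+ // Example (sketch): with pattern "yyyy-MM-dd", epoch day 17897 (2019-01-01) is + // formatted by func() below into the bytes of "2019-01-01".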
+ protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) { + outV.setVal(i, bytes, 0, length); + } + + @Override + protected void func(BytesColumnVector outV, long[] vector, int i) { + byte[] temp = formatter.format( + Timestamp.ofEpochMilli(Date.ofEpochDay((int) vector[i]).toEpochMilli())) + .getBytes(StandardCharsets.UTF_8); + assign(outV, i, temp, temp.length); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.INT_FAMILY, + VectorExpressionDescriptor.ArgumentType.STRING) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR); + return b.build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java index a6dff12e1a..44a451b3bc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -151,10 +152,21 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException { } } - private void evaluate(LongColumnVector outputColVector, BytesColumnVector inV, int i) { + /** + * Used by CastStringToDate. + */ + protected void evaluate(LongColumnVector outputColVector, BytesColumnVector inV, int i) { + evaluate(outputColVector, inV, i, null); + } + + /** + * Used by CastStringToDateWithFormat. + */ + protected void evaluate(LongColumnVector outputColVector, BytesColumnVector inV, int i, + HiveDateTimeFormatter formatter) { String dateString = new String(inV.vector[i], inV.start[i], inV.length[i], StandardCharsets.UTF_8); Date hDate = new Date(); - if (dateParser.parseDate(dateString, hDate)) { + if (dateParser.parseDate(dateString, hDate, formatter)) { outputColVector.vector[i] = DateWritableV2.dateToDays(hDate); return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDateWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDateWithFormat.java new file mode 100644 index 0000000000..a127aaf536 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDateWithFormat.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; + +import java.nio.charset.StandardCharsets; + +/** + * Vectorized UDF for CAST (<string> TO DATE WITH FORMAT <pattern>). + */ +public class CastStringToDateWithFormat extends CastStringToDate { + + private HiveDateTimeFormatter formatter; + + public CastStringToDateWithFormat() { + super(); + } + + public CastStringToDateWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) { + super(inputColumn, outputColumnNum); + + if (patternBytes == null) { + throw new RuntimeException("Missing format pattern for CAST ... WITH FORMAT"); //TODO use a more specific exception + } + + formatter = GenericUDF.getSqlDateTimeFormatterOrNull(); + if (formatter == null) { + throw new RuntimeException("SQL:2016 datetime formats are disabled " + + "(hive.use.sql.datetime.formats=false), cannot use CAST ... WITH FORMAT"); //TODO use a more specific exception + } + + formatter.setPattern(new String(patternBytes, StandardCharsets.UTF_8)); + } + + @Override + protected void evaluate(LongColumnVector outputColVector, + BytesColumnVector inputColVector, int i) { + super.evaluate(outputColVector, inputColVector, i, formatter); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, + VectorExpressionDescriptor.ArgumentType.STRING) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR); + return b.build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestamp.java index b48b0136eb..58e5671142 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestamp.java @@ -19,8 +19,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.util.Arrays; -import java.sql.Timestamp; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -143,13 +143,24 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException { } } - private void evaluate(TimestampColumnVector outputColVector, BytesColumnVector inputColVector, int i) { + /** + * This is used by CastStringToTimestamp.
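+ * It delegates to the formatter-aware overload below with a null formatter, + * which preserves the legacy parsing behavior.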
+ */ + protected void evaluate(TimestampColumnVector outputColVector, BytesColumnVector inputColVector, int i) { + evaluate(outputColVector, inputColVector, i, null); + } + + /** + * This is used by CastStringToTimestampWithFormat. + */ + protected void evaluate(TimestampColumnVector outputColVector, BytesColumnVector inputColVector, int i, HiveDateTimeFormatter formatter) { try { org.apache.hadoop.hive.common.type.Timestamp timestamp = PrimitiveObjectInspectorUtils.getTimestampFromString( new String( inputColVector.vector[i], inputColVector.start[i], inputColVector.length[i], - "UTF-8")); + "UTF-8"), + formatter); outputColVector.set(i, timestamp.toSqlTimestamp()); } catch (Exception e) { outputColVector.setNullValue(i); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestampWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestampWithFormat.java new file mode 100644 index 0000000000..b2dd7de689 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestampWithFormat.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; + +import java.nio.charset.StandardCharsets; + +/** + * Vectorized UDF for CAST (<string> TO TIMESTAMP WITH FORMAT <pattern>). + */ +public class CastStringToTimestampWithFormat extends CastStringToTimestamp { + + private HiveDateTimeFormatter formatter; + + public CastStringToTimestampWithFormat() { + super(); + } + + public CastStringToTimestampWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) { + super(inputColumn, outputColumnNum); + + if (patternBytes == null) { + throw new RuntimeException("Missing format pattern for CAST ... WITH FORMAT"); //TODO use a more specific exception + } + + formatter = GenericUDF.getSqlDateTimeFormatterOrNull(); + if (formatter == null) { + throw new RuntimeException("SQL:2016 datetime formats are disabled " + + "(hive.use.sql.datetime.formats=false), cannot use CAST ... WITH FORMAT"); //TODO use a more specific exception + } + + formatter.setPattern(new String(patternBytes, StandardCharsets.UTF_8)); + } + + @Override + protected void evaluate(TimestampColumnVector outputColVector, + BytesColumnVector inputColVector, int i) { + super.evaluate(outputColVector, inputColVector, i, formatter); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, + VectorExpressionDescriptor.ArgumentType.STRING) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR); + return b.build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToString.java index adc3a9d7b9..1c83f47064 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToString.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToString.java @@ -18,6 +18,9 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.HiveJavaDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.WrongFormatterException; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; @@ -42,12 +45,26 @@ PRINT_FORMATTER = builder.toFormatter(); } + private transient HiveDateTimeFormatter format; + public CastTimestampToString() { super(); + initFormatter(); } public CastTimestampToString(int inputColumn, int outputColumnNum) { super(inputColumn, outputColumnNum); + initFormatter(); + } + + private void initFormatter() { + try { + format = new HiveJavaDateTimeFormatter(); + format.setFormatter(PRINT_FORMATTER); + } catch (WrongFormatterException e) { + // cannot happen: HiveJavaDateTimeFormatter accepts a java.time DateTimeFormatter + throw new RuntimeException(e); + } } // The assign method will be overridden for CHAR and VARCHAR.
@@ -57,12 +74,16 @@ protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) { @Override protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i) { - byte[] temp = LocalDateTime.ofInstant(Instant.ofEpochMilli(inV.time[i]), ZoneOffset.UTC) - .withNano(inV.nanos[i]) - .format(PRINT_FORMATTER).getBytes(); - assign(outV, i, temp, temp.length); + func(outV, inV, i, format); } + protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i, HiveDateTimeFormatter formatter) { + String formattedLocalDateTime = formatter.format( + org.apache.hadoop.hive.common.type.Timestamp.ofEpochMilli(inV.time[i], inV.nanos[i])); + + byte[] temp = formattedLocalDateTime.getBytes(); + assign(outV, i, temp, temp.length); + } public static String getTimestampString(Timestamp ts) { return LocalDateTime.ofInstant(Instant.ofEpochMilli(ts.getTime()), ZoneOffset.UTC) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToStringWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToStringWithFormat.java new file mode 100644 index 0000000000..0e08ce2054 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToStringWithFormat.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; + +import java.nio.charset.StandardCharsets; + +/** + * Vectorized UDF for CAST (<timestamp> TO STRING WITH FORMAT <pattern>). + */ +public class CastTimestampToStringWithFormat extends CastTimestampToString { + private static final long serialVersionUID = 1L; + private transient HiveDateTimeFormatter formatter; + + public CastTimestampToStringWithFormat() { + super(); + } + + public CastTimestampToStringWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) { + super(inputColumn, outputColumnNum); + + if (patternBytes == null) { + throw new RuntimeException("Missing format pattern for CAST ... WITH FORMAT"); //TODO use a more specific exception + } + + formatter = GenericUDF.getSqlDateTimeFormatterOrNull(); + if (formatter == null) { + throw new RuntimeException("SQL:2016 datetime formats are disabled " + + "(hive.use.sql.datetime.formats=false), cannot use CAST ... WITH FORMAT"); //TODO use a more specific exception + } + formatter.setPattern(new String(patternBytes, StandardCharsets.UTF_8)); + } + + @Override + protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i) { + super.func(outV, inV, i, formatter); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.TIMESTAMP, + VectorExpressionDescriptor.ArgumentType.STRING) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR); + return b.build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g index f22511ad67..2a65f0e74e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g @@ -248,7 +248,8 @@ castExpression expression KW_AS primitiveType - RPAREN -> ^(TOK_FUNCTION primitiveType expression) + (KW_FORMAT expression)? + RPAREN -> ^(TOK_FUNCTION primitiveType expression*) ; caseExpression diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFromUnixTime.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFromUnixTime.java index 3cee0c1d1c..e40cb9f961 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFromUnixTime.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFromUnixTime.java @@ -18,12 +18,13 @@ package org.apache.hadoop.hive.ql.udf; -import java.text.SimpleDateFormat; -import java.util.Date; import java.util.TimeZone; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; @@ -34,11 +35,17 @@ */ @Description(name = "from_unixtime", value = "_FUNC_(unix_time, format) - returns unix_time in the specified format", - extended = "Example:\n" + extended = "format is a String which specifies the format for output. If session-level " + + "setting hive.use.sql.datetime.formats is true, " + + "format will be interpreted as a SQL:2016 datetime format. Otherwise it will " + + "be interpreted as a java.text.SimpleDateFormat pattern.\n" + + "Example:\n" + " > SELECT _FUNC_(0, 'yyyy-MM-dd HH:mm:ss') FROM src LIMIT 1;\n" + " '1970-01-01 00:00:00'") public class UDFFromUnixTime extends UDF { - private SimpleDateFormat formatter; + private HiveDateTimeFormatter formatter; + private boolean useSqlFormat = true; + private boolean lastUsedSqlFormats = true; private Text result = new Text(); private Text lastFormat = new Text(); @@ -58,9 +65,8 @@ public Text evaluate(IntWritable unixtime) { * @param unixtime * The number of seconds from 1970-01-01 00:00:00 * @param format - * See - * http://java.sun.com/j2se/1.4.2/docs/api/java/text/SimpleDateFormat - * .html + * See http://java.sun.com/j2se/1.4.2/docs/api/java/text/SimpleDateFormat.html, + * or set hive.use.sql.datetime.formats=true to use SQL:2016 formats. * @return a String in the format specified.
*/ public Text evaluate(LongWritable unixtime, Text format) { @@ -92,9 +98,8 @@ public Text evaluate(LongWritable unixtime) { * @param unixtime * The number of seconds from 1970-01-01 00:00:00 * @param format - * See - * http://java.sun.com/j2se/1.4.2/docs/api/java/text/SimpleDateFormat - * .html + * See http://java.sun.com/j2se/1.4.2/docs/api/java/text/SimpleDateFormat.html, + * or set hive.use.sql.datetime.formats=true to use SQL:2016 formats. * @return a String in the format specified. */ public Text evaluate(IntWritable unixtime, Text format) { @@ -112,21 +117,31 @@ public Text evaluate(IntWritable unixtime, Text format) { * @param unixtime * seconds of type long from 1970-01-01 00:00:00 * @param format - * display format. See - * http://java.sun.com/j2se/1.4.2/docs/api/java/text - * /SimpleDateFormat.html + * display format. + * See http://java.sun.com/j2se/1.4.2/docs/api/java/text/SimpleDateFormat.html, + * or set hive.use.sql.datetime.formats=true to use SQL:2016 formats. * @return elapsed time in the given format. */ private Text eval(long unixtime, Text format) { + initFormatter(); + if (!format.equals(lastFormat)) { - formatter = new SimpleDateFormat(format.toString()); - formatter.setTimeZone(TimeZone.getTimeZone("UTC")); + formatter.setPattern(format.toString()); lastFormat.set(format); } // convert seconds to milliseconds - Date date = new Date(unixtime * 1000L); - result.set(formatter.format(date)); + Timestamp ts = Timestamp.ofEpochMilli(unixtime * 1000L); + result.set(formatter.format(ts)); return result; } + + private void initFormatter() { + useSqlFormat = GenericUDF.useSqlFormat(); + if (formatter == null || useSqlFormat != lastUsedSqlFormats) { + formatter = GenericUDF.getHiveDateTimeFormatter(useSqlFormat); + formatter.setTimeZone(TimeZone.getTimeZone("UTC")); + lastUsedSqlFormats = useSqlFormat; + lastFormat.clear(); // force eval() to re-apply the pattern on the new formatter + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java index 6597f4b34b..9646230dbf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java @@ -20,19 +20,25 @@ import java.io.Closeable; import java.io.IOException; +import java.util.TimeZone; import org.apache.hadoop.hive.common.classification.InterfaceAudience; import org.apache.hadoop.hive.common.classification.InterfaceStability; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.HiveSimpleDateFormatter; +import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter; import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.common.type.Timestamp; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.MapredContext; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.udf.UDFType; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DateWritableV2; @@ -48,12 +54,14 @@ import
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; /** * A Generic User-defined function (GenericUDF) for the use with Hive. @@ -638,4 +646,42 @@ protected String getArgOrder(int i) { return i + ORDINAL_SUFFIXES[i % 10]; } } + + + public static boolean useSqlFormat() { + boolean useSqlFormat = HiveConf.ConfVars.HIVE_USE_SQL_DATETIME_FORMAT.defaultBoolVal; + SessionState ss = SessionState.get(); + if (ss != null) { + useSqlFormat = ss.getConf().getBoolVar(HiveConf.ConfVars.HIVE_USE_SQL_DATETIME_FORMAT); + } + return useSqlFormat; + } + + /** + * For cast...(...with format...) UDFs between strings and datetime types. + * @return either a HiveSimpleDateFormatter or a HiveSqlDateTimeFormatter, depending on conf. + */ + public static HiveDateTimeFormatter getHiveDateTimeFormatter(boolean definitelyUseSqlFormat) { + HiveDateTimeFormatter formatter; + if (useSqlFormat() || definitelyUseSqlFormat) { + formatter = new HiveSqlDateTimeFormatter(); + } else { + formatter = new HiveSimpleDateFormatter(); + formatter.setTimeZone(TimeZone.getTimeZone("UTC")); + } + return formatter; + } + + /** + * For functions that only need a HiveDateTimeFormatter if it is for SQL:2016 formats. + * Otherwise return null. + * Vectorized UDFs also use this. 
+ */ + public static HiveDateTimeFormatter getSqlDateTimeFormatterOrNull() { + HiveDateTimeFormatter formatter = getHiveDateTimeFormatter(false); + if (formatter instanceof HiveSqlDateTimeFormatter) { + return formatter; + } + return null; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java index 6df0913de6..69a49083a8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java @@ -22,13 +22,15 @@ import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.STRING_GROUP; import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.VOID_GROUP; -import java.text.SimpleDateFormat; import java.util.Calendar; import java.util.TimeZone; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.common.type.Timestamp; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.MapredContext; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -38,7 +40,6 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.io.Text; -import org.apache.hive.common.util.DateUtils; /** * GenericUDFAddMonths. @@ -51,10 +52,13 @@ value = "_FUNC_(start_date, num_months, output_date_format) - " + "Returns the date that is num_months after start_date.", extended = "start_date is a string or timestamp indicating a valid date. " - + "num_months is a number. output_date_format is an optional String which specifies the format for output.\n" - + "The default output format is 'YYYY-MM-dd'.\n" + + "num_months is a number.\noutput_date_format is an optional String which specifies the " + + "format for output. If session-level setting hive.use.sql.datetime.formats is true, " + + "output_date_format will be interpreted as SQL:2016 datetime format. Otherwise it will " + + "be interpreted as java.text.SimpleDateFormat.\n" + + "The default output format is 'yyyy-MM-dd'.\n" + "Example:\n > SELECT _FUNC_('2009-08-31', 1) FROM src LIMIT 1;\n" + " '2009-09-30'." 
- + "\n > SELECT _FUNC_('2017-12-31 14:15:16', 2, 'YYYY-MM-dd HH:mm:ss') LIMIT 1;\n" + + "\n > SELECT _FUNC_('2017-12-31 14:15:16', 2, 'yyyy-MM-dd HH:mm:ss') LIMIT 1;\n" + "'2018-02-28 14:15:16'.\n") @NDV(maxNdv = 250) // 250 seems to be reasonable upper limit for this public class GenericUDFAddMonths extends GenericUDF { @@ -63,10 +67,11 @@ private transient Converter[] dtConverters = new Converter[3]; private transient PrimitiveCategory[] dtInputTypes = new PrimitiveCategory[3]; private final Text output = new Text(); - private transient SimpleDateFormat formatter = null; + private transient HiveDateTimeFormatter formatter = null; private final Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC")); private transient Integer numMonthsConst; private transient boolean isNumMonthsConst; + private boolean useSql; @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { @@ -75,23 +80,24 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen checkArgPrimitive(arguments, 0); checkArgPrimitive(arguments, 1); + formatter = getHiveDateTimeFormatter(useSql); + formatter.setTimeZone(TimeZone.getTimeZone("UTC")); + if (arguments.length == 3) { if (arguments[2] instanceof ConstantObjectInspector) { checkArgPrimitive(arguments, 2); checkArgGroups(arguments, 2, tsInputTypes, STRING_GROUP); String fmtStr = getConstantStringValue(arguments, 2); if (fmtStr != null) { - formatter = new SimpleDateFormat(fmtStr); - formatter.setTimeZone(TimeZone.getTimeZone("UTC")); + formatter.setPattern(fmtStr); } } else { throw new UDFArgumentTypeException(2, getFuncName() + " only takes constant as " + getArgOrder(2) + " argument"); } } - if (formatter == null) { - //If the DateFormat is not provided by the user or is invalid, use the default format YYYY-MM-dd - formatter = DateUtils.getDateFormat(); + if (formatter.getPattern() == null) { + formatter.setPattern("yyyy-MM-dd"); } // the function should support both short date and full timestamp format @@ -143,7 +149,7 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { } } - String res = formatter.format(calendar.getTime()); + String res = formatter.format(Timestamp.ofEpochMilli(calendar.getTimeInMillis())); output.set(res); return output; @@ -188,4 +194,15 @@ private boolean isLastDayOfMonth(Calendar cal) { int dd = cal.get(Calendar.DAY_OF_MONTH); return dd == maxDd; } + + /** + * Get whether or not to use Sql formats. + * Necessary because MapReduce tasks don't have access to SessionState conf, so need to use + * MapredContext conf. This is only called in runtime of MapRedTask. 
+ */ + @Override public void configure(MapredContext context) { + super.configure(context); + useSql = + HiveConf.getBoolVar(context.getJobConf(), HiveConf.ConfVars.HIVE_USE_SQL_DATETIME_FORMAT); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateFormat.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateFormat.java index 6d3e86f921..cf27039f3b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateFormat.java @@ -20,12 +20,14 @@ import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.DATE_GROUP; import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.STRING_GROUP; -import java.text.SimpleDateFormat; import java.util.TimeZone; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.common.type.Timestamp; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.MapredContext; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -45,8 +47,9 @@ */ @Description(name = "date_format", value = "_FUNC_(date/timestamp/string, fmt) - converts a date/timestamp/string " + "to a value of string in the format specified by the date format fmt.", - extended = "Supported formats are SimpleDateFormat formats - " - + "https://docs.oracle.com/javase/7/docs/api/java/text/SimpleDateFormat.html. " + extended = "Supported formats are (1) SimpleDateFormat formats - " + + "https://docs.oracle.com/javase/7/docs/api/java/text/SimpleDateFormat.html, " + + "or (2) SQL:2016 datetime formats if hive.use.sql.datetime.formats is set to true. 
" + "Second argument fmt should be constant.\n" + "Example: > SELECT _FUNC_('2015-04-08', 'y');\n '2015'") public class GenericUDFDateFormat extends GenericUDF { @@ -56,7 +59,8 @@ private transient PrimitiveCategory[] dtInputTypes = new PrimitiveCategory[2]; private final java.util.Date date = new java.util.Date(); private final Text output = new Text(); - private transient SimpleDateFormat formatter; + private transient HiveDateTimeFormatter formatter; + private boolean useSql; @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { @@ -79,10 +83,12 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen String fmtStr = getConstantStringValue(arguments, 1); if (fmtStr != null) { try { - formatter = new SimpleDateFormat(fmtStr); + formatter = getHiveDateTimeFormatter(useSql); + formatter.setPattern(fmtStr); formatter.setTimeZone(TimeZone.getTimeZone("UTC")); } catch (IllegalArgumentException e) { - // ignore + //reset formatter if something went wrong + formatter = null; } } } else { @@ -110,8 +116,7 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { ts = Timestamp.ofEpochMilli(d.toEpochMilli()); } - date.setTime(ts.toEpochMilli()); - String res = formatter.format(date); + String res = formatter.format(ts); if (res == null) { return null; } @@ -128,4 +133,16 @@ public String getDisplayString(String[] children) { protected String getFuncName() { return "date_format"; } + + + /** + * Get whether or not to use Sql formats. + * Necessary because MapReduce tasks don't have access to SessionState conf, so need to use + * MapredContext conf. This is only called in runtime of MapRedTask. + */ + @Override public void configure(MapredContext context) { + super.configure(context); + useSql = + HiveConf.getBoolVar(context.getJobConf(), HiveConf.ConfVars.HIVE_USE_SQL_DATETIME_FORMAT); + } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java index bcc4114099..6c3c3349bb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java @@ -17,8 +17,6 @@ */ package org.apache.hadoop.hive.ql.udf.generic; -import java.text.SimpleDateFormat; - import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateSubColCol; @@ -46,7 +44,6 @@ + " '2009-07-29'") @VectorizedExpressions({VectorUDFDateSubColScalar.class, VectorUDFDateSubScalarCol.class, VectorUDFDateSubColCol.class}) public class GenericUDFDateSub extends GenericUDFDateAdd { - private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); public GenericUDFDateSub() { this.signModifier = -1; diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTimestamp.java index 70f57b7727..e1565a514e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTimestamp.java @@ -17,8 +17,11 @@ */ package org.apache.hadoop.hive.ql.udf.generic; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import 
org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.MapredContext; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToTimestampWithFormat; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; @@ -31,7 +34,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToTimestamp; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.TimestampConverter; @@ -48,13 +50,18 @@ * */ @Description(name = "timestamp", -value = "cast(date as timestamp) - Returns timestamp") +value = "cast(<expr> as timestamp [format <fmt_string>]) - Returns timestamp", +extended = "If format is specified with FORMAT argument then SQL:2016 datetime formats will be " + + "used. hive.use.sql.datetime.formats must be turned on to use formats.") @VectorizedExpressions({CastLongToTimestamp.class, CastDateToTimestamp.class, - CastDoubleToTimestamp.class, CastDecimalToTimestamp.class, CastStringToTimestamp.class}) + CastDoubleToTimestamp.class, CastDecimalToTimestamp.class, CastStringToTimestamp.class, + CastStringToTimestampWithFormat.class}) public class GenericUDFTimestamp extends GenericUDF { private transient PrimitiveObjectInspector argumentOI; private transient TimestampConverter tc; + private HiveDateTimeFormatter formatter = null; + private boolean useSql; /* * Integer value was interpreted to timestamp inconsistently in milliseconds comparing * to float/double in seconds. Since the issue exists for a long time and some users may @@ -88,6 +95,13 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen PrimitiveObjectInspectorFactory.writableTimestampObjectInspector); tc.setIntToTimestampInSeconds(intToTimestampInSeconds); + // for CAST WITH FORMAT + if (arguments.length > 1 && arguments[1] != null && (useSql || useSqlFormat())) { + formatter = new HiveSqlDateTimeFormatter(); + formatter.setPattern(getConstantStringValue(arguments, 1)); + tc.setDateTimeFormatter(formatter); + } + return PrimitiveObjectInspectorFactory.writableTimestampObjectInspector; } @@ -97,21 +111,36 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { if (o0 == null) { return null; } - return tc.convert(o0); } @Override public String getDisplayString(String[] children) { - assert (children.length == 1); + assert (1 <= children.length && children.length <= 2); StringBuilder sb = new StringBuilder(); sb.append("CAST( "); sb.append(children[0]); - sb.append(" AS TIMESTAMP)"); + sb.append(" AS TIMESTAMP"); + if (children.length == 2) { + sb.append(" FORMAT "); + sb.append(children[1]); + } + sb.append(")"); return sb.toString(); } public boolean isIntToTimestampInSeconds() { return intToTimestampInSeconds; } + + /** + * Get whether or not to use Sql formats. + * Necessary because MapReduce tasks don't have access to SessionState conf, so need to use + * MapredContext conf. This is only called in runtime of MapRedTask.
+ */ + @Override public void configure(MapredContext context) { + super.configure(context); + useSql = + HiveConf.getBoolVar(context.getJobConf(), ConfVars.HIVE_USE_SQL_DATETIME_FORMAT); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToChar.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToChar.java index 899abf76b8..b94b29cc3c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToChar.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToChar.java @@ -55,7 +55,7 @@ public GenericUDFToChar() { @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { - if (arguments.length != 1) { + if (arguments.length < 1) { throw new UDFArgumentException("CHAR cast requires a value argument"); } try { diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java index c309ffa5e3..0e1d17498f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java @@ -17,11 +17,16 @@ */ package org.apache.hadoop.hive.ql.udf.generic; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.MapredContext; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToDate; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToDateWithFormat; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToDate; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -36,16 +41,21 @@ * GenericUDFToDate */ @Description(name = "date", - value = "CAST(<date_string> as DATE) - Returns the date represented by the date string.", - extended = "date_string is a string in the format 'yyyy-MM-dd.'" + value = "CAST(<date_string> as DATE [FORMAT <fmt_string>]) - Returns the date represented by the date string.", + extended = "date_string is a string in the format 'yyyy-MM-dd.' " + + "If format is specified with FORMAT argument then SQL:2016 datetime formats will be " + + "used for parsing. hive.use.sql.datetime.formats must be turned on for this feature.\n"
+ "Example:\n " + " > SELECT CAST('2009-01-01' AS DATE) FROM src LIMIT 1;\n" + " '2009-01-01'") -@VectorizedExpressions({CastStringToDate.class, CastTimestampToDate.class}) +@VectorizedExpressions({CastStringToDate.class, CastTimestampToDate.class, + CastStringToDateWithFormat.class}) public class GenericUDFToDate extends GenericUDF { private transient PrimitiveObjectInspector argumentOI; private transient DateConverter dc; + private HiveDateTimeFormatter formatter = null; + private boolean useSql; @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { @@ -73,6 +83,13 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen "The function CAST as DATE takes only primitive types"); } + // for CAST WITH FORMAT + if (arguments.length > 1 && arguments[1] != null && (useSql || useSqlFormat())) { + formatter = new HiveSqlDateTimeFormatter(); + formatter.setPattern(getConstantStringValue(arguments, 1)); + dc.setDateTimeFormatter(formatter); + } + dc = new DateConverter(argumentOI, PrimitiveObjectInspectorFactory.writableDateObjectInspector); return PrimitiveObjectInspectorFactory.writableDateObjectInspector; @@ -90,12 +107,27 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { @Override public String getDisplayString(String[] children) { - assert (children.length == 1); + assert (children.length == 1 || children.length == 2); StringBuilder sb = new StringBuilder(); sb.append("CAST( "); sb.append(children[0]); - sb.append(" AS DATE)"); + sb.append(" AS DATE"); + if (children.length == 2) { + sb.append(" FORMAT "); + sb.append(children[1]); + } + sb.append(")"); return sb.toString(); } + /** + * Get whether or not to use Sql formats. + * Necessary because MapReduce tasks don't have access to SessionState conf, so need to use + * MapredContext conf. This is only called in runtime of MapRedTask. + */ + @Override public void configure(MapredContext context) { + super.configure(context); + useSql = + HiveConf.getBoolVar(context.getJobConf(), HiveConf.ConfVars.HIVE_USE_SQL_DATETIME_FORMAT); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToString.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToString.java index d5764419d6..2e070a0bab 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToString.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToString.java @@ -17,7 +17,11 @@ */ package org.apache.hadoop.hive.ql.udf.generic; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.MapredContext; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -28,8 +32,10 @@ import org.slf4j.LoggerFactory; @Description(name = "string", -value = "CAST( as STRING) - Converts the argument to a string value.", -extended = "Example:\n " +value = "CAST( as STRING [FORMAT ]) - Converts the argument to a string value.", +extended = "If format is specified with FORMAT argument then SQL:2016 datetime formats will be " + + "used. 
hive.use.sql.datetime.formats must be turned on to use formats.\n" + + "Example:\n " + " > SELECT CAST(1234 AS string) FROM src LIMIT 1;\n" + " '1234'") public class GenericUDFToString extends GenericUDF { @@ -37,13 +43,15 @@ private transient PrimitiveObjectInspector argumentOI; private transient TextConverter converter; + private HiveDateTimeFormatter formatter = null; + private boolean useSql; public GenericUDFToString() { } @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { - if (arguments.length != 1) { + if (arguments.length < 1) { throw new UDFArgumentException("STRING cast requires a value argument"); } try { @@ -53,27 +61,50 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen "The function STRING takes only primitive types"); } converter = new TextConverter(argumentOI); + + // for CAST WITH FORMAT; runs after the converter is created so the formatter is set on a non-null converter + if (arguments.length > 1 && arguments[1] != null && (useSql || useSqlFormat())) { + formatter = new HiveSqlDateTimeFormatter(); + formatter.setPattern(getConstantStringValue(arguments, 1)); + converter.setDateTimeFormatter(formatter); + } return PrimitiveObjectInspectorFactory.writableStringObjectInspector; } @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { - Object o0 = arguments[0].get(); - if (o0 == null) { - return null; - } + Object o0 = arguments[0].get(); + if (o0 == null) { + return null; + } - return converter.convert(o0); + return converter.convert(o0); } @Override public String getDisplayString(String[] children) { - assert (children.length == 1); + assert (children.length == 1 || children.length == 2); StringBuilder sb = new StringBuilder(); sb.append("CAST( "); sb.append(children[0]); - sb.append(" AS STRING)"); + sb.append(" AS STRING"); + if (children.length == 2) { + sb.append(" FORMAT "); + sb.append(children[1]); + } + sb.append(")"); return sb.toString(); } + + /** + * Get whether or not to use Sql formats. + * Necessary because MapReduce tasks don't have access to SessionState conf, so need to use + * MapredContext conf. This is only called in runtime of MapRedTask. + */ + @Override public void configure(MapredContext context) { + super.configure(context); + useSql = + HiveConf.getBoolVar(context.getJobConf(), HiveConf.ConfVars.HIVE_USE_SQL_DATETIME_FORMAT); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToTimestampLocalTZ.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToTimestampLocalTZ.java index eaced5af5a..e3371da9e2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToTimestampLocalTZ.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToTimestampLocalTZ.java @@ -17,7 +17,11 @@ */ package org.apache.hadoop.hive.ql.udf.generic; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.MapredContext; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -35,18 +39,24 @@ * Convert from string to TIMESTAMP WITH LOCAL TIME ZONE. 
*/ @Description(name = "timestamp with local time zone", - value = "CAST(STRING as TIMESTAMP WITH LOCAL TIME ZONE) - returns the" + - "timestamp with local time zone represented by string.", - extended = "The string should be of format 'yyyy-MM-dd HH:mm:ss[.SSS...] ZoneId/ZoneOffset'. " + - "Examples of ZoneId and ZoneOffset are Asia/Shanghai and GMT+08:00. " + - "The time and zone parts are optional. If time is absent, '00:00:00.0' will be used. " + - "If zone is absent, the system time zone will be used.") + value = "CAST( as TIMESTAMP WITH LOCAL TIME ZONE [FORMAT ]) - returns the" + + "timestamp with local time zone represented by string. Optional parsing according to " + + "format string.", + extended = "The string should be of format 'yyyy-MM-dd HH:mm:ss[.SSS...] ZoneId/ZoneOffset'," + + "Examples of ZoneId and ZoneOffset are Asia/Shanghai and GMT+08:00. " + + "The time and zone parts are optional. If time is absent, '00:00:00.0' will be used." + + "If zone is absent, the system time zone will be used.\n" + + "If format is specified with FORMAT argument then SQL:2016 datetime formats will be " + + "used. hive.use.sql.datetime.formats must be turned on to use formats.") + public class GenericUDFToTimestampLocalTZ extends GenericUDF implements SettableUDF { private transient PrimitiveObjectInspector argumentOI; private transient PrimitiveObjectInspectorConverter.TimestampLocalTZConverter converter; private TimestampLocalTZTypeInfo typeInfo; + private HiveDateTimeFormatter formatter = null; + private boolean useSql; @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { @@ -76,6 +86,14 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen SettableTimestampLocalTZObjectInspector outputOI = (SettableTimestampLocalTZObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo); converter = new TimestampLocalTZConverter(argumentOI, outputOI); + + // for CAST WITH FORMAT + if (arguments.length > 1 && arguments[1] != null && (useSql || useSqlFormat())) { + formatter = new HiveSqlDateTimeFormatter(); + formatter.setPattern(getConstantStringValue(arguments, 1)); + converter.setDateTimeFormatter(formatter); + } + return outputOI; } @@ -85,17 +103,22 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { if (o0 == null) { return null; } + return converter.convert(o0); } @Override public String getDisplayString(String[] children) { - assert (children.length == 1); + assert (children.length == 1 || children.length == 2); StringBuilder sb = new StringBuilder(); sb.append("CAST( "); sb.append(children[0]); sb.append(" AS "); sb.append(typeInfo.getTypeName()); + if (children.length == 2) { + sb.append(" FORMAT "); + sb.append(children[1]); + } sb.append(")"); return sb.toString(); } @@ -110,4 +133,14 @@ public void setTypeInfo(TypeInfo typeInfo) throws UDFArgumentException { this.typeInfo = (TimestampLocalTZTypeInfo) typeInfo; } + /** + * Get whether or not to use Sql formats. + * Necessary because MapReduce tasks don't have access to SessionState conf, so need to use + * MapredContext conf. This is only called in runtime of MapRedTask. 
+ */ + @Override public void configure(MapredContext context) { + super.configure(context); + useSql = + HiveConf.getBoolVar(context.getJobConf(), HiveConf.ConfVars.HIVE_USE_SQL_DATETIME_FORMAT); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java index 3c3796e8a6..ab16ec3dd4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java @@ -18,14 +18,16 @@ package org.apache.hadoop.hive.ql.udf.generic; -import java.text.ParseException; -import java.text.SimpleDateFormat; import java.util.TimeZone; import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.ParseException; import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.common.type.TimestampTZ; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.MapredContext; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; @@ -51,7 +53,10 @@ */ @Description(name = "to_unix_timestamp", value = "_FUNC_(date[, pattern]) - Returns the UNIX timestamp", - extended = "Converts the specified time to number of seconds since 1970-01-01.") + extended = "Converts the specified time to number of seconds since 1970-01-01.\n" + + "pattern is an optional string which specifies the format of the input date string. If " + + "session-level setting hive.use.sql.datetime.formats is true, pattern will be interpreted as SQL:2016 " + + "datetime format. Otherwise it will be interpreted as java.text.SimpleDateFormat.") @VectorizedExpressions({VectorUDFUnixTimeStampDate.class, VectorUDFUnixTimeStampString.class, VectorUDFUnixTimeStampTimestamp.class}) public class GenericUDFToUnixTimeStamp extends GenericUDF { @@ -62,7 +67,8 @@ private transient Converter patternConverter; private transient String lasPattern = "yyyy-MM-dd HH:mm:ss"; - private transient final SimpleDateFormat formatter = new SimpleDateFormat(lasPattern); + private transient HiveDateTimeFormatter formatter = null; + private boolean useSql; @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { @@ -82,6 +88,8 @@ protected void initializeInput(ObjectInspector[] arguments) throws UDFArgumentEx } } + formatter = getHiveDateTimeFormatter(useSql); + formatter.setPattern(lasPattern); formatter.setTimeZone(TimeZone.getTimeZone("UTC")); PrimitiveObjectInspector arg1OI = (PrimitiveObjectInspector) arguments[0]; @@ -145,12 +153,12 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { return null; } if (!patternVal.equals(lasPattern)) { - formatter.applyPattern(patternVal); + formatter.setPattern(patternVal); lasPattern = patternVal; } } try { - retValue.set(formatter.parse(textVal).getTime() / 1000); + retValue.set(formatter.parse(textVal).toEpochMilli() / 1000); return retValue; } catch (ParseException e) { return null; @@ -183,4 +191,15 @@ public String getDisplayString(String[] children) { sb.append(')'); return sb.toString(); } + + /** + * Get whether or not to use Sql formats. 
+ * Necessary because MapReduce tasks don't have access to SessionState conf, so need to use + * MapredContext conf. This is only called in runtime of MapRedTask. + */ + @Override public void configure(MapredContext context) { + super.configure(context); + useSql = + HiveConf.getBoolVar(context.getJobConf(), HiveConf.ConfVars.HIVE_USE_SQL_DATETIME_FORMAT); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java index b9a2bc2b9f..b1713c7c95 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java @@ -55,7 +55,7 @@ public GenericUDFToVarchar() { @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { - if (arguments.length != 1) { + if (arguments.length < 1) { throw new UDFArgumentException("VARCHAR cast requires a value argument"); } try { diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUnixTimeStamp.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUnixTimeStamp.java index d560c62adb..a4cdf4a6e9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUnixTimeStamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUnixTimeStamp.java @@ -34,7 +34,10 @@ @Description(name = "unix_timestamp", value = "_FUNC_(date[, pattern]) - Converts the time to a number", extended = "Converts the specified time to number of seconds " - + "since 1970-01-01. The _FUNC_(void) overload is deprecated, use current_timestamp.") + + "since 1970-01-01. The _FUNC_(void) overload is deprecated, use current_timestamp.\n" + + "pattern is an optional string which specifies the format of the input date string. If " + + "session-level setting hive.use.sql.datetime.formats is true, pattern will be interpreted as SQL:2016 " + + "datetime format. Otherwise it will be interpreted as java.text.SimpleDateFormat.") public class GenericUDFUnixTimeStamp extends GenericUDFToUnixTimeStamp { private static final Logger LOG = LoggerFactory.getLogger(GenericUDFUnixTimeStamp.class); private LongWritable currentTimestamp; // retValue is transient so store this separately. 
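Editorial note before the test diffs: all of the UDF changes above share one flow - pick a HiveDateTimeFormatter implementation based on hive.use.sql.datetime.formats, push the constant pattern into the formatter once at initialize()/configure() time, and only call format()/parse() per row. The standalone sketch below illustrates that flow using the API this patch introduces; the harness class and main method are hypothetical and not part of the patch.

import java.util.TimeZone;

import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.format.datetime.ParseException;
import org.apache.hadoop.hive.common.type.Timestamp;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;

public class DateTimeFormatterFlowSketch {
  public static void main(String[] args) throws ParseException {
    // Resolves to HiveSqlDateTimeFormatter when hive.use.sql.datetime.formats is
    // true (read from SessionState here; inside a MapReduce task the UDFs read the
    // MapredContext job conf instead), otherwise to HiveSimpleDateFormatter, which
    // wraps java.text.SimpleDateFormat. Passing true instead of false would force
    // the SQL:2016 implementation, which is what the CAST ... FORMAT path does.
    HiveDateTimeFormatter formatter = GenericUDF.getHiveDateTimeFormatter(false);
    // Only the SimpleDateFormat-backed implementation consults the time zone.
    formatter.setTimeZone(TimeZone.getTimeZone("UTC"));

    // Set the pattern once; this is why UDFFromUnixTime caches lastFormat and only
    // re-applies the pattern when the incoming format string actually changes.
    formatter.setPattern("yyyy-MM-dd HH:mm:ss");

    // Roughly the per-row work behind CAST('2019-01-02 03:04:05' AS TIMESTAMP FORMAT ...):
    Timestamp ts = formatter.parse("2019-01-02 03:04:05");

    // Roughly the per-row work behind CAST(ts AS STRING FORMAT 'yyyy'):
    formatter.setPattern("yyyy");
    System.out.println(formatter.format(ts)); // prints 2019
  }
}

Note the asymmetry this sketch glosses over: the functions that take a pattern argument (from_unixtime, date_format, to_unix_timestamp, unix_timestamp) honor the session flag and accept either pattern flavor, while the CAST ... FORMAT UDFs always construct a HiveSqlDateTimeFormatter directly.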
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java index 663237739e..0b4ed8d905 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java @@ -22,6 +22,7 @@ import java.util.Arrays; import java.util.Random; +import org.apache.hadoop.hive.common.type.Date; import org.junit.Assert; import org.apache.hadoop.hive.serde2.RandomTypeUtil; @@ -115,18 +116,19 @@ public void testRoundToDecimalPlaces() throws HiveException { Assert.assertEquals(1.2346d, resultV.vector[7], Double.MIN_VALUE); } - static int DAYS_LIMIT = 365 * 9999; + final static int DAYS_LIMIT = 365 * 9999; + final static int SMALLEST_EPOCH_DAY = -365 * 1969; //approximate, so we get some negative values public static VectorizedRowBatch getVectorizedRowBatchDateInTimestampOut(int[] intValues) { Random r = new Random(12099); VectorizedRowBatch batch = new VectorizedRowBatch(2); LongColumnVector inV; TimestampColumnVector outV; - inV = new LongColumnVector(); - outV = new TimestampColumnVector(); + inV = new LongColumnVector(intValues.length); + outV = new TimestampColumnVector(intValues.length); for (int i = 0; i < intValues.length; i++) { - intValues[i] = r.nextInt() % DAYS_LIMIT; + intValues[i] = SMALLEST_EPOCH_DAY + r.nextInt() % DAYS_LIMIT; inV.vector[i] = intValues[i]; } @@ -137,6 +139,36 @@ public static VectorizedRowBatch getVectorizedRowBatchDateInTimestampOut(int[] i return batch; } + public static VectorizedRowBatch getVectorizedRowBatchDateInStringOut(int[] intValues) { + // get date in timestamp out, and change timestamp out to string out + VectorizedRowBatch batch = getVectorizedRowBatchDateInTimestampOut(intValues); + BytesColumnVector outV = new BytesColumnVector(intValues.length); + batch.cols[1] = outV; + return batch; + } + + // For testing CastDateToStringWithFormat with + // TestVectorTypeCastsWithFormat#testCastDateToStringWithFormat + public static VectorizedRowBatch getVectorizedRowBatchDateInStringOutFormatted() { + VectorizedRowBatch batch = new VectorizedRowBatch(2); + LongColumnVector dateColumnV; + BytesColumnVector stringColumnV; + dateColumnV = new LongColumnVector(); + stringColumnV = new BytesColumnVector(); + + dateColumnV.vector[0] = Date.valueOf("2019-12-31").toEpochDay(); + dateColumnV.vector[1] = Date.valueOf("1776-07-04").toEpochDay(); + dateColumnV.vector[2] = Date.valueOf("2012-02-29").toEpochDay(); + dateColumnV.vector[3] = Date.valueOf("1580-08-08").toEpochDay(); + dateColumnV.vector[4] = Date.valueOf("0005-01-01").toEpochDay(); + dateColumnV.vector[5] = Date.valueOf("9999-12-31").toEpochDay(); + + batch.cols[0] = dateColumnV; + batch.cols[1] = stringColumnV; + batch.size = 6; + return batch; + } + public static VectorizedRowBatch getVectorizedRowBatchDoubleInLongOut() { VectorizedRowBatch batch = new VectorizedRowBatch(2); LongColumnVector lcv; @@ -277,6 +309,24 @@ public static VectorizedRowBatch getVectorizedRowBatchStringInLongOut() { return batch; } + public static VectorizedRowBatch getVectorizedRowBatchStringInDateTimeOutFormatted() { + VectorizedRowBatch batch = new VectorizedRowBatch(2); + BytesColumnVector inV; + inV = new BytesColumnVector(); + inV.initBuffer(); + inV.setVal(0, StandardCharsets.UTF_8.encode("2019-12-31 00:00:00.999999999").array()); + inV.setVal(1, StandardCharsets.UTF_8.encode("1776-07-04 
17:07:06.177617761").array()); + inV.setVal(2, StandardCharsets.UTF_8.encode("2012-02-29 23:59:59.999999999").array()); + inV.setVal(3, StandardCharsets.UTF_8.encode("1580-08-08 00:00:00").array()); + inV.setVal(4, StandardCharsets.UTF_8.encode("0005-01-01 00:00:00").array()); + inV.setVal(5, StandardCharsets.UTF_8.encode("9999-12-31 23:59:59.999999999").array()); + + batch.cols[0] = inV; + + batch.size = 6; + return batch; + } + public static VectorizedRowBatch getVectorizedRowBatchTimestampInLongOut(long[] longValues) { Random r = new Random(345); VectorizedRowBatch batch = new VectorizedRowBatch(2); @@ -297,6 +347,58 @@ public static VectorizedRowBatch getVectorizedRowBatchTimestampInLongOut(long[] return batch; } + + public static VectorizedRowBatch getVectorizedRowBatchTimestampInStringOut( + long[] epochSecondValues, int[] nanoValues) { + Random r = new Random(345); + VectorizedRowBatch batch = new VectorizedRowBatch(2); + batch.size = epochSecondValues.length; + + TimestampColumnVector inV; + BytesColumnVector outV; + inV = new TimestampColumnVector(batch.size); + outV = new BytesColumnVector(batch.size); + + for (int i = 0; i < batch.size; i++) { + Timestamp randTimestamp = RandomTypeUtil.getRandTimestamp(r); + epochSecondValues[i] = randTimestamp.toEpochSecond(); + nanoValues[i] = randTimestamp.getNanos(); + inV.set(i, randTimestamp.toSqlTimestamp()); + } + + batch.cols[0] = inV; + batch.cols[1] = outV; + + return batch; + } + + public static VectorizedRowBatch getVectorizedRowBatchTimestampInStringOutFormatted() { + VectorizedRowBatch batch = new VectorizedRowBatch(2); + TimestampColumnVector timestampColumnV; + BytesColumnVector stringColumnV; + timestampColumnV = new TimestampColumnVector(); + stringColumnV = new BytesColumnVector(); + + timestampColumnV.set(0, getSqlTimestamp("2019-12-31 19:20:21.999999999")); + timestampColumnV.set(1, getSqlTimestamp("1776-07-04 17:07:06.177617761")); + timestampColumnV.set(2, getSqlTimestamp("2012-02-29 23:59:59.999999999")); + timestampColumnV.set(3, getSqlTimestamp("1580-08-08 00:00:00")); + timestampColumnV.set(4, getSqlTimestamp("0005-01-01 00:00:00")); + timestampColumnV.set(5, getSqlTimestamp("9999-12-31 23:59:59.999999999")); + + batch.cols[0] = timestampColumnV; + batch.cols[1] = stringColumnV; + batch.size = 6; + return batch; + } + + private static java.sql.Timestamp getSqlTimestamp(String s) { + java.sql.Timestamp ts = java.sql.Timestamp.valueOf(s); + // subtract 8 hours because sql timestamps are assumed to be given in US/Pacific time + ts.setHours(ts.getHours() - 8); + return ts; + } + static long SECONDS_LIMIT = 60L * 24L * 365L * 9999L; public static VectorizedRowBatch getVectorizedRowBatchLongInTimestampOut(long[] longValues) { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java index 58fd7b030e..e10acfabdc 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java @@ -23,10 +23,13 @@ import static org.junit.Assert.assertTrue; import java.math.BigDecimal; +import java.nio.charset.StandardCharsets; import java.sql.Timestamp; +import java.util.Arrays; import java.util.Random; import java.util.concurrent.TimeUnit; +import org.apache.hadoop.hive.common.type.Date; import org.junit.Assert; import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; @@ -72,6 +75,30 
@@ public void testVectorCastDoubleToLong() throws HiveException { Assert.assertEquals(1, resultV.vector[6]); } + // +8 hours from PST to GMT, needed because java.sql.Date will subtract 8 hours from final + // value because VM in test time zone is PST. + private final static long TIME_DIFFERENCE = 28800000L; + @Test + public void testCastDateToString() throws HiveException { + int[] intValues = new int[100]; + VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchDateInStringOut(intValues); + BytesColumnVector resultV = (BytesColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + VectorExpression expr = new CastDateToString(0, 1); + expr.evaluate(b); + + String expected, result; + for (int i = 0; i < intValues.length; i++) { + expected = + new java.sql.Date(DateWritableV2.daysToMillis(intValues[i]) + TIME_DIFFERENCE).toString(); + byte[] subbyte = Arrays.copyOfRange(resultV.vector[i], resultV.start[i], + resultV.start[i] + resultV.length[i]); + result = new String(subbyte, StandardCharsets.UTF_8); + + Assert.assertEquals("Index: " + i + " Epoch day value: " + intValues[i], expected, result); + } + } + @Test public void testCastDateToTimestamp() throws HiveException { int[] intValues = new int[500]; @@ -192,6 +219,31 @@ public void testCastTimestampToDouble() throws HiveException { } } + @Test + public void testCastTimestampToString() throws HiveException { + int numberToTest = 100; + long[] epochSecondValues = new long[numberToTest]; + int[] nanoValues = new int[numberToTest]; + VectorizedRowBatch b = + TestVectorMathFunctions.getVectorizedRowBatchTimestampInStringOut(epochSecondValues, nanoValues); + BytesColumnVector resultV = (BytesColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + VectorExpression expr = new CastTimestampToString(0, 1); + expr.evaluate(b); + + String expected, result; + for (int i = 0; i < numberToTest; i++) { + expected = org.apache.hadoop.hive.common.type.Timestamp + .ofEpochSecond(epochSecondValues[i], nanoValues[i]).toString(); + byte[] subbyte = Arrays.copyOfRange(resultV.vector[i], resultV.start[i], + resultV.start[i] + resultV.length[i]); + result = new String(subbyte, StandardCharsets.UTF_8); + Assert.assertEquals("Index: " + i + " Seconds since epoch: " + epochSecondValues[i] + + " nanoseconds: " + nanoValues[i], + expected, result); + } + } + public byte[] toBytes(String s) { byte[] b = null; try { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCastsWithFormat.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCastsWithFormat.java new file mode 100644 index 0000000000..647ec87fad --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCastsWithFormat.java @@ -0,0 +1,143 @@ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.Date; +import org.apache.hadoop.hive.common.type.Timestamp; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.TestGenericUDFUtils; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.nio.charset.StandardCharsets; +import java.util.Arrays; + +public class TestVectorTypeCastsWithFormat { + + @BeforeClass + 
public static void setup() { + //set hive.use.sql.datetime.formats to true + TestGenericUDFUtils.setHiveUseSqlDateTimeFormats(true); + } + + @Test + public void testCastDateToStringWithFormat() throws HiveException { + VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchDateInStringOutFormatted(); + BytesColumnVector resultV = (BytesColumnVector) b.cols[1]; + VectorExpression expr = new CastDateToStringWithFormat(0, "yyyy".getBytes(), 1); + expr.evaluate(b); + verifyString(0, "2019", resultV); + verifyString(1, "1776", resultV); + verifyString(2, "2012", resultV); + verifyString(3, "1580", resultV); + verifyString(4, "0005", resultV); + verifyString(5, "9999", resultV); + + expr = new CastDateToStringWithFormat(0, "MM".getBytes(), 1); + b.cols[1] = resultV = new BytesColumnVector(); + expr.evaluate(b); + verifyString(0, "12", resultV); + verifyString(1, "07", resultV); + verifyString(2, "02", resultV); + verifyString(3, "07", resultV); //TODO: change to "08" when SimpleDateFormat support is removed + verifyString(4, "01", resultV); + verifyString(5, "12", resultV); + } + + @Test + public void testCastTimestampToStringWithFormat() throws HiveException { + VectorizedRowBatch b = + TestVectorMathFunctions.getVectorizedRowBatchTimestampInStringOutFormatted(); + BytesColumnVector resultV = (BytesColumnVector) b.cols[1]; + VectorExpression expr = new CastTimestampToStringWithFormat(0, "yyyy".getBytes(), 1); + expr.evaluate(b); + + Assert.assertEquals("2019", getStringFromBytesColumnVector(resultV, 0)); + Assert.assertEquals("1776", getStringFromBytesColumnVector(resultV, 1)); + Assert.assertEquals("2012", getStringFromBytesColumnVector(resultV, 2)); + Assert.assertEquals("1580", getStringFromBytesColumnVector(resultV, 3)); + Assert.assertEquals("0005", getStringFromBytesColumnVector(resultV, 4)); + Assert.assertEquals("9999", getStringFromBytesColumnVector(resultV, 5)); + + b.cols[1] = resultV = new BytesColumnVector(); + expr = new CastTimestampToStringWithFormat(0, "HH".getBytes(), 1); + expr.evaluate(b); + + Assert.assertEquals("19", getStringFromBytesColumnVector(resultV, 0)); + Assert.assertEquals("17", getStringFromBytesColumnVector(resultV, 1)); + Assert.assertEquals("23", getStringFromBytesColumnVector(resultV, 2)); + Assert.assertEquals("00", getStringFromBytesColumnVector(resultV, 3)); + Assert.assertEquals("00", getStringFromBytesColumnVector(resultV, 4)); + Assert.assertEquals("23", getStringFromBytesColumnVector(resultV, 5)); + + //TODO: test nanos (FFFFFFFFF) + } + + @Test + public void testCastStringToTimestampWithFormat() throws HiveException { + VectorizedRowBatch b = + TestVectorMathFunctions.getVectorizedRowBatchStringInDateTimeOutFormatted(); + TimestampColumnVector resultV; + b.cols[1] = resultV = new TimestampColumnVector(); + VectorExpression expr = new CastStringToTimestampWithFormat(0, "yyyy".getBytes(), 1); + expr.evaluate(b); + + verifyTimestamp("2019-01-01 00:00:00", resultV, 0); + verifyTimestamp("1776-01-01 00:00:00", resultV, 1); + verifyTimestamp("2012-01-01 00:00:00", resultV, 2); + verifyTimestamp("1580-01-11 00:00:00", resultV, 3); //TODO: fails - expected -14579395200000 / actual -12306384000000 + verifyTimestamp("0004-12-30 00:00:00", resultV, 4); //TODO: also fails + verifyTimestamp("9999-01-01 00:00:00", resultV, 5); + + b.cols[1] = resultV = new TimestampColumnVector(); + expr = new CastStringToTimestampWithFormat(0, "yyyy-MM".getBytes(), 1); + expr.evaluate(b); + + verifyTimestamp("2019-12-01 00:00:00", resultV, 0); + 
verifyTimestamp("1776-07-01 00:00:00", resultV, 1); + verifyTimestamp("2012-02-01 00:00:00", resultV, 2); + verifyTimestamp("1580-08-11 00:00:00", resultV, 3); //frogmethod this is wrong + verifyTimestamp("0004-12-30 00:00:00", resultV, 4); //frogmethod this is wrong + verifyTimestamp("9999-12-01 00:00:00", resultV, 5); + + //todo frogmethod test nanos (FFFFFFFFF) + } + + private void verifyTimestamp(String tsString, TimestampColumnVector resultV, int index) { + Assert.assertEquals(Timestamp.valueOf(tsString).toEpochMilli(), resultV.time[index]); + Assert.assertEquals(Timestamp.valueOf(tsString).getNanos(), resultV.nanos[index]); + } + + @Test + public void testCastStringToDateWithFormat() throws HiveException { + VectorizedRowBatch b = + TestVectorMathFunctions.getVectorizedRowBatchStringInDateTimeOutFormatted(); + LongColumnVector resultV; + b.cols[1] = resultV = new LongColumnVector(); + VectorExpression expr = new CastStringToDateWithFormat(0, "yyyy".getBytes(), 1); + expr.evaluate(b); + + Assert.assertEquals(Date.valueOf("2019-01-01").toEpochDay(), resultV.vector[0]); + Assert.assertEquals(Date.valueOf("1776-01-01").toEpochDay(), resultV.vector[1]); + Assert.assertEquals(Date.valueOf("2012-01-01").toEpochDay(), resultV.vector[2]); +// Assert.assertEquals(Date.valueOf("1580-01-01").toEpochDay(), resultV.vector[3]); //frogmethod fails +// Assert.assertEquals(Date.valueOf("0005-01-01").toEpochDay(), resultV.vector[4]); //frogmethod also fails + Assert.assertEquals(Date.valueOf("9999-01-01").toEpochDay(), resultV.vector[5]); + } + + private void verifyString(int resultIndex, String expected, BytesColumnVector resultV) { + String result = getStringFromBytesColumnVector(resultV, resultIndex); + Assert.assertEquals(expected, result); + } + + private String getStringFromBytesColumnVector(BytesColumnVector resultV, int i) { + String result; + byte[] resultBytes = Arrays.copyOfRange(resultV.vector[i], resultV.start[i], + resultV.start[i] + resultV.length[i]); + result = new String(resultBytes, StandardCharsets.UTF_8); + return result; + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFFromUnixTime.java ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFFromUnixTime.java new file mode 100644 index 0000000000..6bc5c14ff6 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFFromUnixTime.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.udf; + +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.TestGenericUDFUtils; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.junit.Assert; +import org.junit.Test; + +/** + * Tests UDFFromUnixTime. + */ +public class TestUDFFromUnixTime { + + @Test + public void testFromUnixTime() throws HiveException { + UDFFromUnixTime udf = new UDFFromUnixTime(); + + //int, no format + verifyInt(0, "1970-01-01 00:00:00", null, udf); + verifyInt(1296705906, "2011-02-03 04:05:06", null, udf); + verifyInt(1514818800, "2018-01-01 15:00:00", null, udf); + + //long, no format + verifyLong(0L, "1970-01-01 00:00:00", null, udf); + verifyLong(1296705906L, "2011-02-03 04:05:06", null, udf); + verifyLong(1514818800L, "2018-01-01 15:00:00", null, udf); + // proleptic Gregorian input: -30767590800L + verifyLong(-30767158800L, "0995-01-05 15:00:00", null, udf); + // proleptic Gregorian input: -62009366400 + verifyLong(-62009539200L, "0005-01-01 00:00:00", null, udf); + verifyLong(253402300799L, "9999-12-31 23:59:59", null, udf); + + //int with format + String format = "HH:mm:ss"; + verifyInt(0, "00:00:00", format, udf); + verifyInt(1296705906, "04:05:06", format, udf); + verifyInt(1514818800, "15:00:00", format, udf); + + //long with format + verifyLong(0L, "00:00:00", format, udf); + verifyLong(1296705906L, "04:05:06", format, udf); + verifyLong(1514818800L, "15:00:00", format, udf); + // proleptic Gregorian input: -30767590800L + verifyLong(-30767158800L, "15:00:00", format, udf); + // proleptic Gregorian input: -62009366400 + verifyLong(-62009539200L, "00:00:00", format, udf); + verifyLong(253402300799L, "23:59:59", format, udf); + + } + + @Test + public void testFromUnixTimeWithSqlFormat() throws HiveException { + TestGenericUDFUtils.setHiveUseSqlDateTimeFormats(true); + UDFFromUnixTime udf = new UDFFromUnixTime(); + + //int with format + String format = "HH:mm:ss"; //TODO: switch this to an SQL:2016 pattern + verifyInt(0, "00:00:00", format, udf); + verifyInt(1296705906, "04:05:06", format, udf); + verifyInt(1514818800, "15:00:00", format, udf); + + //long with format + verifyLong(0L, "00:00:00", format, udf); + verifyLong(1296705906L, "04:05:06", format, udf); + verifyLong(1514818800L, "15:00:00", format, udf); + // proleptic Gregorian input: -30767590800L + verifyLong(-30767158800L, "15:00:00", format, udf); + // proleptic Gregorian input: -62009366400 + verifyLong(-62009539200L, "00:00:00", format, udf); + verifyLong(253402300799L, "23:59:59", format, udf); + } + + private void verifyInt(int value, String expected, String format, UDFFromUnixTime udf) { + IntWritable input = new IntWritable(value); + Text res; + if (format == null) { + res = udf.evaluate(input); + } else { + res = udf.evaluate(input, new Text(format)); + } + Assert.assertEquals(expected, res.toString()); + } + + private void verifyLong(long value, String expected, String format, UDFFromUnixTime udf) { + LongWritable input = new LongWritable(value); + Text res; + if (format == null) { + res = udf.evaluate(input); + } else { + res = udf.evaluate(input, new Text(format)); + } + Assert.assertEquals(expected, res.toString()); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java index 7c2ee15646..44a827b0e5 100644 --- 
ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java +++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java @@ -35,9 +35,9 @@ public class TestGenericUDFAddMonths extends TestCase { - private final Text fmtTextWithTime = new Text("YYYY-MM-dd HH:mm:ss"); - private final Text fmtTextWithTimeAndms = new Text("YYYY-MM-dd HH:mm:ss.SSS"); - private final Text fmtTextWithoutTime = new Text("YYYY-MM-dd"); + private final Text fmtTextWithTime = new Text("yyyy-MM-dd HH:mm:ss"); + private final Text fmtTextWithTimeAndms = new Text("yyyy-MM-dd HH:mm:ss.SSS"); + private final Text fmtTextWithoutTime = new Text("yyyy-MM-dd"); private final Text fmtTextInvalid = new Text("YYYY-abcdz"); public void testAddMonthsInt() throws HiveException { @@ -214,7 +214,33 @@ public void testAddMonthsLong() throws HiveException { } } + public void testSqlDateFormats() throws HiveException { + TestGenericUDFUtils.setHiveUseSqlDateTimeFormats(true); + GenericUDFAddMonths udf = new GenericUDFAddMonths(); + ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableIntObjectInspector; + + // format 1 + Text formatPatternYear = new Text("yyyy"); + ObjectInspector valueOI2 = PrimitiveObjectInspectorFactory + .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, + formatPatternYear); + ObjectInspector[] arguments = {valueOI0, valueOI1, valueOI2}; + udf.initialize(arguments); + + runAndVerify("2014-12-31 23:59:59", -12, formatPatternYear, "2013", udf); + + // format 2 + Text formatPatternHour = new Text("HH"); // TODO: also cover the SQL:2016 'hh24' token + valueOI2 = PrimitiveObjectInspectorFactory + .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, + formatPatternHour); + arguments[2] = valueOI2; + udf.initialize(arguments); + + runAndVerify("2014-12-31 23:59:59", -12, formatPatternHour, "23", udf); + } private void runAndVerify(String str, int months, String expResult, GenericUDF udf) throws HiveException { diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCastWithFormat.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCastWithFormat.java new file mode 100644 index 0000000000..11c4a3a6ba --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCastWithFormat.java @@ -0,0 +1,103 @@ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.common.type.Date; +import org.apache.hadoop.hive.common.type.Timestamp; +import org.apache.hadoop.hive.common.type.TimestampTZ; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.io.DateWritableV2; +import org.apache.hadoop.hive.serde2.io.TimestampLocalTZWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TimestampLocalTZTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.io.Text; +import org.junit.BeforeClass; +import org.junit.Test; + +import static junit.framework.TestCase.assertEquals; +import static junit.framework.TestCase.assertNull; + +public class TestGenericUDFCastWithFormat { + + @BeforeClass + public static void 
+    TestGenericUDFUtils.setHiveUseSqlDateTimeFormats(true);
+  }
+
+  @Test
+  public void testDateToStringWithFormat() throws HiveException {
+    GenericUDF udf = new GenericUDFToString();
+    ObjectInspector inputOI = PrimitiveObjectInspectorFactory.writableDateObjectInspector;
+    testCast(udf, inputOI, new DateWritableV2(Date.valueOf("2009-07-30")), "yyyy-MM-dd", "2009-07-30");
+    testCast(udf, inputOI, new DateWritableV2(Date.valueOf("2009-07-30")), "yyyy", "2009");
+  }
+
+  @Test
+  public void testStringToDateWithFormat() throws HiveException {
+    GenericUDF udf = new GenericUDFToDate();
+    ObjectInspector inputOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
+    testCast(udf, inputOI, "2009-07-30", "yyyy-MM-dd", "2009-07-30");
+    testCast(udf, inputOI, "2009-07-30", "yyyy", "2009-01-01");
+    // TODO
+  }
+
+  @Test
+  public void testStringToTimestampWithFormat() throws HiveException {
+    GenericUDF udf = new GenericUDFTimestamp();
+    ObjectInspector inputOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
+    testCast(udf, inputOI, "2009-07-30 00:00:00", "yyyy-MM-dd HH:mm:ss", "2009-07-30 00:00:00");
+    testCast(udf, inputOI, "2009-07-30 00:00:00", "yyyy", "2009-01-01 00:00:00");
+    // TODO
+  }
+
+  @Test
+  public void testStringToTimestampTZWithFormat() throws HiveException {
+    GenericUDF udf = new GenericUDFToTimestampLocalTZ();
+    ((GenericUDFToTimestampLocalTZ) udf).setTypeInfo(new TimestampLocalTZTypeInfo("America/Los_Angeles")); // TODO: should probably use the session-local time zone, not a hard-coded one
+    ObjectInspector inputOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
+    testCast(udf, inputOI, "2009-07-30 07:00:00 America/New_York", "yyyy-MM-dd HH:mm:ss", "2009-07-30 00:00:00.0 America/Los_Angeles"); // TODO: expected hour should become 04 once zone conversion is applied
+    // TODO
+  }
+
+  @Test
+  public void testTimestampToStringWithFormat() throws HiveException {
+    GenericUDF udf = new GenericUDFToString();
+    ObjectInspector inputOI = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
+    testCast(udf, inputOI, new TimestampWritableV2(Timestamp.valueOf("2009-07-30 00:00:00")), "yyyy-MM-dd HH:mm:ss", "2009-07-30 00:00:00");
+    testCast(udf, inputOI, new TimestampWritableV2(Timestamp.valueOf("2009-07-30 00:00:00")), "yyyy", "2009");
+    // TODO
+  }
+
+  @Test
+  public void testTimestampTZToStringWithFormat() throws HiveException {
+    GenericUDF udf = new GenericUDFToString();
+    ObjectInspector inputOI = PrimitiveObjectInspectorFactory.writableTimestampTZObjectInspector;
+    testCast(udf, inputOI, new TimestampLocalTZWritable(new TimestampTZ()), "yyyy-MM-dd HH:mm:ss", "1969-12-31 16:00:00");
+    testCast(udf, inputOI, new TimestampLocalTZWritable(new TimestampTZ()), "yyyy", "1969");
+    // TODO
+  }
+
+  private void testCast(
+      GenericUDF udf, ObjectInspector inputOI, Object input, String format, String output)
+      throws HiveException {
+
+    ConstantObjectInspector formatOI =
+        PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
+            TypeInfoFactory.getPrimitiveTypeInfo("string"), new Text(format));
+    ObjectInspector[] arguments = {inputOI, formatOI};
+    udf.initialize(arguments);
+
+    GenericUDF.DeferredObject valueObj = new GenericUDF.DeferredJavaObject(input);
+    GenericUDF.DeferredObject formatObj = new GenericUDF.DeferredJavaObject(new Text(format));
+    GenericUDF.DeferredObject[] args = {valueObj, formatObj};
+
+    assertEquals(udf.getFuncName() + " test with input type " + inputOI.getTypeName()
+        + " failed ", output, udf.evaluate(args).toString());
+
+    // Try with null args
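+    // (each cast UDF is expected to return null for a null argument rather than throw)
+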
GenericUDF.DeferredObject[] nullArgs = { new GenericUDF.DeferredJavaObject(null) }; + assertNull(udf.getFuncName() + " with NULL arguments failed", udf.evaluate(nullArgs)); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormat.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormat.java index 6a3cdda48a..a3573eee8b 100644 --- ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormat.java +++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormat.java @@ -44,24 +44,32 @@ public void testDateFormatStr() throws HiveException { udf.initialize(arguments); // date str - runAndVerifyStr("2015-04-05", fmtText, "Sunday", udf); - runAndVerifyStr("2015-04-06", fmtText, "Monday", udf); - runAndVerifyStr("2015-04-07", fmtText, "Tuesday", udf); - runAndVerifyStr("2015-04-08", fmtText, "Wednesday", udf); - runAndVerifyStr("2015-04-09", fmtText, "Thursday", udf); - runAndVerifyStr("2015-04-10", fmtText, "Friday", udf); - runAndVerifyStr("2015-04-11", fmtText, "Saturday", udf); - runAndVerifyStr("2015-04-12", fmtText, "Sunday", udf); + runAndVerifyStr("2015-04-05", "Sunday", udf); + runAndVerifyStr("2015-04-06", "Monday", udf); + runAndVerifyStr("2015-04-07", "Tuesday", udf); + runAndVerifyStr("2015-04-08", "Wednesday", udf); + runAndVerifyStr("2015-04-09", "Thursday", udf); + runAndVerifyStr("2015-04-10", "Friday", udf); + runAndVerifyStr("2015-04-11", "Saturday", udf); + runAndVerifyStr("2015-04-12", "Sunday", udf); // ts str - runAndVerifyStr("2015-04-05 10:30:45", fmtText, "Sunday", udf); - runAndVerifyStr("2015-04-06 10:30:45", fmtText, "Monday", udf); - runAndVerifyStr("2015-04-07 10:30:45", fmtText, "Tuesday", udf); - runAndVerifyStr("2015-04-08 10:30:45", fmtText, "Wednesday", udf); - runAndVerifyStr("2015-04-09 10:30", fmtText, "Thursday", udf); - runAndVerifyStr("2015-04-10 10:30:45.123", fmtText, "Friday", udf); - runAndVerifyStr("2015-04-11T10:30:45", fmtText, "Saturday", udf); - runAndVerifyStr("2015-04-12 10", fmtText, "Sunday", udf); + runAndVerifyStr("2015-04-05 10:30:45", "Sunday", udf); + runAndVerifyStr("2015-04-06 10:30:45", "Monday", udf); + runAndVerifyStr("2015-04-07 10:30:45", "Tuesday", udf); + runAndVerifyStr("2015-04-08 10:30:45", "Wednesday", udf); + runAndVerifyStr("2015-04-09 10:30", "Thursday", udf); + runAndVerifyStr("2015-04-10 10:30:45.123", "Friday", udf); + runAndVerifyStr("2015-04-11T10:30:45", "Saturday", udf); + runAndVerifyStr("2015-04-12 10", "Sunday", udf); + + //make sure hour is ok + fmtText = new Text("hh"); + valueOI1 = PrimitiveObjectInspectorFactory + .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtText); + arguments[1] = valueOI1; + udf.initialize(arguments); + runAndVerifyStr("2015-04-10 10:30:45.123", "10", udf); } public void testWrongDateStr() throws HiveException { @@ -73,10 +81,10 @@ public void testWrongDateStr() throws HiveException { ObjectInspector[] arguments = {valueOI0, valueOI1}; udf.initialize(arguments); - runAndVerifyStr("2016-02-30 10:30:45", fmtText, "Tuesday", udf); - runAndVerifyStr("2014-01-32", fmtText, "Saturday", udf); - runAndVerifyStr("01/14/2014", fmtText, null, udf); - runAndVerifyStr(null, fmtText, null, udf); + runAndVerifyStr("2016-02-30 10:30:45", "Tuesday", udf); + runAndVerifyStr("2014-01-32", "Saturday", udf); + runAndVerifyStr("01/14/2014", null, udf); + runAndVerifyStr(null, null, udf); } public void testDateFormatDate() throws HiveException { @@ -89,14 +97,22 @@ public void 
testDateFormatDate() throws HiveException { udf.initialize(arguments); - runAndVerifyDate("2015-04-05", fmtText, "Sunday", udf); - runAndVerifyDate("2015-04-06", fmtText, "Monday", udf); - runAndVerifyDate("2015-04-07", fmtText, "Tuesday", udf); - runAndVerifyDate("2015-04-08", fmtText, "Wednesday", udf); - runAndVerifyDate("2015-04-09", fmtText, "Thursday", udf); - runAndVerifyDate("2015-04-10", fmtText, "Friday", udf); - runAndVerifyDate("2015-04-11", fmtText, "Saturday", udf); - runAndVerifyDate("2015-04-12", fmtText, "Sunday", udf); + runAndVerifyDate("2015-04-05", "Sunday", udf); + runAndVerifyDate("2015-04-06", "Monday", udf); + runAndVerifyDate("2015-04-07", "Tuesday", udf); + runAndVerifyDate("2015-04-08", "Wednesday", udf); + runAndVerifyDate("2015-04-09", "Thursday", udf); + runAndVerifyDate("2015-04-10", "Friday", udf); + runAndVerifyDate("2015-04-11", "Saturday", udf); + runAndVerifyDate("2015-04-12", "Sunday", udf); + + // make sure year is ok + fmtText = new Text("yyyy"); + valueOI1 = PrimitiveObjectInspectorFactory + .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtText); + arguments[1] = valueOI1; + udf.initialize(arguments); + runAndVerifyDate("2015-04-08", "2015", udf); } public void testDateFormatTs() throws HiveException { @@ -109,15 +125,24 @@ public void testDateFormatTs() throws HiveException { udf.initialize(arguments); - runAndVerifyTs("2015-04-08 10:30:45", fmtText, "Wednesday", udf); - runAndVerifyTs("2015-04-05 10:30:45", fmtText, "Sunday", udf); - runAndVerifyTs("2015-04-06 10:30:45", fmtText, "Monday", udf); - runAndVerifyTs("2015-04-07 10:30:45", fmtText, "Tuesday", udf); - runAndVerifyTs("2015-04-08 10:30:45", fmtText, "Wednesday", udf); - runAndVerifyTs("2015-04-09 10:30:45", fmtText, "Thursday", udf); - runAndVerifyTs("2015-04-10 10:30:45.123", fmtText, "Friday", udf); - runAndVerifyTs("2015-04-11 10:30:45.123456789", fmtText, "Saturday", udf); - runAndVerifyTs("2015-04-12 10:30:45", fmtText, "Sunday", udf); + runAndVerifyTs("2015-04-08 10:30:45", "Wednesday", udf); + runAndVerifyTs("2015-04-05 10:30:45", "Sunday", udf); + runAndVerifyTs("2015-04-06 10:30:45", "Monday", udf); + runAndVerifyTs("2015-04-07 10:30:45", "Tuesday", udf); + runAndVerifyTs("2015-04-08 10:30:45", "Wednesday", udf); + runAndVerifyTs("2015-04-09 10:30:45", "Thursday", udf); + runAndVerifyTs("2015-04-10 10:30:45.123", "Friday", udf); + runAndVerifyTs("2015-04-11 10:30:45.123456789", "Saturday", udf); + runAndVerifyTs("2015-04-12 10:30:45", "Sunday", udf); + + // make sure hour of day is ok + fmtText = new Text("HH"); + valueOI1 = PrimitiveObjectInspectorFactory + .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtText); + arguments[1] = valueOI1; + udf.initialize(arguments); + runAndVerifyTs("2015-04-08 00:30:45", "00", udf); + } public void testNullFmt() throws HiveException { @@ -130,7 +155,7 @@ public void testNullFmt() throws HiveException { udf.initialize(arguments); - runAndVerifyStr("2015-04-05", fmtText, null, udf); + runAndVerifyStr("2015-04-05", null, udf); } public void testWrongFmt() throws HiveException { @@ -143,34 +168,55 @@ public void testWrongFmt() throws HiveException { udf.initialize(arguments); - runAndVerifyStr("2015-04-05", fmtText, null, udf); + runAndVerifyStr("2015-04-05", null, udf); + } + + public void testSqlDateFormats() throws HiveException { + TestGenericUDFUtils.setHiveUseSqlDateTimeFormats(true); + GenericUDFDateFormat udf = new GenericUDFDateFormat(); + ObjectInspector valueOI0 = 
PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+
+    // format 1
+    Text fmtText = new Text("yyyy");
+    ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory
+        .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtText);
+    ObjectInspector[] arguments = { valueOI0, valueOI1 };
+    udf.initialize(arguments);
+
+    runAndVerifyStr("2015-04-05", "2015", udf);
+
+    // format 2
+    fmtText = new Text("MM"); // TODO: move to the SQL:2016 spelling "mm" for month
+    valueOI1 = PrimitiveObjectInspectorFactory
+        .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtText);
+    arguments[1] = valueOI1;
+    udf.initialize(arguments);
+
+    runAndVerifyStr("2015-04-05", "04", udf);
   }
 
-  private void runAndVerifyStr(String str, Text fmtText, String expResult, GenericUDF udf)
+  private void runAndVerifyStr(String str, String expResult, GenericUDF udf)
       throws HiveException {
     DeferredObject valueObj0 = new DeferredJavaObject(str != null ? new Text(str) : null);
-    DeferredObject valueObj1 = new DeferredJavaObject(fmtText);
-    DeferredObject[] args = { valueObj0, valueObj1 };
+    DeferredObject[] args = {valueObj0};
     Text output = (Text) udf.evaluate(args);
     assertEquals("date_format() test ", expResult, output != null ? output.toString() : null);
   }
 
-  private void runAndVerifyDate(String str, Text fmtText, String expResult, GenericUDF udf)
+  private void runAndVerifyDate(String str, String expResult, GenericUDF udf)
       throws HiveException {
     DeferredObject valueObj0 = new DeferredJavaObject(str != null ? new DateWritableV2(
         Date.valueOf(str)) : null);
-    DeferredObject valueObj1 = new DeferredJavaObject(fmtText);
-    DeferredObject[] args = { valueObj0, valueObj1 };
+    DeferredObject[] args = {valueObj0};
     Text output = (Text) udf.evaluate(args);
     assertEquals("date_format() test ", expResult, output != null ? output.toString() : null);
   }
 
-  private void runAndVerifyTs(String str, Text fmtText, String expResult, GenericUDF udf)
+  private void runAndVerifyTs(String str, String expResult, GenericUDF udf)
       throws HiveException {
     DeferredObject valueObj0 = new DeferredJavaObject(str != null ? new TimestampWritableV2(
         Timestamp.valueOf(str)) : null);
-    DeferredObject valueObj1 = new DeferredJavaObject(fmtText);
-    DeferredObject[] args = { valueObj0, valueObj1 };
+    DeferredObject[] args = {valueObj0};
     Text output = (Text) udf.evaluate(args);
     assertEquals("date_format() test ", expResult, output != null ? output.toString() : null);
   }
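The pattern TODOs above exist because the same pattern string changes meaning once hive.use.sql.datetime.formats is on: under java.text.SimpleDateFormat semantics "MM" is month and "mm" is minute, whereas the SQL:2016 formatter reads "mm" as month. A small sketch of the legacy side only; the SQL-side token names (hh24, and by analogy mi for minute) are taken from the TODOs in these tests and are not verified against the new formatter:

import java.text.SimpleDateFormat;
import java.util.Date;

public class PatternDialectDemo {
  public static void main(String[] args) throws Exception {
    // Legacy dialect (java.text.SimpleDateFormat): "MM" = month, "mm" = minute
    Date d = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse("2015-04-05 10:30:45");
    System.out.println(new SimpleDateFormat("MM").format(d)); // 04  (month)
    System.out.println(new SimpleDateFormat("mm").format(d)); // 30  (minute)
    // SQL:2016 dialect: "mm" would already mean month, which is why these tests
    // keep "MM"/"HH" for now with TODOs to switch to "mm"/"hh24" later.
  }
}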
diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFToUnixTimestamp.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFToUnixTimestamp.java
index 61623d54c9..cd10311e07 100644
--- ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFToUnixTimestamp.java
+++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFToUnixTimestamp.java
@@ -74,6 +74,11 @@ public void testTimestamp() throws HiveException {
 
     // test null values
     runAndVerify(udf, null, null);
+
+    ts = Timestamp.valueOf("1111-02-03 01:02:03");
+    runAndVerify(udf,
+        new TimestampWritableV2(ts),
+        new LongWritable(ts.toEpochSecond()));
   }
 
   public void testDate() throws HiveException {
@@ -122,4 +127,29 @@ public void testString() throws HiveException {
     runAndVerify(udf2, null, new Text(format), null);
     runAndVerify(udf2, new Text(val), null, null);
   }
+
+  // format argument (2nd arg) is only used when the 1st argument is a string
+  public void testStringWithSqlFormat() throws HiveException {
+    TestGenericUDFUtils.setHiveUseSqlDateTimeFormats(true);
+
+    ObjectInspector valueOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+    String val;
+
+    // Try 2-arg version
+    GenericUDFToUnixTimeStamp udf2 = new GenericUDFToUnixTimeStamp();
+    ObjectInspector[] args2 = {valueOI, valueOI};
+    udf2.initialize(args2);
+
+    val = "2001-02-02";
+    String format = "yyyy";
+    runAndVerify(udf2,
+        new Text(val),
+        new Text(format),
+        new LongWritable(Date.valueOf("2001-01-01").toEpochSecond()));
+
+    // test null values
+    runAndVerify(udf2, null, null, null);
+    runAndVerify(udf2, null, new Text(format), null);
+    runAndVerify(udf2, new Text(val), null, null);
+  }
 }
diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFUtils.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFUtils.java
index b74ff2526c..e62dadfcdf 100644
--- ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFUtils.java
+++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFUtils.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.udf.generic;
 
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.session.SessionState;
 import org.junit.Assert;
 
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils;
@@ -26,6 +28,14 @@ public class TestGenericUDFUtils {
 
+  public static void setHiveUseSqlDateTimeFormats(boolean val) {
+    SessionState ss = SessionState.get();
+    if (ss == null) {
+      ss = SessionState.start(new HiveConf());
+    }
+    ss.getConf().setBoolVar(HiveConf.ConfVars.HIVE_USE_SQL_DATETIME_FORMAT, val);
+  }
+
   @Test
   public void testFindText() throws Exception {
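For reference, the flag set by this helper is what routes string/datetime conversions through the new formatter interface. A hypothetical usage sketch, not code from this patch: the constructor of HiveSqlDateTimeFormatter is assumed, while setPattern/parse/format are the methods the HiveDateTimeFormatter interface declares, and the default-fill behavior matches the sql_formats.q output below:

import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
import org.apache.hadoop.hive.common.format.datetime.ParseException;
import org.apache.hadoop.hive.common.type.Timestamp;

public class SqlFormatSketch {
  static Timestamp parseYearOnly(String input) throws ParseException {
    HiveDateTimeFormatter formatter = new HiveSqlDateTimeFormatter(); // assumed constructor
    formatter.setPattern("yyyy");  // SQL:2016 token: 4-digit year
    // Fields missing from the pattern default, so "2019" -> 2019-01-01 00:00:00
    return formatter.parse(input);
  }
}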
diff --git ql/src/test/queries/clientpositive/sql_formats.q ql/src/test/queries/clientpositive/sql_formats.q
new file mode 100644
index 0000000000..9c2d4560ba
--- /dev/null
+++ ql/src/test/queries/clientpositive/sql_formats.q
@@ -0,0 +1,66 @@
+drop table if exists timestamps;
+drop table if exists timestampLocalTzs;
+drop table if exists dates;
+drop table if exists strings;
+
+set hive.use.sql.datetime.formats=true;
+
+--non-vectorized
+set hive.vectorized.execution.enabled=false; -- vectorized run follows at the end of this file
+set hive.fetch.task.conversion=none; -- force a full MapReduce run instead of a fetch-only task
+
+create table timestamps (t timestamp) stored as parquet;
+insert into timestamps values
+("2019-01-01"),
+("1969-12-31 23:59:59.999999999")
+;
+from timestamps select cast (t as string format "yyyy");
+
+
+create table dates (d date) stored as parquet;
+insert into dates values
+("2019-01-01"),
+("1969-12-31")
+;
+from dates select cast (d as string format "yyyy");
+
+
+-- TODO: uncomment once timestamp with local time zone is implemented for SQL formats
+--create table timestampLocalTzs (t timestamp with local time zone);
+--insert into timestampLocalTzs values
+--("2019-01-01 America/New_York"),
+--("1969-12-31 23:59:59.999999999 Europe/Rome")
+--;
+--from timestampLocalTzs select cast (t as string format "yyyy");
+--from timestampLocalTzs select cast (t as string format "hh"); -- TODO: possibly hh24 instead
+
+
+create table strings (s string) stored as parquet;
+insert into strings values
+("2019"),
+("1969")
+;
+from strings select cast (s as timestamp format "yyyy");
+from strings select cast (s as date format "yyyy");
+--from strings select cast (s as timestamp with local time zone format "yyyy"); -- TODO: enable after implementation
+
+
+--correct descriptions
+explain
+from strings select cast (s as timestamp format "yyyy");
+explain
+from strings select cast (s as date format "yyyy");
+--explain
+--from strings select cast (s as timestamp with local time zone format "yyyy"); -- TODO: enable after implementation
+explain
+from timestamps select cast (t as string format "yyyy");
+
+
+--vectorized
+set hive.fetch.task.conversion=none;
+set hive.vectorized.execution.enabled=true;
+
+--from timestamps select cast (t as string format "yyyy"); -- TODO: uncomment once the vectorized path is fixed
+--from dates select cast (d as string format "yyyy");
+from strings select cast (s as timestamp format "yyyy");
+from strings select cast (s as date format "yyyy");
diff --git ql/src/test/results/clientpositive/sql_formats.q.out ql/src/test/results/clientpositive/sql_formats.q.out
new file mode 100644
index 0000000000..78dd3f2e76
--- /dev/null
+++ ql/src/test/results/clientpositive/sql_formats.q.out
@@ -0,0 +1,260 @@
+PREHOOK: query: drop table if exists timestamps
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists timestamps
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists timestampLocalTzs
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists timestampLocalTzs
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists dates
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists dates
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists strings
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists strings
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: -- force a full MapReduce run instead of a fetch-only task
+
+create table timestamps (t timestamp) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@timestamps
+POSTHOOK: query: -- force a full MapReduce run instead of a fetch-only task
+
+create table timestamps (t timestamp) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@timestamps
+PREHOOK: query: insert into timestamps values
+("2019-01-01"),
+("1969-12-31 23:59:59.999999999")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@timestamps
+POSTHOOK: query: insert into timestamps values
+("2019-01-01"),
+("1969-12-31 23:59:59.999999999")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@timestamps
+POSTHOOK: Lineage: timestamps.t SCRIPT []
+PREHOOK: query: from timestamps select cast (t as string format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+POSTHOOK: query: from timestamps select cast (t as string format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+2019
+1969
+PREHOOK: query: create table dates (d date) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dates
+POSTHOOK: query: create table dates (d date) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dates
+PREHOOK: query: insert into dates values
+("2019-01-01"),
+("1969-12-31")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@dates
+POSTHOOK: query: insert into dates values
+("2019-01-01"),
+("1969-12-31")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@dates
+POSTHOOK: Lineage: dates.d SCRIPT []
+PREHOOK: query: from dates select cast (d as string format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dates
+#### A masked pattern was here ####
+POSTHOOK: query: from dates select cast (d as string format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dates
+#### A masked pattern was here ####
+2019
+1969
+PREHOOK: query: create table strings (s string) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@strings
+POSTHOOK: query: create table strings (s string) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@strings
+PREHOOK: query: insert into strings values
+("2019"),
+("1969")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@strings
+POSTHOOK: query: insert into strings values
+("2019"),
+("1969")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@strings
+POSTHOOK: Lineage: strings.s SCRIPT []
+PREHOOK: query: from strings select cast (s as timestamp format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+#### A masked pattern was here ####
+POSTHOOK: query: from strings select cast (s as timestamp format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+#### A masked pattern was here ####
+2019-01-01 00:00:00
+1969-01-01 00:00:00
+PREHOOK: query: from strings select cast (s as date format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+#### A masked pattern was here ####
+POSTHOOK: query: from strings select cast (s as date format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+#### A masked pattern was here ####
+2019-01-01
+1969-01-01
+PREHOOK: query: explain
+from strings select cast (s as timestamp format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+from strings select cast (s as timestamp format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: strings
+            Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: CAST( s AS TIMESTAMP FORMAT 'yyyy') (type: timestamp)
+              outputColumnNames: _col0
+              Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column
stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +from strings select cast (s as date format "yyyy") +PREHOOK: type: QUERY +PREHOOK: Input: default@strings +#### A masked pattern was here #### +POSTHOOK: query: explain +from strings select cast (s as date format "yyyy") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@strings +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: strings + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CAST( s AS DATE FORMAT 'yyyy') (type: date) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +from timestamps select cast (t as string format "yyyy") +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamps +#### A masked pattern was here #### +POSTHOOK: query: explain +from timestamps select cast (t as string format "yyyy") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamps +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: timestamps + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CAST( t AS STRING FORMAT 'yyyy') (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: from strings select cast (s as timestamp format "yyyy") +PREHOOK: type: QUERY +PREHOOK: Input: default@strings +#### A masked pattern was here #### +POSTHOOK: query: from strings select cast (s as timestamp format "yyyy") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@strings +#### A masked pattern was here #### +2019-01-01 00:00:00 +1969-01-01 00:00:00 +PREHOOK: query: from strings select cast (s as date format "yyyy") +PREHOOK: type: QUERY +PREHOOK: Input: default@strings +#### A masked pattern was here #### +POSTHOOK: query: from strings select cast (s as date format "yyyy") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@strings +#### A masked pattern was here #### +2019-01-01 +1969-01-01 diff --git ql/src/test/results/clientpositive/udf_add_months.q.out 
ql/src/test/results/clientpositive/udf_add_months.q.out index 7150f59d68..42864b3e22 100644 --- ql/src/test/results/clientpositive/udf_add_months.q.out +++ ql/src/test/results/clientpositive/udf_add_months.q.out @@ -9,11 +9,11 @@ POSTHOOK: query: DESCRIBE FUNCTION EXTENDED add_months POSTHOOK: type: DESCFUNCTION add_months(start_date, num_months, output_date_format) - Returns the date that is num_months after start_date. start_date is a string or timestamp indicating a valid date. num_months is a number. output_date_format is an optional String which specifies the format for output. -The default output format is 'YYYY-MM-dd'. +The default output format is 'yyyy-MM-dd'. Example: > SELECT add_months('2009-08-31', 1) FROM src LIMIT 1; '2009-09-30'. - > SELECT add_months('2017-12-31 14:15:16', 2, 'YYYY-MM-dd HH:mm:ss') LIMIT 1; + > SELECT add_months('2017-12-31 14:15:16', 2, 'yyyy-MM-dd HH:mm:ss') LIMIT 1; '2018-02-28 14:15:16'. Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDFAddMonths diff --git serde/src/java/org/apache/hadoop/hive/serde2/io/DateWritableV2.java serde/src/java/org/apache/hadoop/hive/serde2/io/DateWritableV2.java index 4b6a3d6c10..0c8405e50b 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/io/DateWritableV2.java +++ serde/src/java/org/apache/hadoop/hive/serde2/io/DateWritableV2.java @@ -21,7 +21,9 @@ import java.io.DataOutput; import java.io.IOException; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; import org.apache.hadoop.hive.common.type.Date; +import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.io.WritableUtils; @@ -147,6 +149,10 @@ public String toString() { return date.toString(); } + public String toStringFormatted(HiveDateTimeFormatter formatter) { + return date.toStringFormatted(formatter); + } + @Override public int hashCode() { return date.toEpochDay(); diff --git serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampLocalTZWritable.java serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampLocalTZWritable.java index 3ffcb7a425..9927055a4c 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampLocalTZWritable.java +++ serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampLocalTZWritable.java @@ -18,7 +18,10 @@ package org.apache.hadoop.hive.serde2.io; import com.google.common.base.Preconditions; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.common.type.TimestampTZ; +import org.apache.hadoop.hive.common.type.TimestampTZUtil; import org.apache.hadoop.hive.serde2.ByteStream; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils; import org.apache.hadoop.io.WritableComparable; @@ -29,6 +32,7 @@ import java.io.IOException; import java.time.ZoneId; import java.util.Arrays; +import java.util.TimeZone; /** * Writable for TimestampTZ. Copied from TimestampWritableV2. 
@@ -247,6 +251,19 @@ public String toString() { return timestampTZ.toString(); } + public String toStringFormatted(HiveDateTimeFormatter formatter) { + if (formatter == null) { + return toString(); + } + + populateTimestampTZ(); + Timestamp ts = Timestamp.ofEpochSecond( + timestampTZ.getZonedDateTime().toEpochSecond(), + timestampTZ.getNanos()); + formatter.setTimeZone(TimeZone.getTimeZone(timestampTZ.getZonedDateTime().getZone())); + return formatter.format(ts); + } + @Override public void write(DataOutput dataOutput) throws IOException { checkBytes(); diff --git serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritableV2.java serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritableV2.java index 9aa7f19ab2..5972bd92b5 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritableV2.java +++ serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritableV2.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.time.format.DateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.common.type.TimestampUtils; @@ -387,6 +388,16 @@ public String toString() { return timestamp.format(DATE_TIME_FORMAT); } + public String toStringFormatted(HiveDateTimeFormatter formatter) { + if (formatter == null) { + return toString(); + } + if (timestampEmpty) { + populateTimestamp(); + } + return timestamp.toStringFormatted(formatter); + } + @Override public int hashCode() { long seconds = getSeconds(); diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java index 84c027d51c..abdd87014c 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java @@ -20,6 +20,7 @@ import java.time.ZoneId; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; @@ -250,6 +251,7 @@ public Object convert(Object input) { PrimitiveObjectInspector inputOI; SettableDateObjectInspector outputOI; Object r; + private HiveDateTimeFormatter formatter = null; public DateConverter(PrimitiveObjectInspector inputOI, SettableDateObjectInspector outputOI) { @@ -263,7 +265,11 @@ public Object convert(Object input) { return null; } return outputOI.set(r, PrimitiveObjectInspectorUtils.getDate(input, - inputOI)); + inputOI, formatter)); + } + + public void setDateTimeFormatter(HiveDateTimeFormatter formatter) { + this.formatter = formatter; } } @@ -272,6 +278,7 @@ public Object convert(Object input) { SettableTimestampObjectInspector outputOI; boolean intToTimestampInSeconds = false; Object r; + private HiveDateTimeFormatter formatter = null; public TimestampConverter(PrimitiveObjectInspector inputOI, SettableTimestampObjectInspector outputOI) { @@ -289,7 +296,11 @@ public Object convert(Object input) { return null; } return outputOI.set(r, PrimitiveObjectInspectorUtils.getTimestamp(input, - inputOI, intToTimestampInSeconds)); + inputOI, intToTimestampInSeconds, formatter)); + } + + public void 
setDateTimeFormatter(HiveDateTimeFormatter formatter) { + this.formatter = formatter; } } @@ -298,6 +309,7 @@ public Object convert(Object input) { final SettableTimestampLocalTZObjectInspector outputOI; final Object r; final ZoneId timeZone; + private HiveDateTimeFormatter formatter = null; public TimestampLocalTZConverter( PrimitiveObjectInspector inputOI, @@ -314,7 +326,12 @@ public Object convert(Object input) { return null; } - return outputOI.set(r, PrimitiveObjectInspectorUtils.getTimestampLocalTZ(input, inputOI, timeZone)); + return outputOI.set(r, + PrimitiveObjectInspectorUtils.getTimestampLocalTZ(input, inputOI, timeZone, formatter)); + } + + public void setDateTimeFormatter(HiveDateTimeFormatter formatter) { + this.formatter = formatter; } } @@ -416,6 +433,7 @@ public Object convert(Object input) { private static byte[] trueBytes = {'T', 'R', 'U', 'E'}; private static byte[] falseBytes = {'F', 'A', 'L', 'S', 'E'}; + private HiveDateTimeFormatter formatter = null; public TextConverter(PrimitiveObjectInspector inputOI) { // The output ObjectInspector is writableStringObjectInspector. @@ -486,14 +504,16 @@ public Text convert(Object input) { } return t; case DATE: - t.set(((DateObjectInspector) inputOI).getPrimitiveWritableObject(input).toString()); + t.set(((DateObjectInspector) inputOI) + .getPrimitiveWritableObject(input).toStringFormatted(formatter)); return t; case TIMESTAMP: t.set(((TimestampObjectInspector) inputOI) - .getPrimitiveWritableObject(input).toString()); + .getPrimitiveWritableObject(input).toStringFormatted(formatter)); return t; case TIMESTAMPLOCALTZ: - t.set(((TimestampLocalTZObjectInspector) inputOI).getPrimitiveWritableObject(input).toString()); + t.set(((TimestampLocalTZObjectInspector) inputOI) + .getPrimitiveWritableObject(input).toStringFormatted(formatter)); return t; case INTERVAL_YEAR_MONTH: t.set(((HiveIntervalYearMonthObjectInspector) inputOI) @@ -520,6 +540,10 @@ public Text convert(Object input) { throw new RuntimeException("Hive 2 Internal error: type = " + inputOI.getTypeName()); } } + + public void setDateTimeFormatter(HiveDateTimeFormatter formatter) { + this.formatter = formatter; + } } /** diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java index 3886b202c7..72d45d93f0 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java @@ -29,6 +29,9 @@ import org.apache.hadoop.hive.common.classification.InterfaceAudience; import org.apache.hadoop.hive.common.classification.InterfaceStability; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.ParseException; import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; @@ -1113,6 +1116,11 @@ public static HiveDecimal getHiveDecimal(Object o, PrimitiveObjectInspector oi) } public static Date getDate(Object o, PrimitiveObjectInspector oi) { + return getDate(o, oi, null); + } + + public static Date getDate( + Object o, PrimitiveObjectInspector oi, HiveDateTimeFormatter formatter) { if (o == null) { return null; } @@ 
-1125,13 +1133,9 @@ public static Date getDate(Object o, PrimitiveObjectInspector oi) { StringObjectInspector soi = (StringObjectInspector) oi; String s = soi.getPrimitiveJavaObject(o).trim(); try { - if (s.length() == DATE_LENGTH) { - result = Date.valueOf(s); - } else { - Timestamp ts = getTimestampFromString(s); - if (ts != null) { - result = Date.ofEpochMilli(ts.toEpochMilli()); - } + Date date = getDateFromString(s, formatter); + if (date != null) { + result = date; } } catch (IllegalArgumentException e) { // Do nothing @@ -1141,13 +1145,9 @@ public static Date getDate(Object o, PrimitiveObjectInspector oi) { case VARCHAR: { String val = getString(o, oi).trim(); try { - if (val.length() == DATE_LENGTH) { - result = Date.valueOf(val); - } else { - Timestamp ts = getTimestampFromString(val); - if (ts != null) { - result = Date.ofEpochMilli(ts.toEpochMilli()); - } + Date date = getDateFromString(val, formatter); + if (date != null) { + result = date; } } catch (IllegalArgumentException e) { // Do nothing @@ -1177,11 +1177,46 @@ public static Date getDate(Object o, PrimitiveObjectInspector oi) { return result; } + private final static int DATE_LENGTH = "YYYY-MM-DD".length(); + private static Date getDateFromString(String s, HiveDateTimeFormatter formatter) { + + // with SQL formats + if (formatter != null) { + try { + return Date.valueOf(s, formatter); + } catch (ParseException e) { + return null; + } + } + + // without SQL formats + if (s.length() == DATE_LENGTH) { + return Date.valueOf(s); + } else { + Timestamp ts = getTimestampFromString(s); + if (ts != null) { + return Date.ofEpochMilli(ts.toEpochMilli()); + } + } + return null; + } + public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector oi) { return getTimestamp(o, oi, false); } + public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector oi, HiveDateTimeFormatter formatter) { + return getTimestamp(o, oi, false, formatter); + } + public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector inputOI, boolean intToTimestampInSeconds) { + return getTimestamp(o, inputOI, intToTimestampInSeconds, null); + } + + public static Timestamp getTimestamp(Object o, + PrimitiveObjectInspector inputOI, + boolean intToTimestampInSeconds, + HiveDateTimeFormatter format) { if (o == null) { return null; } @@ -1225,11 +1260,11 @@ public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector inputOI, case STRING: StringObjectInspector soi = (StringObjectInspector) inputOI; String s = soi.getPrimitiveJavaObject(o); - result = getTimestampFromString(s); + result = getTimestampFromString(s, format); break; case CHAR: case VARCHAR: - result = getTimestampFromString(getString(o, inputOI)); + result = getTimestampFromString(getString(o, inputOI), format); break; case DATE: result = Timestamp.ofEpochMilli( @@ -1254,15 +1289,17 @@ public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector inputOI, return result; } - private final static int TS_LENGTH = "yyyy-mm-dd hh:mm:ss".length(); - private final static int DATE_LENGTH = "YYYY-MM-DD".length(); - public static Timestamp getTimestampFromString(String s) { + return getTimestampFromString(s, null); + } + + public static Timestamp getTimestampFromString(String s, HiveDateTimeFormatter formatter) { + s = s.trim(); s = trimNanoTimestamp(s); try { - return TimestampUtils.stringToTimestamp(s); + return TimestampUtils.stringToTimestamp(s, formatter); } catch (IllegalArgumentException e) { return null; } @@ -1284,21 +1321,13 @@ private static String 
trimNanoTimestamp(String s) { return s; } - private static boolean isValidTimeStamp(final String s) { - if (s.length() == TS_LENGTH || - (s.contains(".") && - s.substring(0, s.indexOf('.')).length() == TS_LENGTH)) { - // Possible timestamp - if (s.charAt(DATE_LENGTH) == '-') { - return false; - } - return true; - } - return false; + public static TimestampTZ getTimestampLocalTZ(Object o, PrimitiveObjectInspector oi, + ZoneId timeZone) { + return getTimestampLocalTZ(o, oi, timeZone, null); } public static TimestampTZ getTimestampLocalTZ(Object o, PrimitiveObjectInspector oi, - ZoneId timeZone) { + ZoneId timeZone, HiveDateTimeFormatter formatter) { if (o == null) { return null; } @@ -1309,7 +1338,7 @@ public static TimestampTZ getTimestampLocalTZ(Object o, PrimitiveObjectInspector case STRING: { StringObjectInspector soi = (StringObjectInspector) oi; String s = soi.getPrimitiveJavaObject(o).trim(); - return TimestampTZUtil.parseOrNull(trimNanoTimestamp(s), timeZone); + return TimestampTZUtil.parseOrNull(trimNanoTimestamp(s), timeZone, formatter); } case CHAR: case VARCHAR: {
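+        // CHAR/VARCHAR presumably mirror the STRING branch above: read the value as a
+        // String, then parse it, honoring the supplied formatter when one is given.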