diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveDateTimeFormatter.java common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveDateTimeFormatter.java
new file mode 100644
index 0000000000..e0622473ce
--- /dev/null
+++ common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveDateTimeFormatter.java
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import org.apache.hadoop.hive.common.type.Timestamp;
+
+import java.text.SimpleDateFormat;
+import java.time.format.DateTimeFormatter;
+import java.util.TimeZone;
+
+/**
+ * Interface for formatting and parsing timestamps. Created so that callers can
+ * optionally format datetime objects into strings and parse strings into datetime
+ * objects with SQL:2016 semantics, as well as with the legacy
+ * (java.text.SimpleDateFormat) format.
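+ *
+ * <p>A minimal usage sketch (illustrative only; mirrors the unit tests and assumes
+ * the HiveSimpleDateFormatter implementation below):
+ * <pre>
+ *   HiveDateTimeFormatter formatter = new HiveSimpleDateFormatter();
+ *   formatter.setPattern("yyyy-MM-dd HH:mm:ss");
+ *   String s = formatter.format(Timestamp.valueOf("2019-01-01 00:00:00"));
+ *   Timestamp ts = formatter.parse(s);  // throws ParseException on bad input
+ * </pre>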
+ */
+public interface HiveDateTimeFormatter {
+
+ /**
+ * Only used by HiveSimpleDateFormatter, which is a wrapper for the given SimpleDateFormat
+ * object.
+ */
+ void setFormatter(SimpleDateFormat simpleDateFormat) throws WrongFormatterException;
+
+ /**
+ * Only used by HiveJavaDateTimeFormatter, which is a wrapper for the given DateTimeFormatter
+ * object.
+ */
+ void setFormatter(DateTimeFormatter dateTimeFormatter) throws WrongFormatterException;
+
+ /**
+ * Format the given timestamp into a string.
+ */
+ String format(Timestamp ts);
+
+ /**
+ * Parse the given string into a timestamp.
+ *
+ * @throws ParseException if the string cannot be parsed.
+ */
+ Timestamp parse(String string) throws ParseException;
+
+ /**
+ * Set the format pattern to be used for formatting timestamps or parsing strings.
+ * Different HiveDateTimeFormatter implementations interpret some patterns differently. For
+ * example, HiveSimpleDateFormatter interprets the string "mm" as minute, while
+ * HiveSqlDateTimeFormatter interprets it as month.
+ * This method parses the pattern into tokens, so it comes with some performance overhead.
+ */
+ void setPattern(String pattern);
+
+ /**
+ * Get the format pattern to be used for formatting timestamps or parsing strings.
+ */
+ String getPattern();
+
+ /**
+ * Set the time zone of the formatter. Only HiveSimpleDateFormatter uses this.
+ */
+ void setTimeZone(TimeZone timeZone);
+
+}
diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveJavaDateTimeFormatter.java common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveJavaDateTimeFormatter.java
new file mode 100644
index 0000000000..147ece5336
--- /dev/null
+++ common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveJavaDateTimeFormatter.java
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import org.apache.hadoop.hive.common.type.Timestamp;
+
+import java.text.SimpleDateFormat;
+import java.time.Instant;
+import java.time.LocalDateTime;
+import java.time.ZoneId;
+import java.time.format.DateTimeFormatter;
+import java.util.TimeZone;
+
+/**
+ * Wrapper for DateTimeFormatter in the java.time package.
+ */
+public class HiveJavaDateTimeFormatter implements HiveDateTimeFormatter {
+
+ private DateTimeFormatter formatter;
+
+ @Override public void setFormatter(DateTimeFormatter dateTimeFormatter) {
+ this.formatter = dateTimeFormatter;
+ }
+
+ @Override public String format(Timestamp ts) {
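+    // Interpret the timestamp as an instant and render it as a UTC LocalDateTime,
+    // so the output does not depend on the JVM default time zone.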
+ return formatter.format(
+ LocalDateTime.ofInstant(
+ Instant.ofEpochSecond(ts.toEpochSecond(), ts.getNanos()), ZoneId.of("UTC")));
+ }
+
+ @Override public Timestamp parse(String string) {
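+    // Note: parsing currently delegates to Timestamp.valueOf rather than to the
+    // wrapped DateTimeFormatter.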
+ return Timestamp.valueOf(string);
+ }
+
+ // unused methods
+ @Override public void setPattern(String pattern) {}
+ @Override public String getPattern() {
+ return null;
+ }
+ @Override public void setTimeZone(TimeZone timeZone) {}
+ @Override public void setFormatter(SimpleDateFormat simpleDateFormat)
+ throws WrongFormatterException {
+ throw new WrongFormatterException("HiveJavaDateTimeFormatter formatter wraps an object of type"
+ + "java.time.format.DateTimeFormatter, formatter cannot be of type "
+ + "java.text.SimpleDateFormat");
+ }
+}
diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSimpleDateFormatter.java common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSimpleDateFormatter.java
new file mode 100644
index 0000000000..db7822b38f
--- /dev/null
+++ common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSimpleDateFormatter.java
@@ -0,0 +1,78 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import org.apache.hadoop.hive.common.type.Timestamp;
+
+import java.text.SimpleDateFormat;
+import java.time.format.DateTimeFormatter;
+import java.util.Date;
+import java.util.TimeZone;
+
+/**
+ * Wrapper for java.text.SimpleDateFormat.
+ */
+public class HiveSimpleDateFormatter implements HiveDateTimeFormatter {
+
+ private SimpleDateFormat format = new SimpleDateFormat();
+ private String pattern;
+
+ public HiveSimpleDateFormatter() {}
+
+ @Override public void setFormatter(SimpleDateFormat simpleDateFormat) {
+ this.format = simpleDateFormat;
+ }
+
+ @Override public String format(Timestamp ts) {
+ Date date = new Date(ts.toEpochMilli());
+ return format.format(date);
+ }
+
+ @Override public Timestamp parse(String string) throws ParseException {
+ try {
+ Date date = format.parse(string);
+ return Timestamp.ofEpochMilli(date.getTime());
+    } catch (java.text.ParseException e) {
+      throw new ParseException(
+          "String " + string + " could not be parsed by java.text.SimpleDateFormat with pattern "
+              + pattern, e);
+    }
+ }
+
+ @Override public void setPattern(String pattern) {
+ format.applyPattern(pattern);
+ this.pattern = pattern;
+ }
+
+ @Override public String getPattern() {
+ return pattern;
+ }
+
+ @Override public void setTimeZone(TimeZone timeZone) {
+ format.setTimeZone(timeZone);
+ }
+
+  // unused methods
+ @Override public void setFormatter(DateTimeFormatter dateTimeFormatter)
+ throws WrongFormatterException {
+ throw new WrongFormatterException(
+ "HiveSimpleDateFormatter formatter wraps an object of type java.text.SimpleDateFormat, "
+ + "formatter cannot be of type java.time.format.DateTimeFormatter");
+ }
+
+}
diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java
new file mode 100644
index 0000000000..1ccaa5cf2b
--- /dev/null
+++ common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import org.apache.hadoop.hive.common.type.Timestamp;
+
+import java.text.SimpleDateFormat;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+import java.util.TimeZone;
+
+/**
+ * Formatter using SQL:2016 datetime patterns.
+ */
+public class HiveSqlDateTimeFormatter implements HiveDateTimeFormatter {
+
+ private String pattern;
+ private TimeZone timeZone;
+
+ public HiveSqlDateTimeFormatter() {}
+
+ @Override public void setPattern(String pattern) {
+ this.pattern = pattern;
+ }
+
+ @Override public String getPattern() {
+ return pattern;
+ }
+
+  @Override public String format(Timestamp ts) {
+    //TODO: replace with actual implementation; delegates to SimpleDateFormat for now
+    HiveDateTimeFormatter formatter = new HiveSimpleDateFormatter();
+    formatter.setPattern(pattern);
+    if (timeZone != null) {
+      formatter.setTimeZone(timeZone);
+    } else {
+      formatter.setTimeZone(TimeZone.getTimeZone(ZoneOffset.UTC));
+    }
+    return formatter.format(ts);
+  }
+
+  @Override public Timestamp parse(String string) throws ParseException {
+    //TODO: replace with actual implementation; delegates to SimpleDateFormat for now.
+    //TODO: the parser should be able to strip the time zone (city) from timestamps
+    // with time zone; if it cannot, handle it in
+    // TimestampTZUtil#parseOrNull(String, ZoneId, HiveDateTimeFormatter).
+
+    HiveDateTimeFormatter formatter = new HiveSimpleDateFormatter();
+    formatter.setPattern(pattern);
+    if (timeZone != null) {
+      formatter.setTimeZone(timeZone);
+    } else {
+      formatter.setTimeZone(TimeZone.getTimeZone(ZoneOffset.UTC));
+    }
+    try {
+      return formatter.parse(string);
+    } catch (Exception e) {
+      throw new ParseException(e);
+    }
+  }
+
+ @Override public void setTimeZone(TimeZone timeZone) {
+ this.timeZone = timeZone;
+ }
+
+ // unused methods
+ @Override public void setFormatter(DateTimeFormatter dateTimeFormatter)
+ throws WrongFormatterException {
+ throw new WrongFormatterException("HiveSqlDateTimeFormatter is not a wrapper for "
+ + "java.time.format.DateTimeFormatter, use HiveJavaDateTimeFormatter instead.");
+ }
+ @Override public void setFormatter(SimpleDateFormat simpleDateFormat)
+ throws WrongFormatterException {
+ throw new WrongFormatterException("HiveSqlDateTimeFormatter is not a wrapper for "
+ + "java.text.SimpleDateFormat, use HiveSimpleDateFormatter instead.");
+ }
+}
diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/ParseException.java common/src/java/org/apache/hadoop/hive/common/format/datetime/ParseException.java
new file mode 100644
index 0000000000..a7ac231682
--- /dev/null
+++ common/src/java/org/apache/hadoop/hive/common/format/datetime/ParseException.java
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+/**
+ * Exception indicating that the given string could not be parsed into a datetime object.
+ */
+public class ParseException extends Exception {
+
+ public ParseException(String message) {
+ super(message);
+ }
+
+ public ParseException(Throwable cause) {
+ super(cause);
+ }
+
+ public ParseException(String message, Throwable cause) {
+ super(message, cause);
+ }
+}
diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/WrongFormatterException.java common/src/java/org/apache/hadoop/hive/common/format/datetime/WrongFormatterException.java
new file mode 100644
index 0000000000..972e7e934f
--- /dev/null
+++ common/src/java/org/apache/hadoop/hive/common/format/datetime/WrongFormatterException.java
@@ -0,0 +1,31 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+/**
+ * Exception indicating that a wrapper class was given the wrong type of object to wrap.
+ * E.g. HiveSimpleDateFormatter wraps a SimpleDateFormat object, and cannot be given a
+ * java.time.format.DateTimeFormatter.
+ */
+public class WrongFormatterException extends Exception {
+
+ public WrongFormatterException(String message) {
+ super(message);
+ }
+}
diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/package-info.java common/src/java/org/apache/hadoop/hive/common/format/datetime/package-info.java
new file mode 100644
index 0000000000..056b80035b
--- /dev/null
+++ common/src/java/org/apache/hadoop/hive/common/format/datetime/package-info.java
@@ -0,0 +1,19 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
\ No newline at end of file
diff --git common/src/java/org/apache/hadoop/hive/common/type/Date.java common/src/java/org/apache/hadoop/hive/common/type/Date.java
index 6ecfcf65c9..feb09dee16 100644
--- common/src/java/org/apache/hadoop/hive/common/type/Date.java
+++ common/src/java/org/apache/hadoop/hive/common/type/Date.java
@@ -17,6 +17,9 @@
*/
package org.apache.hadoop.hive.common.type;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.ParseException;
+
import java.time.Instant;
import java.time.LocalDate;
import java.time.LocalDateTime;
@@ -72,6 +75,13 @@ public String toString() {
return localDate.format(PRINT_FORMATTER);
}
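+  /**
+   * Formats this date using the given formatter; falls back to toString() when the
+   * formatter is null.
+   */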
+ public String toStringFormatted(HiveDateTimeFormatter formatter) {
+ if (formatter == null) {
+ return toString();
+ }
+ return formatter.format(Timestamp.ofEpochMilli(toEpochMilli()));
+ }
+
public int hashCode() {
return localDate.hashCode();
}
@@ -137,6 +147,14 @@ public static Date valueOf(String s) {
return new Date(localDate);
}
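+  /**
+   * Parses the given string into a Date using the given formatter; falls back to
+   * valueOf(String) when the formatter is null.
+   *
+   * @throws ParseException if the string cannot be parsed.
+   */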
+ public static Date valueOf(String s, HiveDateTimeFormatter formatter) throws ParseException {
+ if (formatter == null) {
+ return valueOf(s);
+ }
+ s = s.trim();
+ return Date.ofEpochMilli(formatter.parse(s).toEpochMilli());
+ }
+
public static Date ofEpochDay(int epochDay) {
return new Date(LocalDate.ofEpochDay(epochDay));
}
diff --git common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java
index a8b7b6d186..587d31b494 100644
--- common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java
+++ common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java
@@ -17,6 +17,9 @@
*/
package org.apache.hadoop.hive.common.type;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.ParseException;
+
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
@@ -101,6 +104,13 @@ public String toString() {
return localDateTime.format(PRINT_FORMATTER);
}
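+  /**
+   * Formats this timestamp using the given formatter; falls back to toString()
+   * when the formatter is null.
+   */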
+ public String toStringFormatted(HiveDateTimeFormatter formatter) {
+ if (formatter == null) {
+ return toString();
+ }
+ return formatter.format(this);
+ }
+
public int hashCode() {
return localDateTime.hashCode();
}
@@ -166,6 +176,16 @@ public static Timestamp valueOf(String s) {
return new Timestamp(localDateTime);
}
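+  /**
+   * Parses the given string into a Timestamp using the given formatter; falls back
+   * to valueOf(String) when the formatter is null.
+   *
+   * @throws ParseException if the string cannot be parsed.
+   */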
+ public static Timestamp valueOf(String s, HiveDateTimeFormatter formatter)
+ throws ParseException {
+ if (formatter == null) {
+ return valueOf(s);
+ }
+
+ s = s.trim();
+ return formatter.parse(s);
+ }
+
public static Timestamp ofEpochSecond(long epochSecond) {
return ofEpochSecond(epochSecond, 0);
}
diff --git common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java
index 4708d35a78..6bfbb8e71a 100644
--- common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java
+++ common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java
@@ -34,6 +34,8 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.ParseException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -110,6 +112,26 @@ private static String handleSingleDigitHourOffset(String s) {
return s;
}
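+  /**
+   * Parses the given string into a TimestampTZ using the given formatter and
+   * converts it to the given time zone; returns null when the string cannot be
+   * parsed. Falls back to parseOrNull(String, ZoneId) when the formatter is null.
+   */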
+ public static TimestampTZ parseOrNull(
+ String s, ZoneId convertToTimeZone, HiveDateTimeFormatter formatter) {
+ if (formatter == null) {
+ return parseOrNull(s, convertToTimeZone);
+ }
+
+ Timestamp ts;
+ try {
+ ts = formatter.parse(s);
+ } catch (ParseException e) {
+ return null;
+ }
+ if (ts == null) {
+ return null;
+ }
+ TimestampTZ tsLTZ = new TimestampTZ(ts.toEpochSecond(), ts.getNanos(), ZoneOffset.UTC);
+ // change time zone to default timeZone, retaining same instant
+ tsLTZ.setZonedDateTime(tsLTZ.getZonedDateTime().withZoneSameInstant(convertToTimeZone));
+ return tsLTZ;
+ }
public static TimestampTZ parseOrNull(String s, ZoneId defaultTimeZone) {
try {
diff --git common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java
index f26f8ae01e..38b00fac2c 100644
--- common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java
+++ common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.common.type;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.ParseException;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import java.math.BigDecimal;
@@ -171,6 +173,18 @@ public static long millisToSeconds(long millis) {
private static final int DATE_LENGTH = "YYYY-MM-DD".length();
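+  /**
+   * Parses the given string into a Timestamp using the given formatter; returns
+   * null when parsing fails. Falls back to stringToTimestamp(String) when the
+   * formatter is null.
+   */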
+ public static Timestamp stringToTimestamp(String s, HiveDateTimeFormatter formatter) {
+ if (formatter == null) {
+ return stringToTimestamp(s);
+ }
+
+ try {
+ return Timestamp.valueOf(s, formatter);
+ } catch (ParseException e) {
+ return null;
+ }
+ }
+
public static Timestamp stringToTimestamp(String s) {
s = s.trim();
// Handle simpler cases directly avoiding exceptions
diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 0c2bd1e5ad..c753ca06f4 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1543,6 +1543,12 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal
"used instead. Time-zone IDs can be specified as region-based zone IDs (based on IANA time-zone data),\n" +
"abbreviated zone IDs, or offset IDs."),
+ HIVE_USE_SQL_DATETIME_FORMAT("hive.use.sql.datetime.formats", false,
+ "Datetime used in \"CAST...AS...FORMAT \" statements will be parsed " +
+ "and formatted according to the SQL:2016 semantics instead of the original " +
+ "SimpleDateFormatter format. This applies to functions that take a datetime format as " +
+ "well; e.g. date_format, add_months."),
+
HIVE_SESSION_HISTORY_ENABLED("hive.session.history.enabled", false,
"Whether to log Hive query, query plan, runtime statistics etc."),
diff --git common/src/java/org/apache/hive/common/util/DateParser.java common/src/java/org/apache/hive/common/util/DateParser.java
index 5db14f1906..84a8ae81db 100644
--- common/src/java/org/apache/hive/common/util/DateParser.java
+++ common/src/java/org/apache/hive/common/util/DateParser.java
@@ -17,6 +17,8 @@
*/
package org.apache.hive.common.util;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.ParseException;
import org.apache.hadoop.hive.common.type.Date;
/**
@@ -36,10 +38,14 @@ public Date parseDate(String strValue) {
}
public boolean parseDate(String strValue, Date result) {
+ return parseDate(strValue, result, null);
+ }
+
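+  /**
+   * Parses the given string into the given Date using the given formatter;
+   * returns false when the string cannot be parsed. Falls back to
+   * Date.valueOf(String) semantics when the formatter is null.
+   */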
+ public boolean parseDate(String strValue, Date result, HiveDateTimeFormatter formatter) {
Date parsedVal;
try {
- parsedVal = Date.valueOf(strValue);
- } catch (IllegalArgumentException e) {
+ parsedVal = Date.valueOf(strValue, formatter);
+ } catch (IllegalArgumentException | ParseException e) {
parsedVal = null;
}
if (parsedVal == null) {
diff --git common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveJavaDateTimeFormatter.java common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveJavaDateTimeFormatter.java
new file mode 100644
index 0000000000..0e5574075a
--- /dev/null
+++ common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveJavaDateTimeFormatter.java
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.time.format.DateTimeFormatter;
+import java.time.format.DateTimeFormatterBuilder;
+import java.time.temporal.ChronoField;
+
+/**
+ * Test class for HiveJavaDateTimeFormatter.
+ */
+public class TestHiveJavaDateTimeFormatter {
+
+ private static final DateTimeFormatter DATE_TIME_FORMATTER;
+ static {
+ DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder();
+ builder.append(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
+ builder.optionalStart().appendFraction(ChronoField.NANO_OF_SECOND, 0, 9, true).optionalEnd();
+ DATE_TIME_FORMATTER = builder.toFormatter();
+ }
+ private HiveDateTimeFormatter formatter = new HiveJavaDateTimeFormatter();
+
+ @Before
+ public void setUp() throws WrongFormatterException {
+ formatter.setFormatter(DATE_TIME_FORMATTER);
+ }
+
+ @Test
+ public void testFormat() {
+ Timestamp ts = Timestamp.valueOf("2019-01-01 00:00:00.99999");
+ Assert.assertEquals("2019-01-01 00:00:00.99999", formatter.format(ts));
+ }
+
+ @Test
+ public void testParse() throws ParseException {
+ String s = "2019-01-01 00:00:00.99999";
+ Assert.assertEquals(Timestamp.valueOf("2019-01-01 00:00:00.99999"), formatter.parse(s));
+ }
+
+}
diff --git common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSimpleDateFormatter.java common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSimpleDateFormatter.java
new file mode 100644
index 0000000000..51fd92e37b
--- /dev/null
+++ common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSimpleDateFormatter.java
@@ -0,0 +1,67 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.text.SimpleDateFormat;
+import java.time.ZoneOffset;
+import java.util.TimeZone;
+
+/**
+ * Tests HiveSimpleDateFormatter.
+ */
+public class TestHiveSimpleDateFormatter {
+
+ private HiveDateTimeFormatter formatter = new HiveSimpleDateFormatter();
+
+ @Before
+ public void setUp() throws WrongFormatterException {
+ formatter.setFormatter(new SimpleDateFormat());
+ formatter.setPattern("yyyy-MM-dd HH:mm:ss");
+ formatter.setTimeZone(TimeZone.getTimeZone(ZoneOffset.UTC));
+ }
+
+ @Test
+ public void testFormat() {
+ verifyFormat("2019-01-01 01:01:01");
+ verifyFormat("2019-01-01 00:00:00");
+ verifyFormat("1960-01-01 23:00:00");
+ }
+
+ private void verifyFormat(String s) {
+ Timestamp ts = Timestamp.valueOf(s);
+ Assert.assertEquals(s, formatter.format(ts));
+ }
+
+ @Test
+ public void testParse() throws ParseException {
+ verifyParse("2019-01-01 01:10:10");
+ verifyParse("1960-01-01 23:00:00");
+  }
+
+ private void verifyParse(String s) throws ParseException {
+ Timestamp ts = Timestamp.valueOf(s);
+ Assert.assertEquals(ts, formatter.parse(s));
+ }
+}
diff --git common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java
new file mode 100644
index 0000000000..312e13cf2a
--- /dev/null
+++ common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java
@@ -0,0 +1,38 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import org.junit.Test;
+
+/**
+ * Test class for HiveSqlDateTimeFormatter.
+ */
+public class TestHiveSqlDateTimeFormatter {
+
+ private HiveDateTimeFormatter formatter = new HiveSqlDateTimeFormatter();
+
+ @Test
+ public void testFormat() {
+ }
+
+ @Test
+ public void testParse() throws ParseException {
+ }
+}
diff --git common/src/test/org/apache/hadoop/hive/common/format/datetime/package-info.java common/src/test/org/apache/hadoop/hive/common/format/datetime/package-info.java
new file mode 100644
index 0000000000..056b80035b
--- /dev/null
+++ common/src/test/org/apache/hadoop/hive/common/format/datetime/package-info.java
@@ -0,0 +1,19 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
\ No newline at end of file
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 5ff338660f..9c5e18879f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -43,6 +43,7 @@
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastCharToBinary;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToChar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToString;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToStringWithFormat;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToVarChar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToChar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToDecimal;
@@ -70,6 +71,7 @@
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToDecimal;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToDouble;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToString;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToStringWithFormat;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToVarChar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.ConvertDecimal64ToDecimal;
@@ -3136,9 +3138,17 @@ private VectorExpression getCastToString(List childExpr, TypeInfo
} else if (isDecimalFamily(inputType)) {
return createVectorExpression(CastDecimalToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
} else if (isDateFamily(inputType)) {
- return createVectorExpression(CastDateToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ if (childExpr.size() < 2) { //second argument will be format string
+ return createVectorExpression(CastDateToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ } else {
+ return createVectorExpression(CastDateToStringWithFormat.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ }
} else if (isTimestampFamily(inputType)) {
- return createVectorExpression(CastTimestampToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ if (childExpr.size() < 2) { //second argument will be format string
+ return createVectorExpression(CastTimestampToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ } else {
+ return createVectorExpression(CastTimestampToStringWithFormat.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ }
} else if (isStringFamily(inputType)) {
// STRING and VARCHAR types require no conversion, so use a no-op.
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToStringWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToStringWithFormat.java
new file mode 100644
index 0000000000..61b466df3e
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToStringWithFormat.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Vectorized UDF for CAST (&lt;date&gt; TO STRING WITH FORMAT &lt;pattern&gt;).
+ */
+public class CastDateToStringWithFormat extends CastDateToString {
+ private static final long serialVersionUID = 1L;
+ protected transient Date dt;
+ private transient HiveDateTimeFormatter formatter;
+
+ public CastDateToStringWithFormat() {
+ super();
+ }
+
+ public CastDateToStringWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) {
+ super(inputColumn, outputColumnNum);
+
+ if (patternBytes == null) {
+      throw new RuntimeException(); // TODO: throw a specific exception: the format pattern is missing
+ }
+
+ formatter = GenericUDF.getSqlDateTimeFormatterOrNull();
+ if (formatter == null) {
+      throw new RuntimeException(); // TODO: throw a specific exception: hive.use.sql.datetime.formats is disabled
+ }
+ formatter.setPattern(new String(patternBytes, StandardCharsets.UTF_8));
+ }
+
+ // The assign method will be overridden for CHAR and VARCHAR.
+ protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) {
+ outV.setVal(i, bytes, 0, length);
+ }
+
+ @Override
+ protected void func(BytesColumnVector outV, long[] vector, int i) {
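+    // vector[i] holds days since the epoch; widen it to a Timestamp at midnight UTC
+    // so the SQL:2016 formatter can render it.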
+    byte[] temp = formatter.format(
+        Timestamp.ofEpochMilli(Date.ofEpochDay((int) vector[i]).toEpochMilli()))
+        .getBytes(StandardCharsets.UTF_8);
+ assign(outV, i, temp, temp.length);
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+ b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+ .setNumArguments(2)
+ .setArgumentTypes(
+ VectorExpressionDescriptor.ArgumentType.INT_FAMILY,
+ VectorExpressionDescriptor.ArgumentType.STRING)
+ .setInputExpressionTypes(
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.SCALAR);
+ return b.build();
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
index a6dff12e1a..44a451b3bc 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.type.Date;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
@@ -151,10 +152,21 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException {
}
}
- private void evaluate(LongColumnVector outputColVector, BytesColumnVector inV, int i) {
+ /**
+ * Used by CastStringToDate.
+ */
+ protected void evaluate(LongColumnVector outputColVector, BytesColumnVector inV, int i) {
+ evaluate(outputColVector, inV, i, null);
+ }
+
+ /**
+ * Used by CastStringToDateWithFormat.
+ */
+ protected void evaluate(LongColumnVector outputColVector, BytesColumnVector inV, int i,
+ HiveDateTimeFormatter formatter) {
String dateString = new String(inV.vector[i], inV.start[i], inV.length[i], StandardCharsets.UTF_8);
Date hDate = new Date();
- if (dateParser.parseDate(dateString, hDate)) {
+ if (dateParser.parseDate(dateString, hDate, formatter)) {
outputColVector.vector[i] = DateWritableV2.dateToDays(hDate);
return;
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDateWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDateWithFormat.java
new file mode 100644
index 0000000000..a127aaf536
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDateWithFormat.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Vectorized UDF for CAST (&lt;string&gt; TO DATE WITH FORMAT &lt;pattern&gt;).
+ */
+public class CastStringToDateWithFormat extends CastStringToDate {
+
+ private HiveDateTimeFormatter formatter;
+
+ public CastStringToDateWithFormat() {
+ super();
+ }
+
+ public CastStringToDateWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) {
+ super(inputColumn, outputColumnNum);
+
+ if (patternBytes == null) {
+      throw new RuntimeException(); // TODO: throw a specific exception: the format pattern is missing
+ }
+
+ formatter = GenericUDF.getSqlDateTimeFormatterOrNull();
+ if (formatter == null) {
+      throw new RuntimeException(); // TODO: throw a specific exception: hive.use.sql.datetime.formats is disabled
+ }
+
+ formatter.setPattern(new String(patternBytes, StandardCharsets.UTF_8));
+ }
+
+ @Override
+ protected void evaluate(LongColumnVector outputColVector,
+ BytesColumnVector inputColVector, int i) {
+ super.evaluate(outputColVector, inputColVector, i, formatter);
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+ b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+ .setNumArguments(2)
+ .setArgumentTypes(
+ VectorExpressionDescriptor.ArgumentType.STRING_FAMILY,
+ VectorExpressionDescriptor.ArgumentType.STRING)
+ .setInputExpressionTypes(
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.SCALAR);
+ return b.build();
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestamp.java
index b48b0136eb..58e5671142 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestamp.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestamp.java
@@ -19,8 +19,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
import java.util.Arrays;
-import java.sql.Timestamp;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -143,13 +143,24 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException {
}
}
- private void evaluate(TimestampColumnVector outputColVector, BytesColumnVector inputColVector, int i) {
+ /**
+ * This is used by CastStringToTimestamp.
+ */
+ protected void evaluate(TimestampColumnVector outputColVector, BytesColumnVector inputColVector, int i) {
+ evaluate(outputColVector, inputColVector, i, null);
+ }
+
+ /**
+ * This is used by CastStringToTimestampWithFormat.
+ */
+ protected void evaluate(TimestampColumnVector outputColVector, BytesColumnVector inputColVector, int i, HiveDateTimeFormatter formatter) {
try {
org.apache.hadoop.hive.common.type.Timestamp timestamp =
PrimitiveObjectInspectorUtils.getTimestampFromString(
new String(
inputColVector.vector[i], inputColVector.start[i], inputColVector.length[i],
- "UTF-8"));
+ "UTF-8"),
+ formatter);
outputColVector.set(i, timestamp.toSqlTimestamp());
} catch (Exception e) {
outputColVector.setNullValue(i);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestampWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestampWithFormat.java
new file mode 100644
index 0000000000..b2dd7de689
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestampWithFormat.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Vectorized UDF for CAST (&lt;string&gt; TO TIMESTAMP WITH FORMAT &lt;pattern&gt;).
+ */
+public class CastStringToTimestampWithFormat extends CastStringToTimestamp {
+
+ private HiveDateTimeFormatter formatter;
+
+ public CastStringToTimestampWithFormat() {
+ super();
+ }
+
+ public CastStringToTimestampWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) {
+ super(inputColumn, outputColumnNum);
+
+ if (patternBytes == null) {
+      throw new RuntimeException(); // TODO: throw a specific exception: the format pattern is missing
+ }
+
+ formatter = GenericUDF.getSqlDateTimeFormatterOrNull();
+ if (formatter == null) {
+      throw new RuntimeException(); // TODO: throw a specific exception: hive.use.sql.datetime.formats is disabled
+ }
+
+ formatter.setPattern(new String(patternBytes, StandardCharsets.UTF_8));
+ }
+
+ @Override
+ protected void evaluate(TimestampColumnVector outputColVector,
+ BytesColumnVector inputColVector, int i) {
+ super.evaluate(outputColVector, inputColVector, i, formatter);
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+ b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+ .setNumArguments(2)
+ .setArgumentTypes(
+ VectorExpressionDescriptor.ArgumentType.STRING_FAMILY,
+ VectorExpressionDescriptor.ArgumentType.STRING)
+ .setInputExpressionTypes(
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.SCALAR);
+ return b.build();
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToString.java
index adc3a9d7b9..1c83f47064 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToString.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToString.java
@@ -18,6 +18,9 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveJavaDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.WrongFormatterException;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
@@ -42,12 +45,26 @@
PRINT_FORMATTER = builder.toFormatter();
}
+ private transient HiveDateTimeFormatter format;
+
public CastTimestampToString() {
super();
+ initFormatter();
}
public CastTimestampToString(int inputColumn, int outputColumnNum) {
super(inputColumn, outputColumnNum);
+ initFormatter();
+ }
+
+ private void initFormatter() {
+ try {
+ format = new HiveJavaDateTimeFormatter();
+ format.setFormatter(PRINT_FORMATTER);
+ } catch (WrongFormatterException e) {
+      // cannot happen: HiveJavaDateTimeFormatter accepts a DateTimeFormatter
+      throw new RuntimeException(e); // TODO: replace with a specific unchecked exception
+ }
}
// The assign method will be overridden for CHAR and VARCHAR.
@@ -57,12 +74,16 @@ protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) {
@Override
protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i) {
- byte[] temp = LocalDateTime.ofInstant(Instant.ofEpochMilli(inV.time[i]), ZoneOffset.UTC)
- .withNano(inV.nanos[i])
- .format(PRINT_FORMATTER).getBytes();
- assign(outV, i, temp, temp.length);
+ func(outV, inV, i, format);
}
+ protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i, HiveDateTimeFormatter formatter) {
+ String formattedLocalDateTime = formatter.format(
+ org.apache.hadoop.hive.common.type.Timestamp.ofEpochMilli(inV.time[i], inV.nanos[i]));
+
+ byte[] temp = formattedLocalDateTime.getBytes();
+ assign(outV, i, temp, temp.length);
+ }
public static String getTimestampString(Timestamp ts) {
return
LocalDateTime.ofInstant(Instant.ofEpochMilli(ts.getTime()), ZoneOffset.UTC)
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToStringWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToStringWithFormat.java
new file mode 100644
index 0000000000..0e08ce2054
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToStringWithFormat.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Vectorized UDF for CAST (&lt;timestamp&gt; TO STRING WITH FORMAT &lt;pattern&gt;).
+ */
+public class CastTimestampToStringWithFormat extends CastTimestampToString {
+ private static final long serialVersionUID = 1L;
+ private transient HiveDateTimeFormatter formatter;
+
+ public CastTimestampToStringWithFormat() {
+ super();
+ }
+
+ public CastTimestampToStringWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) {
+ super(inputColumn, outputColumnNum);
+
+ if (patternBytes == null) {
+      throw new RuntimeException(); // TODO: throw a specific exception: the format pattern is missing
+ }
+
+ formatter = GenericUDF.getSqlDateTimeFormatterOrNull();
+ if (formatter == null) {
+      throw new RuntimeException(); // TODO: throw a specific exception: hive.use.sql.datetime.formats is disabled
+ }
+ formatter.setPattern(new String(patternBytes, StandardCharsets.UTF_8));
+ }
+
+ @Override
+ protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i) {
+ super.func(outV, inV, i, formatter);
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+ b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+ .setNumArguments(2)
+ .setArgumentTypes(
+ VectorExpressionDescriptor.ArgumentType.TIMESTAMP,
+ VectorExpressionDescriptor.ArgumentType.STRING)
+ .setInputExpressionTypes(
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.SCALAR);
+ return b.build();
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index f22511ad67..2a65f0e74e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -248,7 +248,8 @@ castExpression
expression
KW_AS
primitiveType
- RPAREN -> ^(TOK_FUNCTION primitiveType expression)
+ (KW_FORMAT expression)?
+ RPAREN -> ^(TOK_FUNCTION primitiveType expression*)
;
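+// Example accepted by the castExpression rule above (illustrative):
+//   CAST('05-2017' AS DATE FORMAT 'MM-YYYY')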
caseExpression
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFromUnixTime.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFromUnixTime.java
index 3cee0c1d1c..e40cb9f961 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFromUnixTime.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFromUnixTime.java
@@ -18,12 +18,13 @@
package org.apache.hadoop.hive.ql.udf;
-import java.text.SimpleDateFormat;
-import java.util.Date;
import java.util.TimeZone;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.type.Timestamp;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
@@ -34,11 +35,17 @@
*/
@Description(name = "from_unixtime",
value = "_FUNC_(unix_time, format) - returns unix_time in the specified format",
- extended = "Example:\n"
+ extended = "format is a String which specifies the format for output. If session-level "
+ + "setting hive.use.sql.datetime.formats is true, "
+ + "output_date_format will be interpreted as SQL:2016 datetime format. Otherwise it will "
+ + "be interpreted as java.text.SimpleDateFormat.\n"
+ + "Example:\n"
+ " > SELECT _FUNC_(0, 'yyyy-MM-dd HH:mm:ss') FROM src LIMIT 1;\n"
+ " '1970-01-01 00:00:00'")
public class UDFFromUnixTime extends UDF {
- private SimpleDateFormat formatter;
+ private HiveDateTimeFormatter formatter;
+ private boolean useSqlFormat = true;
+ private boolean lastUsedSqlFormats = true;
private Text result = new Text();
private Text lastFormat = new Text();
@@ -58,9 +65,8 @@ public Text evaluate(IntWritable unixtime) {
* @param unixtime
* The number of seconds from 1970-01-01 00:00:00
* @param format
- * See
- * http://java.sun.com/j2se/1.4.2/docs/api/java/text/SimpleDateFormat
- * .html
+ * See http://java.sun.com/j2se/1.4.2/docs/api/java/text/SimpleDateFormat.html,
+ * or set hive.use.sql.datetime.formats=true to use SQL:2016 formats.
* @return a String in the format specified.
*/
public Text evaluate(LongWritable unixtime, Text format) {
@@ -92,9 +98,8 @@ public Text evaluate(LongWritable unixtime) {
* @param unixtime
* The number of seconds from 1970-01-01 00:00:00
* @param format
- * See
- * http://java.sun.com/j2se/1.4.2/docs/api/java/text/SimpleDateFormat
- * .html
+ * See http://java.sun.com/j2se/1.4.2/docs/api/java/text/SimpleDateFormat.html,
+ * or set hive.use.sql.datetime.formats=true to use SQL:2016 formats.
* @return a String in the format specified.
*/
public Text evaluate(IntWritable unixtime, Text format) {
@@ -112,21 +117,31 @@ public Text evaluate(IntWritable unixtime, Text format) {
* @param unixtime
* seconds of type long from 1970-01-01 00:00:00
* @param format
- * display format. See
- * http://java.sun.com/j2se/1.4.2/docs/api/java/text
- * /SimpleDateFormat.html
+ * display format.
+ * See http://java.sun.com/j2se/1.4.2/docs/api/java/text/SimpleDateFormat.html,
+ * or set hive.use.sql.datetime.formats=true to use SQL:2016 formats.
* @return elapsed time in the given format.
*/
private Text eval(long unixtime, Text format) {
+ initFormatter();
+
if (!format.equals(lastFormat)) {
- formatter = new SimpleDateFormat(format.toString());
- formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+ formatter.setPattern(format.toString());
lastFormat.set(format);
}
// convert seconds to milliseconds
- Date date = new Date(unixtime * 1000L);
- result.set(formatter.format(date));
+ Timestamp ts = Timestamp.ofEpochMilli(unixtime * 1000L);
+ result.set(formatter.format(ts));
return result;
}
+
+ private void initFormatter() {
+ useSqlFormat = GenericUDF.useSqlFormat();
+ if (formatter == null || useSqlFormat != lastUsedSqlFormats) {
+ formatter = GenericUDF.getHiveDateTimeFormatter(useSqlFormat);
+ formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+ lastUsedSqlFormats = useSqlFormat;
+ // the new formatter has no pattern yet; clear the cache so eval() re-applies it
+ lastFormat.clear();
+ }
+ }
}
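
For reference, a self-contained sketch of the caching idea in eval()/initFormatter(), with java.time standing in for HiveDateTimeFormatter: the formatter is rebuilt only when the pattern (or, in the real UDF, the SQL-vs-SimpleDateFormat mode) changes between rows.

import java.time.Instant;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;

public class FromUnixTimeSketch {
  private DateTimeFormatter formatter;
  private String lastPattern;

  // Rebuild the formatter only when the pattern changes between rows; the
  // real UDF additionally rebuilds when the format mode flips.
  String eval(long unixtime, String pattern) {
    if (formatter == null || !pattern.equals(lastPattern)) {
      formatter = DateTimeFormatter.ofPattern(pattern).withZone(ZoneOffset.UTC);
      lastPattern = pattern;
    }
    return formatter.format(Instant.ofEpochSecond(unixtime));
  }

  public static void main(String[] args) {
    FromUnixTimeSketch s = new FromUnixTimeSketch();
    System.out.println(s.eval(0L, "yyyy-MM-dd HH:mm:ss")); // 1970-01-01 00:00:00
    System.out.println(s.eval(0L, "HH:mm:ss"));            // 00:00:00
  }
}
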
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java
index 6597f4b34b..9646230dbf 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java
@@ -20,19 +20,25 @@
import java.io.Closeable;
import java.io.IOException;
+import java.util.TimeZone;
import org.apache.hadoop.hive.common.classification.InterfaceAudience;
import org.apache.hadoop.hive.common.classification.InterfaceStability;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSimpleDateFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
import org.apache.hadoop.hive.common.type.Date;
import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.MapredContext;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DateWritableV2;
@@ -48,12 +54,14 @@
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
/**
* A Generic User-defined function (GenericUDF) for the use with Hive.
@@ -638,4 +646,42 @@ protected String getArgOrder(int i) {
return i + ORDINAL_SUFFIXES[i % 10];
}
}
+
+ /** Reads hive.use.sql.datetime.formats from the SessionState conf, or the ConfVars default when there is no session. */
+ public static boolean useSqlFormat() {
+ boolean useSqlFormat = HiveConf.ConfVars.HIVE_USE_SQL_DATETIME_FORMAT.defaultBoolVal;
+ SessionState ss = SessionState.get();
+ if (ss != null) {
+ useSqlFormat = ss.getConf().getBoolVar(HiveConf.ConfVars.HIVE_USE_SQL_DATETIME_FORMAT);
+ }
+ return useSqlFormat;
+ }
+
+ /**
+ * For CAST (... FORMAT ...) UDFs between string and datetime types.
+ * @return either a HiveSimpleDateFormatter or a HiveSqlDateTimeFormatter, depending on conf.
+ */
+ public static HiveDateTimeFormatter getHiveDateTimeFormatter(boolean definitelyUseSqlFormat) {
+ HiveDateTimeFormatter formatter;
+ if (useSqlFormat() || definitelyUseSqlFormat) {
+ formatter = new HiveSqlDateTimeFormatter();
+ } else {
+ formatter = new HiveSimpleDateFormatter();
+ formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+ }
+ return formatter;
+ }
+
+ /**
+ * For functions that need a HiveDateTimeFormatter only when SQL:2016 formats are enabled;
+ * otherwise returns null.
+ * Vectorized UDFs also use this.
+ */
+ public static HiveDateTimeFormatter getSqlDateTimeFormatterOrNull() {
+ HiveDateTimeFormatter formatter = getHiveDateTimeFormatter(false);
+ if (formatter instanceof HiveSqlDateTimeFormatter) {
+ return formatter;
+ }
+ return null;
+ }
}
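
A hedged sketch of the selection logic that useSqlFormat() and getHiveDateTimeFormatter() implement; a plain Map stands in for HiveConf/SessionState, and the returned strings stand in for the two Hive formatter classes.

import java.util.HashMap;
import java.util.Map;

public class FormatterSelectionSketch {
  static final boolean DEFAULT_USE_SQL_FORMAT = false;

  // Mirrors useSqlFormat(): fall back to the default when there is no
  // session, otherwise read the session conf.
  static boolean useSqlFormat(Map<String, String> sessionConf) {
    if (sessionConf == null) {
      return DEFAULT_USE_SQL_FORMAT;
    }
    return Boolean.parseBoolean(sessionConf.getOrDefault(
        "hive.use.sql.datetime.formats", Boolean.toString(DEFAULT_USE_SQL_FORMAT)));
  }

  // Mirrors getHiveDateTimeFormatter(): the strings stand in for the two
  // formatter implementations.
  static String getFormatter(Map<String, String> conf, boolean definitelyUseSqlFormat) {
    return (useSqlFormat(conf) || definitelyUseSqlFormat)
        ? "HiveSqlDateTimeFormatter"          // SQL:2016 path
        : "HiveSimpleDateFormatter (UTC)";    // legacy path, pinned to UTC
  }

  public static void main(String[] args) {
    Map<String, String> conf = new HashMap<>();
    conf.put("hive.use.sql.datetime.formats", "true");
    System.out.println(getFormatter(conf, false)); // SQL:2016 formatter
    System.out.println(getFormatter(null, false)); // legacy default
  }
}
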
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java
index 6df0913de6..69a49083a8 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java
@@ -22,13 +22,15 @@
import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.STRING_GROUP;
import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.VOID_GROUP;
-import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.TimeZone;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.type.Date;
import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.MapredContext;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -38,7 +40,6 @@
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Text;
-import org.apache.hive.common.util.DateUtils;
/**
* GenericUDFAddMonths.
@@ -51,10 +52,13 @@
value = "_FUNC_(start_date, num_months, output_date_format) - "
+ "Returns the date that is num_months after start_date.",
extended = "start_date is a string or timestamp indicating a valid date. "
- + "num_months is a number. output_date_format is an optional String which specifies the format for output.\n"
- + "The default output format is 'YYYY-MM-dd'.\n"
+ + "num_months is a number.\noutput_date_format is an optional String which specifies the "
+ + "format for output. If session-level setting hive.use.sql.datetime.formats is true, "
+ + "output_date_format will be interpreted as SQL:2016 datetime format. Otherwise it will "
+ + "be interpreted as java.text.SimpleDateFormat.\n"
+ + "The default output format is 'yyyy-MM-dd'.\n"
+ "Example:\n > SELECT _FUNC_('2009-08-31', 1) FROM src LIMIT 1;\n" + " '2009-09-30'."
- + "\n > SELECT _FUNC_('2017-12-31 14:15:16', 2, 'YYYY-MM-dd HH:mm:ss') LIMIT 1;\n"
+ + "\n > SELECT _FUNC_('2017-12-31 14:15:16', 2, 'yyyy-MM-dd HH:mm:ss') LIMIT 1;\n"
+ "'2018-02-28 14:15:16'.\n")
@NDV(maxNdv = 250) // 250 seems to be reasonable upper limit for this
public class GenericUDFAddMonths extends GenericUDF {
@@ -63,10 +67,11 @@
private transient Converter[] dtConverters = new Converter[3];
private transient PrimitiveCategory[] dtInputTypes = new PrimitiveCategory[3];
private final Text output = new Text();
- private transient SimpleDateFormat formatter = null;
+ private transient HiveDateTimeFormatter formatter = null;
private final Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
private transient Integer numMonthsConst;
private transient boolean isNumMonthsConst;
+ private boolean useSql;
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
@@ -75,23 +80,24 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen
checkArgPrimitive(arguments, 0);
checkArgPrimitive(arguments, 1);
+ formatter = getHiveDateTimeFormatter(useSql);
+ formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+
if (arguments.length == 3) {
if (arguments[2] instanceof ConstantObjectInspector) {
checkArgPrimitive(arguments, 2);
checkArgGroups(arguments, 2, tsInputTypes, STRING_GROUP);
String fmtStr = getConstantStringValue(arguments, 2);
if (fmtStr != null) {
- formatter = new SimpleDateFormat(fmtStr);
- formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+ formatter.setPattern(fmtStr);
}
} else {
throw new UDFArgumentTypeException(2, getFuncName() + " only takes constant as "
+ getArgOrder(2) + " argument");
}
}
- if (formatter == null) {
- //If the DateFormat is not provided by the user or is invalid, use the default format YYYY-MM-dd
- formatter = DateUtils.getDateFormat();
+ if (formatter.getPattern() == null) {
+ formatter.setPattern("yyyy-MM-dd");
}
// the function should support both short date and full timestamp format
@@ -143,7 +149,7 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException {
}
}
- String res = formatter.format(calendar.getTime());
+ String res = formatter.format(Timestamp.ofEpochMilli(calendar.getTimeInMillis()));
output.set(res);
return output;
@@ -188,4 +194,15 @@ private boolean isLastDayOfMonth(Calendar cal) {
int dd = cal.get(Calendar.DAY_OF_MONTH);
return dd == maxDd;
}
+
+ /**
+ * Determine whether to use SQL:2016 formats.
+ * Necessary because MapReduce tasks don't have access to the SessionState conf, so the
+ * MapredContext conf is used instead. This is only called at runtime by MapRedTask.
+ */
+ @Override public void configure(MapredContext context) {
+ super.configure(context);
+ useSql =
+ HiveConf.getBoolVar(context.getJobConf(), HiveConf.ConfVars.HIVE_USE_SQL_DATETIME_FORMAT);
+ }
}
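
To make the month arithmetic concrete, a sketch on java.time: plusMonths clamps the day-of-month on overflow, reproducing the '2017-12-31' + 2 = '2018-02-28' example in the @Description. Note the real UDF goes further via isLastDayOfMonth, snapping any last-day input to the last day of the result month, which plusMonths alone does not do.

import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;

public class AddMonthsSketch {
  public static void main(String[] args) {
    DateTimeFormatter fmt = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
    LocalDateTime start = LocalDateTime.parse("2017-12-31 14:15:16", fmt);
    // plusMonths clamps 31 -> 28 for February, matching the example above.
    System.out.println(fmt.format(start.plusMonths(2))); // 2018-02-28 14:15:16
    // Unlike the UDF's isLastDayOfMonth handling, plusMonths does NOT snap
    // 2018-02-28 + 1 month to 2018-03-31; it yields 2018-03-28.
    System.out.println(fmt.format(
        LocalDateTime.parse("2018-02-28 00:00:00", fmt).plusMonths(1)));
  }
}
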
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateFormat.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateFormat.java
index 6d3e86f921..cf27039f3b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateFormat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateFormat.java
@@ -20,12 +20,14 @@
import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.DATE_GROUP;
import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.STRING_GROUP;
-import java.text.SimpleDateFormat;
import java.util.TimeZone;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.type.Date;
import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.MapredContext;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -45,8 +47,9 @@
*/
@Description(name = "date_format", value = "_FUNC_(date/timestamp/string, fmt) - converts a date/timestamp/string "
+ "to a value of string in the format specified by the date format fmt.",
- extended = "Supported formats are SimpleDateFormat formats - "
- + "https://docs.oracle.com/javase/7/docs/api/java/text/SimpleDateFormat.html. "
+ extended = "Supported formats are (1) SimpleDateFormat formats - "
+ + "https://docs.oracle.com/javase/7/docs/api/java/text/SimpleDateFormat.html, "
+ + "or (2) SQL:2016 datetime formats if hive.use.sql.datetime.formats is set to true. "
+ "Second argument fmt should be constant.\n"
+ "Example: > SELECT _FUNC_('2015-04-08', 'y');\n '2015'")
public class GenericUDFDateFormat extends GenericUDF {
@@ -56,7 +59,8 @@
private transient PrimitiveCategory[] dtInputTypes = new PrimitiveCategory[2];
private final java.util.Date date = new java.util.Date();
private final Text output = new Text();
- private transient SimpleDateFormat formatter;
+ private transient HiveDateTimeFormatter formatter;
+ private boolean useSql;
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
@@ -79,10 +83,12 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen
String fmtStr = getConstantStringValue(arguments, 1);
if (fmtStr != null) {
try {
- formatter = new SimpleDateFormat(fmtStr);
+ formatter = getHiveDateTimeFormatter(useSql);
+ formatter.setPattern(fmtStr);
formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
} catch (IllegalArgumentException e) {
- // ignore
+ // reset the formatter if the pattern was invalid
+ formatter = null;
}
}
} else {
@@ -110,8 +116,7 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException {
ts = Timestamp.ofEpochMilli(d.toEpochMilli());
}
- date.setTime(ts.toEpochMilli());
- String res = formatter.format(date);
+ String res = formatter.format(ts);
if (res == null) {
return null;
}
@@ -128,4 +133,16 @@ public String getDisplayString(String[] children) {
protected String getFuncName() {
return "date_format";
}
+
+
+ /**
+ * Determine whether to use SQL:2016 formats.
+ * Necessary because MapReduce tasks don't have access to the SessionState conf, so the
+ * MapredContext conf is used instead. This is only called at runtime by MapRedTask.
+ */
+ @Override public void configure(MapredContext context) {
+ super.configure(context);
+ useSql =
+ HiveConf.getBoolVar(context.getJobConf(), HiveConf.ConfVars.HIVE_USE_SQL_DATETIME_FORMAT);
+ }
}
\ No newline at end of file
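
The invalid-pattern branch above nulls the formatter rather than failing the query. A minimal java.time analogue of that reset behavior (DateTimeFormatter stands in for HiveDateTimeFormatter):

import java.time.format.DateTimeFormatter;

public class PatternResetSketch {
  // An invalid pattern throws IllegalArgumentException; the formatter
  // reference is reset to null, mirroring the catch block above.
  static DateTimeFormatter tryPattern(String pattern) {
    try {
      return DateTimeFormatter.ofPattern(pattern);
    } catch (IllegalArgumentException e) {
      return null;
    }
  }

  public static void main(String[] args) {
    System.out.println(tryPattern("yyyy-MM-dd") != null); // true
    System.out.println(tryPattern("##") != null);         // false: '#' is reserved
  }
}
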
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java
index bcc4114099..6c3c3349bb 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java
@@ -17,8 +17,6 @@
*/
package org.apache.hadoop.hive.ql.udf.generic;
-import java.text.SimpleDateFormat;
-
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateSubColCol;
@@ -46,7 +44,6 @@
+ " '2009-07-29'")
@VectorizedExpressions({VectorUDFDateSubColScalar.class, VectorUDFDateSubScalarCol.class, VectorUDFDateSubColCol.class})
public class GenericUDFDateSub extends GenericUDFDateAdd {
- private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
public GenericUDFDateSub() {
this.signModifier = -1;
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTimestamp.java
index 70f57b7727..e1565a514e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTimestamp.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTimestamp.java
@@ -17,8 +17,11 @@
*/
package org.apache.hadoop.hive.ql.udf.generic;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.MapredContext;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToTimestampWithFormat;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
@@ -31,7 +34,6 @@
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToTimestamp;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.session.SessionState;
-import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.TimestampConverter;
@@ -48,13 +50,18 @@
*
*/
@Description(name = "timestamp",
-value = "cast(date as timestamp) - Returns timestamp")
+value = "cast( as timestamp [format ]) - Returns timestamp",
+extended = "If format is specified with FORMAT argument then SQL:2016 datetime formats will be "
+ + "used. hive.use.sql.datetime.formats must be turned on to use formats.")
@VectorizedExpressions({CastLongToTimestamp.class, CastDateToTimestamp.class,
- CastDoubleToTimestamp.class, CastDecimalToTimestamp.class, CastStringToTimestamp.class})
+ CastDoubleToTimestamp.class, CastDecimalToTimestamp.class, CastStringToTimestamp.class,
+ CastStringToTimestampWithFormat.class})
public class GenericUDFTimestamp extends GenericUDF {
private transient PrimitiveObjectInspector argumentOI;
private transient TimestampConverter tc;
+ private HiveDateTimeFormatter formatter = null;
+ private boolean useSql;
/*
* Integer value was interpreted to timestamp inconsistently in milliseconds comparing
* to float/double in seconds. Since the issue exists for a long time and some users may
@@ -88,6 +95,13 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen
PrimitiveObjectInspectorFactory.writableTimestampObjectInspector);
tc.setIntToTimestampInSeconds(intToTimestampInSeconds);
+ // for CAST WITH FORMAT
+ if (arguments.length > 1 && arguments[1] != null && (useSql || useSqlFormat())) {
+ formatter = new HiveSqlDateTimeFormatter();
+ formatter.setPattern(getConstantStringValue(arguments, 1));
+ tc.setDateTimeFormatter(formatter);
+ }
+
return PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
}
@@ -97,21 +111,36 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException {
if (o0 == null) {
return null;
}
-
return tc.convert(o0);
}
@Override
public String getDisplayString(String[] children) {
- assert (children.length == 1);
+ assert (1 <= children.length && children.length <= 2);
StringBuilder sb = new StringBuilder();
sb.append("CAST( ");
sb.append(children[0]);
- sb.append(" AS TIMESTAMP)");
+ sb.append(" AS TIMESTAMP");
+ if (children.length == 2) {
+ sb.append(" FORMAT ");
+ sb.append(children[1]);
+ }
+ sb.append(")");
return sb.toString();
}
public boolean isIntToTimestampInSeconds() {
return intToTimestampInSeconds;
}
+
+ /**
+ * Determine whether to use SQL:2016 formats.
+ * Necessary because MapReduce tasks don't have access to the SessionState conf, so the
+ * MapredContext conf is used instead. This is only called at runtime by MapRedTask.
+ */
+ @Override public void configure(MapredContext context) {
+ super.configure(context);
+ useSql =
+ HiveConf.getBoolVar(context.getJobConf(), ConfVars.HIVE_USE_SQL_DATETIME_FORMAT);
+ }
}
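
The getDisplayString() change is the same shape across all of the CAST UDFs in this patch; a runnable distillation:

public class DisplayStringSketch {
  // One child prints the old CAST( x AS TIMESTAMP); a second child appends
  // the FORMAT clause, matching the amended grammar.
  static String display(String... children) {
    StringBuilder sb = new StringBuilder("CAST( ")
        .append(children[0]).append(" AS TIMESTAMP");
    if (children.length == 2) {
      sb.append(" FORMAT ").append(children[1]);
    }
    return sb.append(")").toString();
  }

  public static void main(String[] args) {
    System.out.println(display("ts_col"));                // CAST( ts_col AS TIMESTAMP)
    System.out.println(display("s_col", "'YYYY-MM-DD'")); // CAST( s_col AS TIMESTAMP FORMAT 'YYYY-MM-DD')
  }
}
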
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToChar.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToChar.java
index 899abf76b8..b94b29cc3c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToChar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToChar.java
@@ -55,7 +55,7 @@ public GenericUDFToChar() {
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
- if (arguments.length != 1) {
+ if (arguments.length < 1) {
throw new UDFArgumentException("CHAR cast requires a value argument");
}
try {
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java
index c309ffa5e3..0e1d17498f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java
@@ -17,11 +17,16 @@
*/
package org.apache.hadoop.hive.ql.udf.generic;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.MapredContext;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToDate;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToDateWithFormat;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToDate;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -36,16 +41,21 @@
* GenericUDFToDate
*/
@Description(name = "date",
- value = "CAST( as DATE) - Returns the date represented by the date string.",
- extended = "date_string is a string in the format 'yyyy-MM-dd.'"
+ value = "CAST( as DATE [FORMAT ]) - Returns the date represented by the date string.",
+ extended = "date_string is a string in the format 'yyyy-MM-dd.' "
+ + "If format is specified with FORMAT argument then SQL:2016 datetime formats will be "
+ + "used for parsing. hive.use.sql.datetime.formats must be turned on for this feature."
+ "Example:\n "
+ " > SELECT CAST('2009-01-01' AS DATE) FROM src LIMIT 1;\n"
+ " '2009-01-01'")
-@VectorizedExpressions({CastStringToDate.class, CastTimestampToDate.class})
+@VectorizedExpressions({CastStringToDate.class, CastTimestampToDate.class,
+ CastStringToDateWithFormat.class})
public class GenericUDFToDate extends GenericUDF {
private transient PrimitiveObjectInspector argumentOI;
private transient DateConverter dc;
+ private HiveDateTimeFormatter formatter = null;
+ private boolean useSql;
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
@@ -73,6 +83,13 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen
"The function CAST as DATE takes only primitive types");
}
dc = new DateConverter(argumentOI,
PrimitiveObjectInspectorFactory.writableDateObjectInspector);
+
+ // for CAST WITH FORMAT; dc must exist before the formatter is attached to it
+ if (arguments.length > 1 && arguments[1] != null && (useSql || useSqlFormat())) {
+ formatter = new HiveSqlDateTimeFormatter();
+ formatter.setPattern(getConstantStringValue(arguments, 1));
+ dc.setDateTimeFormatter(formatter);
+ }
return PrimitiveObjectInspectorFactory.writableDateObjectInspector;
@@ -90,12 +107,27 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException {
@Override
public String getDisplayString(String[] children) {
- assert (children.length == 1);
+ assert (children.length == 1 || children.length == 2);
StringBuilder sb = new StringBuilder();
sb.append("CAST( ");
sb.append(children[0]);
- sb.append(" AS DATE)");
+ sb.append(" AS DATE");
+ if (children.length == 2) {
+ sb.append(" FORMAT ");
+ sb.append(children[1]);
+ }
+ sb.append(")");
return sb.toString();
}
+ /**
+ * Determine whether to use SQL:2016 formats.
+ * Necessary because MapReduce tasks don't have access to the SessionState conf, so the
+ * MapredContext conf is used instead. This is only called at runtime by MapRedTask.
+ */
+ @Override public void configure(MapredContext context) {
+ super.configure(context);
+ useSql =
+ HiveConf.getBoolVar(context.getJobConf(), HiveConf.ConfVars.HIVE_USE_SQL_DATETIME_FORMAT);
+ }
}
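
Conceptually, CAST(<string> AS DATE FORMAT 'YYYY') parses only the fields the pattern names and defaults the rest. A sketch with java.time standing in for HiveSqlDateTimeFormatter; the field defaults are an assumption about the intended semantics, suggested by the year-only expectations in TestVectorTypeCastsWithFormat below.

import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.temporal.ChronoField;

public class CastWithFormatSketch {
  public static void main(String[] args) {
    // Parse a 4-digit year and default month/day to 1, as the year-only
    // casts in the tests expect (e.g. "2019..." -> 2019-01-01).
    DateTimeFormatter yearOnly = new DateTimeFormatterBuilder()
        .appendValue(ChronoField.YEAR, 4)
        .parseDefaulting(ChronoField.MONTH_OF_YEAR, 1)
        .parseDefaulting(ChronoField.DAY_OF_MONTH, 1)
        .toFormatter();
    System.out.println(LocalDate.parse("2019", yearOnly)); // 2019-01-01
  }
}
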
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToString.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToString.java
index d5764419d6..2e070a0bab 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToString.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToString.java
@@ -17,7 +17,11 @@
*/
package org.apache.hadoop.hive.ql.udf.generic;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.MapredContext;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -28,8 +32,10 @@
import org.slf4j.LoggerFactory;
@Description(name = "string",
-value = "CAST( as STRING) - Converts the argument to a string value.",
-extended = "Example:\n "
+value = "CAST( as STRING [FORMAT ]) - Converts the argument to a string value.",
+extended = "If format is specified with FORMAT argument then SQL:2016 datetime formats will be "
+ + "used. hive.use.sql.datetime.formats must be turned on to use formats.\n"
+ + "Example:\n "
+ " > SELECT CAST(1234 AS string) FROM src LIMIT 1;\n"
+ " '1234'")
public class GenericUDFToString extends GenericUDF {
@@ -37,13 +43,15 @@
private transient PrimitiveObjectInspector argumentOI;
private transient TextConverter converter;
+ private HiveDateTimeFormatter formatter = null;
+ private boolean useSql;
public GenericUDFToString() {
}
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
- if (arguments.length != 1) {
+ if (arguments.length < 1) {
throw new UDFArgumentException("STRING cast requires a value argument");
}
try {
@@ -53,27 +61,50 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen
"The function STRING takes only primitive types");
}
converter = new TextConverter(argumentOI);
+
+ // for CAST WITH FORMAT; converter must exist before the formatter is attached to it
+ if (arguments.length > 1 && arguments[1] != null && (useSql || useSqlFormat())) {
+ formatter = new HiveSqlDateTimeFormatter();
+ formatter.setPattern(getConstantStringValue(arguments, 1));
+ converter.setDateTimeFormatter(formatter);
+ }
return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
}
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
- Object o0 = arguments[0].get();
- if (o0 == null) {
- return null;
- }
+ Object o0 = arguments[0].get();
+ if (o0 == null) {
+ return null;
+ }
- return converter.convert(o0);
+ return converter.convert(o0);
}
@Override
public String getDisplayString(String[] children) {
- assert (children.length == 1);
+ assert (children.length == 1 || children.length == 2);
StringBuilder sb = new StringBuilder();
sb.append("CAST( ");
sb.append(children[0]);
- sb.append(" AS STRING)");
+ sb.append(" AS STRING");
+ if (children.length == 2) {
+ sb.append(" FORMAT ");
+ sb.append(children[1]);
+ }
+ sb.append(")");
return sb.toString();
}
+
+ /**
+ * Determine whether to use SQL:2016 formats.
+ * Necessary because MapReduce tasks don't have access to the SessionState conf, so the
+ * MapredContext conf is used instead. This is only called at runtime by MapRedTask.
+ */
+ @Override public void configure(MapredContext context) {
+ super.configure(context);
+ useSql =
+ HiveConf.getBoolVar(context.getJobConf(), HiveConf.ConfVars.HIVE_USE_SQL_DATETIME_FORMAT);
+ }
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToTimestampLocalTZ.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToTimestampLocalTZ.java
index eaced5af5a..e3371da9e2 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToTimestampLocalTZ.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToTimestampLocalTZ.java
@@ -17,7 +17,11 @@
*/
package org.apache.hadoop.hive.ql.udf.generic;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.MapredContext;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -35,18 +39,24 @@
* Convert from string to TIMESTAMP WITH LOCAL TIME ZONE.
*/
@Description(name = "timestamp with local time zone",
- value = "CAST(STRING as TIMESTAMP WITH LOCAL TIME ZONE) - returns the" +
- "timestamp with local time zone represented by string.",
- extended = "The string should be of format 'yyyy-MM-dd HH:mm:ss[.SSS...] ZoneId/ZoneOffset'. " +
- "Examples of ZoneId and ZoneOffset are Asia/Shanghai and GMT+08:00. " +
- "The time and zone parts are optional. If time is absent, '00:00:00.0' will be used. " +
- "If zone is absent, the system time zone will be used.")
+ value = "CAST( as TIMESTAMP WITH LOCAL TIME ZONE [FORMAT ]) - returns the" +
+ "timestamp with local time zone represented by string. Optional parsing according to " +
+ "format string.",
+ extended = "The string should be of format 'yyyy-MM-dd HH:mm:ss[.SSS...] ZoneId/ZoneOffset',"
+ + "Examples of ZoneId and ZoneOffset are Asia/Shanghai and GMT+08:00. "
+ + "The time and zone parts are optional. If time is absent, '00:00:00.0' will be used."
+ + "If zone is absent, the system time zone will be used.\n"
+ + "If format is specified with FORMAT argument then SQL:2016 datetime formats will be "
+ + "used. hive.use.sql.datetime.formats must be turned on to use formats.")
+
public class GenericUDFToTimestampLocalTZ extends GenericUDF implements SettableUDF {
private transient PrimitiveObjectInspector argumentOI;
private transient PrimitiveObjectInspectorConverter.TimestampLocalTZConverter converter;
private TimestampLocalTZTypeInfo typeInfo;
+ private HiveDateTimeFormatter formatter = null;
+ private boolean useSql;
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
@@ -76,6 +86,14 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen
SettableTimestampLocalTZObjectInspector outputOI = (SettableTimestampLocalTZObjectInspector)
PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
converter = new TimestampLocalTZConverter(argumentOI, outputOI);
+
+ // for CAST WITH FORMAT
+ if (arguments.length > 1 && arguments[1] != null && (useSql || useSqlFormat())) {
+ formatter = new HiveSqlDateTimeFormatter();
+ formatter.setPattern(getConstantStringValue(arguments, 1));
+ converter.setDateTimeFormatter(formatter);
+ }
+
return outputOI;
}
@@ -85,17 +103,22 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException {
if (o0 == null) {
return null;
}
+
return converter.convert(o0);
}
@Override
public String getDisplayString(String[] children) {
- assert (children.length == 1);
+ assert (children.length == 1 || children.length == 2);
StringBuilder sb = new StringBuilder();
sb.append("CAST( ");
sb.append(children[0]);
sb.append(" AS ");
sb.append(typeInfo.getTypeName());
+ if (children.length == 2) {
+ sb.append(" FORMAT ");
+ sb.append(children[1]);
+ }
sb.append(")");
return sb.toString();
}
@@ -110,4 +133,14 @@ public void setTypeInfo(TypeInfo typeInfo) throws UDFArgumentException {
this.typeInfo = (TimestampLocalTZTypeInfo) typeInfo;
}
+ /**
+ * Determine whether to use SQL:2016 formats.
+ * Necessary because MapReduce tasks don't have access to the SessionState conf, so the
+ * MapredContext conf is used instead. This is only called at runtime by MapRedTask.
+ */
+ @Override public void configure(MapredContext context) {
+ super.configure(context);
+ useSql =
+ HiveConf.getBoolVar(context.getJobConf(), HiveConf.ConfVars.HIVE_USE_SQL_DATETIME_FORMAT);
+ }
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java
index 3c3796e8a6..ab16ec3dd4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java
@@ -18,14 +18,16 @@
package org.apache.hadoop.hive.ql.udf.generic;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
import java.util.TimeZone;
import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.ParseException;
import org.apache.hadoop.hive.common.type.Timestamp;
import org.apache.hadoop.hive.common.type.TimestampTZ;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.MapredContext;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
@@ -51,7 +53,10 @@
*/
@Description(name = "to_unix_timestamp",
value = "_FUNC_(date[, pattern]) - Returns the UNIX timestamp",
- extended = "Converts the specified time to number of seconds since 1970-01-01.")
+ extended = "Converts the specified time to number of seconds since 1970-01-01.\n"
+ + "pattern is an optional string which specifies the format for output. If session-level "
+ + "setting hive.use.sql.datetime.formats is true, pattern will be interpreted as SQL:2016 "
+ + "datetime format. Otherwise it will be interpreted as java.text.SimpleDateFormat.")
@VectorizedExpressions({VectorUDFUnixTimeStampDate.class, VectorUDFUnixTimeStampString.class, VectorUDFUnixTimeStampTimestamp.class})
public class GenericUDFToUnixTimeStamp extends GenericUDF {
@@ -62,7 +67,8 @@
private transient Converter patternConverter;
private transient String lasPattern = "yyyy-MM-dd HH:mm:ss";
- private transient final SimpleDateFormat formatter = new SimpleDateFormat(lasPattern);
+ private transient HiveDateTimeFormatter formatter = null;
+ private boolean useSql;
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
@@ -82,6 +88,8 @@ protected void initializeInput(ObjectInspector[] arguments) throws UDFArgumentEx
}
}
+ formatter = getHiveDateTimeFormatter(useSql);
+ formatter.setPattern(lasPattern);
formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
PrimitiveObjectInspector arg1OI = (PrimitiveObjectInspector) arguments[0];
@@ -145,12 +153,12 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException {
return null;
}
if (!patternVal.equals(lasPattern)) {
- formatter.applyPattern(patternVal);
+ formatter.setPattern(patternVal);
lasPattern = patternVal;
}
}
try {
- retValue.set(formatter.parse(textVal).getTime() / 1000);
+ retValue.set(formatter.parse(textVal).toEpochMilli() / 1000);
return retValue;
} catch (ParseException e) {
return null;
@@ -183,4 +191,15 @@ public String getDisplayString(String[] children) {
sb.append(')');
return sb.toString();
}
+
+ /**
+ * Determine whether to use SQL:2016 formats.
+ * Necessary because MapReduce tasks don't have access to the SessionState conf, so the
+ * MapredContext conf is used instead. This is only called at runtime by MapRedTask.
+ */
+ @Override public void configure(MapredContext context) {
+ super.configure(context);
+ useSql =
+ HiveConf.getBoolVar(context.getJobConf(), HiveConf.ConfVars.HIVE_USE_SQL_DATETIME_FORMAT);
+ }
}
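
The parse path above in one self-contained java.time sketch: parse the text with the cached pattern, convert to epoch seconds, and return null on bad input, just as the UDF catches ParseException and returns null.

import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;

public class ToUnixTimestampSketch {
  static Long toUnixTimestamp(String text, String pattern) {
    try {
      LocalDateTime ldt = LocalDateTime.parse(text, DateTimeFormatter.ofPattern(pattern));
      return ldt.toEpochSecond(ZoneOffset.UTC); // the UDF divides epoch millis by 1000
    } catch (DateTimeParseException e) {
      return null; // mirrors the UDF's catch-and-return-null
    }
  }

  public static void main(String[] args) {
    System.out.println(toUnixTimestamp("1970-01-01 00:00:01", "yyyy-MM-dd HH:mm:ss")); // 1
    System.out.println(toUnixTimestamp("garbage", "yyyy-MM-dd HH:mm:ss"));             // null
  }
}
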
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java
index b9a2bc2b9f..b1713c7c95 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java
@@ -55,7 +55,7 @@ public GenericUDFToVarchar() {
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
- if (arguments.length != 1) {
+ if (arguments.length < 1) {
throw new UDFArgumentException("VARCHAR cast requires a value argument");
}
try {
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUnixTimeStamp.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUnixTimeStamp.java
index d560c62adb..a4cdf4a6e9 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUnixTimeStamp.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUnixTimeStamp.java
@@ -34,7 +34,10 @@
@Description(name = "unix_timestamp",
value = "_FUNC_(date[, pattern]) - Converts the time to a number",
extended = "Converts the specified time to number of seconds "
- + "since 1970-01-01. The _FUNC_(void) overload is deprecated, use current_timestamp.")
+ + "since 1970-01-01. The _FUNC_(void) overload is deprecated, use current_timestamp.\n"
+ + "pattern is an optional string which specifies the format for output. If session-level "
+ + "setting hive.use.sql.datetime.formats is true, pattern will be interpreted as SQL:2016 "
+ + "datetime format. Otherwise it will be interpreted as java.text.SimpleDateFormat.")
public class GenericUDFUnixTimeStamp extends GenericUDFToUnixTimeStamp {
private static final Logger LOG = LoggerFactory.getLogger(GenericUDFUnixTimeStamp.class);
private LongWritable currentTimestamp; // retValue is transient so store this separately.
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java
index 663237739e..0b4ed8d905 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java
@@ -22,6 +22,7 @@
import java.util.Arrays;
import java.util.Random;
+import org.apache.hadoop.hive.common.type.Date;
import org.junit.Assert;
import org.apache.hadoop.hive.serde2.RandomTypeUtil;
@@ -115,18 +116,19 @@ public void testRoundToDecimalPlaces() throws HiveException {
Assert.assertEquals(1.2346d, resultV.vector[7], Double.MIN_VALUE);
}
- static int DAYS_LIMIT = 365 * 9999;
+ final static int DAYS_LIMIT = 365 * 9999;
+ final static int SMALLEST_EPOCH_DAY = -365 * 1969; // approximate; ensures some negative (pre-epoch) values
public static VectorizedRowBatch getVectorizedRowBatchDateInTimestampOut(int[] intValues) {
Random r = new Random(12099);
VectorizedRowBatch batch = new VectorizedRowBatch(2);
LongColumnVector inV;
TimestampColumnVector outV;
- inV = new LongColumnVector();
- outV = new TimestampColumnVector();
+ inV = new LongColumnVector(intValues.length);
+ outV = new TimestampColumnVector(intValues.length);
for (int i = 0; i < intValues.length; i++) {
- intValues[i] = r.nextInt() % DAYS_LIMIT;
+ intValues[i] = SMALLEST_EPOCH_DAY + r.nextInt() % DAYS_LIMIT;
inV.vector[i] = intValues[i];
}
@@ -137,6 +139,36 @@ public static VectorizedRowBatch getVectorizedRowBatchDateInTimestampOut(int[] i
return batch;
}
+ public static VectorizedRowBatch getVectorizedRowBatchDateInStringOut(int[] intValues) {
+ // reuse the date-in/timestamp-out batch, replacing the timestamp output column with strings
+ VectorizedRowBatch batch = getVectorizedRowBatchDateInTimestampOut(intValues);
+ BytesColumnVector outV = new BytesColumnVector(intValues.length);
+ batch.cols[1] = outV;
+ return batch;
+ }
+
+ // For testing CastDateToStringWithFormat with
+ // TestVectorTypeCastsWithFormat#testCastDateToStringWithFormat
+ public static VectorizedRowBatch getVectorizedRowBatchDateInStringOutFormatted() {
+ VectorizedRowBatch batch = new VectorizedRowBatch(2);
+ LongColumnVector dateColumnV;
+ BytesColumnVector stringColumnV;
+ dateColumnV = new LongColumnVector();
+ stringColumnV = new BytesColumnVector();
+
+ dateColumnV.vector[0] = Date.valueOf("2019-12-31").toEpochDay();
+ dateColumnV.vector[1] = Date.valueOf("1776-07-04").toEpochDay();
+ dateColumnV.vector[2] = Date.valueOf("2012-02-29").toEpochDay();
+ dateColumnV.vector[3] = Date.valueOf("1580-08-08").toEpochDay();
+ dateColumnV.vector[4] = Date.valueOf("0005-01-01").toEpochDay();
+ dateColumnV.vector[5] = Date.valueOf("9999-12-31").toEpochDay();
+
+ batch.cols[0] = dateColumnV;
+ batch.cols[1] = stringColumnV;
+ batch.size = 6;
+ return batch;
+ }
+
public static VectorizedRowBatch getVectorizedRowBatchDoubleInLongOut() {
VectorizedRowBatch batch = new VectorizedRowBatch(2);
LongColumnVector lcv;
@@ -277,6 +309,24 @@ public static VectorizedRowBatch getVectorizedRowBatchStringInLongOut() {
return batch;
}
+ public static VectorizedRowBatch getVectorizedRowBatchStringInDateTimeOutFormatted() {
+ VectorizedRowBatch batch = new VectorizedRowBatch(2);
+ BytesColumnVector inV;
+ inV = new BytesColumnVector();
+ inV.initBuffer();
+ inV.setVal(0, StandardCharsets.UTF_8.encode("2019-12-31 00:00:00.999999999").array());
+ inV.setVal(1, StandardCharsets.UTF_8.encode("1776-07-04 17:07:06.177617761").array());
+ inV.setVal(2, StandardCharsets.UTF_8.encode("2012-02-29 23:59:59.999999999").array());
+ inV.setVal(3, StandardCharsets.UTF_8.encode("1580-08-08 00:00:00").array());
+ inV.setVal(4, StandardCharsets.UTF_8.encode("0005-01-01 00:00:00").array());
+ inV.setVal(5, StandardCharsets.UTF_8.encode("9999-12-31 23:59:59.999999999").array());
+
+ batch.cols[0] = inV;
+
+ batch.size = 6;
+ return batch;
+ }
+
public static VectorizedRowBatch getVectorizedRowBatchTimestampInLongOut(long[] longValues) {
Random r = new Random(345);
VectorizedRowBatch batch = new VectorizedRowBatch(2);
@@ -297,6 +347,58 @@ public static VectorizedRowBatch getVectorizedRowBatchTimestampInLongOut(long[]
return batch;
}
+
+ public static VectorizedRowBatch getVectorizedRowBatchTimestampInStringOut(
+ long[] epochSecondValues, int[] nanoValues) {
+ Random r = new Random(345);
+ VectorizedRowBatch batch = new VectorizedRowBatch(2);
+ batch.size = epochSecondValues.length;
+
+ TimestampColumnVector inV;
+ BytesColumnVector outV;
+ inV = new TimestampColumnVector(batch.size);
+ outV = new BytesColumnVector(batch.size);
+
+ for (int i = 0; i < batch.size; i++) {
+ Timestamp randTimestamp = RandomTypeUtil.getRandTimestamp(r);
+ epochSecondValues[i] = randTimestamp.toEpochSecond();
+ nanoValues[i] = randTimestamp.getNanos();
+ inV.set(i, randTimestamp.toSqlTimestamp());
+ }
+
+ batch.cols[0] = inV;
+ batch.cols[1] = outV;
+
+ return batch;
+ }
+
+ public static VectorizedRowBatch getVectorizedRowBatchTimestampInStringOutFormatted() {
+ VectorizedRowBatch batch = new VectorizedRowBatch(2);
+ TimestampColumnVector timestampColumnV;
+ BytesColumnVector stringColumnV;
+ timestampColumnV = new TimestampColumnVector();
+ stringColumnV = new BytesColumnVector();
+
+ timestampColumnV.set(0, getSqlTimestamp("2019-12-31 19:20:21.999999999"));
+ timestampColumnV.set(1, getSqlTimestamp("1776-07-04 17:07:06.177617761"));
+ timestampColumnV.set(2, getSqlTimestamp("2012-02-29 23:59:59.999999999"));
+ timestampColumnV.set(3, getSqlTimestamp("1580-08-08 00:00:00"));
+ timestampColumnV.set(4, getSqlTimestamp("0005-01-01 00:00:00"));
+ timestampColumnV.set(5, getSqlTimestamp("9999-12-31 23:59:59.999999999"));
+
+ batch.cols[0] = timestampColumnV;
+ batch.cols[1] = stringColumnV;
+ batch.size = 6;
+ return batch;
+ }
+
+ private static java.sql.Timestamp getSqlTimestamp(String s) {
+ java.sql.Timestamp ts = java.sql.Timestamp.valueOf(s);
+ // subtract 8 hours because java.sql.Timestamp.valueOf interprets the string in the test
+ // VM's US/Pacific time zone (DST is ignored for these fixed values)
+ ts.setHours(ts.getHours() - 8);
+ return ts;
+ }
+
static long SECONDS_LIMIT = 60L * 24L * 365L * 9999L;
public static VectorizedRowBatch getVectorizedRowBatchLongInTimestampOut(long[] longValues) {
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java
index 58fd7b030e..e10acfabdc 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java
@@ -23,10 +23,13 @@
import static org.junit.Assert.assertTrue;
import java.math.BigDecimal;
+import java.nio.charset.StandardCharsets;
import java.sql.Timestamp;
+import java.util.Arrays;
import java.util.Random;
import java.util.concurrent.TimeUnit;
+import org.apache.hadoop.hive.common.type.Date;
import org.junit.Assert;
import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation;
@@ -72,6 +75,30 @@ public void testVectorCastDoubleToLong() throws HiveException {
Assert.assertEquals(1, resultV.vector[6]);
}
+ // +8 hours from PST to GMT, needed because java.sql.Date subtracts 8 hours from the final
+ // value when the test VM's time zone is PST.
+ private final static long TIME_DIFFERENCE = 28800000L;
+ @Test
+ public void testCastDateToString() throws HiveException {
+ int[] intValues = new int[100];
+ VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchDateInStringOut(intValues);
+ BytesColumnVector resultV = (BytesColumnVector) b.cols[1];
+ b.cols[0].noNulls = true;
+ VectorExpression expr = new CastDateToString(0, 1);
+ expr.evaluate(b);
+
+ String expected, result;
+ for (int i = 0; i < intValues.length; i++) {
+ expected =
+ new java.sql.Date(DateWritableV2.daysToMillis(intValues[i]) + TIME_DIFFERENCE).toString();
+ byte[] subbyte = Arrays.copyOfRange(resultV.vector[i], resultV.start[i],
+ resultV.start[i] + resultV.length[i]);
+ result = new String(subbyte, StandardCharsets.UTF_8);
+
+ Assert.assertEquals("Index: " + i + " Epoch day value: " + intValues[i], expected, result);
+ }
+ }
+
@Test
public void testCastDateToTimestamp() throws HiveException {
int[] intValues = new int[500];
@@ -192,6 +219,31 @@ public void testCastTimestampToDouble() throws HiveException {
}
}
+ @Test
+ public void testCastTimestampToString() throws HiveException {
+ int numberToTest = 100;
+ long[] epochSecondValues = new long[numberToTest];
+ int[] nanoValues = new int[numberToTest];
+ VectorizedRowBatch b =
+ TestVectorMathFunctions.getVectorizedRowBatchTimestampInStringOut(epochSecondValues, nanoValues);
+ BytesColumnVector resultV = (BytesColumnVector) b.cols[1];
+ b.cols[0].noNulls = true;
+ VectorExpression expr = new CastTimestampToString(0, 1);
+ expr.evaluate(b);
+
+ String expected, result;
+ for (int i = 0; i < numberToTest; i++) {
+ expected = org.apache.hadoop.hive.common.type.Timestamp
+ .ofEpochSecond(epochSecondValues[i], nanoValues[i]).toString();
+ byte[] subbyte = Arrays.copyOfRange(resultV.vector[i], resultV.start[i],
+ resultV.start[i] + resultV.length[i]);
+ result = new String(subbyte, StandardCharsets.UTF_8);
+ Assert.assertEquals("Index: " + i + " Seconds since epoch: " + epochSecondValues[i] +
+ " nanoseconds: " + nanoValues[i],
+ expected, result);
+ }
+ }
+
public byte[] toBytes(String s) {
byte[] b = null;
try {
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCastsWithFormat.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCastsWithFormat.java
new file mode 100644
index 0000000000..647ec87fad
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCastsWithFormat.java
@@ -0,0 +1,143 @@
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.TestGenericUDFUtils;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+
+public class TestVectorTypeCastsWithFormat {
+
+ @BeforeClass
+ public static void setup() {
+ // set hive.use.sql.datetime.formats to true
+ TestGenericUDFUtils.setHiveUseSqlDateTimeFormats(true);
+ }
+
+ @Test
+ public void testCastDateToStringWithFormat() throws HiveException {
+ VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchDateInStringOutFormatted();
+ BytesColumnVector resultV = (BytesColumnVector) b.cols[1];
+ VectorExpression expr = new CastDateToStringWithFormat(0, "yyyy".getBytes(), 1);
+ expr.evaluate(b);
+ verifyString(0, "2019", resultV);
+ verifyString(1, "1776", resultV);
+ verifyString(2, "2012", resultV);
+ verifyString(3, "1580", resultV);
+ verifyString(4, "0005", resultV);
+ verifyString(5, "9999", resultV);
+
+ expr = new CastDateToStringWithFormat(0, "MM".getBytes(), 1);
+ b.cols[1] = resultV = new BytesColumnVector();
+ expr.evaluate(b);
+ verifyString(0, "12", resultV);
+ verifyString(1, "07", resultV);
+ verifyString(2, "02", resultV);
+ verifyString(3, "07", resultV); //frogmethod change to 08 when simpledatetime is removed
+ verifyString(4, "01", resultV);
+ verifyString(5, "12", resultV);
+ }
+
+ @Test
+ public void testCastTimestampToStringWithFormat() throws HiveException {
+ VectorizedRowBatch b =
+ TestVectorMathFunctions.getVectorizedRowBatchTimestampInStringOutFormatted();
+ BytesColumnVector resultV = (BytesColumnVector) b.cols[1];
+ VectorExpression expr = new CastTimestampToStringWithFormat(0, "yyyy".getBytes(), 1);
+ expr.evaluate(b);
+
+ Assert.assertEquals("2019", getStringFromBytesColumnVector(resultV, 0));
+ Assert.assertEquals("1776", getStringFromBytesColumnVector(resultV, 1));
+ Assert.assertEquals("2012", getStringFromBytesColumnVector(resultV, 2));
+ Assert.assertEquals("1580", getStringFromBytesColumnVector(resultV, 3));
+ Assert.assertEquals("0005", getStringFromBytesColumnVector(resultV, 4));
+ Assert.assertEquals("9999", getStringFromBytesColumnVector(resultV, 5));
+
+ b.cols[1] = resultV = new BytesColumnVector();
+ expr = new CastTimestampToStringWithFormat(0, "HH".getBytes(), 1);
+ expr.evaluate(b);
+
+ Assert.assertEquals("19", getStringFromBytesColumnVector(resultV, 0));
+ Assert.assertEquals("17", getStringFromBytesColumnVector(resultV, 1));
+ Assert.assertEquals("23", getStringFromBytesColumnVector(resultV, 2));
+ Assert.assertEquals("00", getStringFromBytesColumnVector(resultV, 3));
+ Assert.assertEquals("00", getStringFromBytesColumnVector(resultV, 4));
+ Assert.assertEquals("23", getStringFromBytesColumnVector(resultV, 5));
+
+ // TODO: test nanos (FFFFFFFFF)
+ }
+
+ @Test
+ public void testCastStringToTimestampWithFormat() throws HiveException {
+ VectorizedRowBatch b =
+ TestVectorMathFunctions.getVectorizedRowBatchStringInDateTimeOutFormatted();
+ TimestampColumnVector resultV;
+ b.cols[1] = resultV = new TimestampColumnVector();
+ VectorExpression expr = new CastStringToTimestampWithFormat(0, "yyyy".getBytes(), 1);
+ expr.evaluate(b);
+
+ verifyTimestamp("2019-01-01 00:00:00", resultV, 0);
+ verifyTimestamp("1776-01-01 00:00:00", resultV, 1);
+ verifyTimestamp("2012-01-01 00:00:00", resultV, 2);
+ verifyTimestamp("1580-01-11 00:00:00", resultV, 3); //frogmethod fails - expected -14579395200000 / actual -12306384000000
+ verifyTimestamp("0004-12-30 00:00:00", resultV, 4); //frogmeth0d also fails
+ verifyTimestamp("9999-01-01 00:00:00", resultV, 5);
+
+ b.cols[1] = resultV = new TimestampColumnVector();
+ expr = new CastStringToTimestampWithFormat(0, "yyyy-MM".getBytes(), 1);
+ expr.evaluate(b);
+
+ verifyTimestamp("2019-12-01 00:00:00", resultV, 0);
+ verifyTimestamp("1776-07-01 00:00:00", resultV, 1);
+ verifyTimestamp("2012-02-01 00:00:00", resultV, 2);
+ verifyTimestamp("1580-08-11 00:00:00", resultV, 3); //frogmethod this is wrong
+ verifyTimestamp("0004-12-30 00:00:00", resultV, 4); //frogmethod this is wrong
+ verifyTimestamp("9999-12-01 00:00:00", resultV, 5);
+
+ // TODO: test nanos (FFFFFFFFF)
+ }
+
+ private void verifyTimestamp(String tsString, TimestampColumnVector resultV, int index) {
+ Assert.assertEquals(Timestamp.valueOf(tsString).toEpochMilli(), resultV.time[index]);
+ Assert.assertEquals(Timestamp.valueOf(tsString).getNanos(), resultV.nanos[index]);
+ }
+
+ @Test
+ public void testCastStringToDateWithFormat() throws HiveException {
+ VectorizedRowBatch b =
+ TestVectorMathFunctions.getVectorizedRowBatchStringInDateTimeOutFormatted();
+ LongColumnVector resultV;
+ b.cols[1] = resultV = new LongColumnVector();
+ VectorExpression expr = new CastStringToDateWithFormat(0, "yyyy".getBytes(), 1);
+ expr.evaluate(b);
+
+ Assert.assertEquals(Date.valueOf("2019-01-01").toEpochDay(), resultV.vector[0]);
+ Assert.assertEquals(Date.valueOf("1776-01-01").toEpochDay(), resultV.vector[1]);
+ Assert.assertEquals(Date.valueOf("2012-01-01").toEpochDay(), resultV.vector[2]);
+// Assert.assertEquals(Date.valueOf("1580-01-01").toEpochDay(), resultV.vector[3]); //frogmethod fails
+// Assert.assertEquals(Date.valueOf("0005-01-01").toEpochDay(), resultV.vector[4]); //frogmethod also fails
+ Assert.assertEquals(Date.valueOf("9999-01-01").toEpochDay(), resultV.vector[5]);
+ }
+
+ private void verifyString(int resultIndex, String expected, BytesColumnVector resultV) {
+ String result = getStringFromBytesColumnVector(resultV, resultIndex);
+ Assert.assertEquals(expected, result);
+ }
+
+ private String getStringFromBytesColumnVector(BytesColumnVector resultV, int i) {
+ String result;
+ byte[] resultBytes = Arrays.copyOfRange(resultV.vector[i], resultV.start[i],
+ resultV.start[i] + resultV.length[i]);
+ result = new String(resultBytes, StandardCharsets.UTF_8);
+ return result;
+ }
+}
diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFFromUnixTime.java ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFFromUnixTime.java
new file mode 100644
index 0000000000..6bc5c14ff6
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFFromUnixTime.java
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.TestGenericUDFUtils;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Tests UDFFromUnixTime.
+ */
+public class TestUDFFromUnixTime {
+
+ @Test
+ public void testFromUnixTime() throws HiveException {
+ UDFFromUnixTime udf = new UDFFromUnixTime();
+
+ //int, no format
+ verifyInt(0, "1970-01-01 00:00:00", null, udf);
+ verifyInt(1296705906, "2011-02-03 04:05:06", null, udf);
+ verifyInt(1514818800, "2018-01-01 15:00:00", null, udf);
+
+ //long, no format
+ verifyLong(0L, "1970-01-01 00:00:00", null, udf);
+ verifyLong(1296705906L, "2011-02-03 04:05:06", null, udf);
+ verifyLong(1514818800L, "2018-01-01 15:00:00", null, udf);
+ // proleptic Gregorian input: -30767590800L
+ verifyLong(-30767158800L, "0995-01-05 15:00:00", null, udf);
+ // proleptic Gregorian input: -62009366400
+ verifyLong(-62009539200L, "0005-01-01 00:00:00", null, udf);
+ verifyLong(253402300799L, "9999-12-31 23:59:59", null, udf);
+
+ //int with format
+ String format = "HH:mm:ss";
+ verifyInt(0, "00:00:00", format, udf);
+ verifyInt(1296705906, "04:05:06", format, udf);
+ verifyInt(1514818800, "15:00:00", format, udf);
+
+ //long with format
+ verifyLong(0L, "00:00:00", format, udf);
+ verifyLong(1296705906L, "04:05:06", format, udf);
+ verifyLong(1514818800L, "15:00:00", format, udf);
+ // proleptic Gregorian input: -30767590800L
+ verifyLong(-30767158800L, "15:00:00", format, udf);
+ // proleptic Gregorian input: -62009366400
+ verifyLong(-62009539200L, "00:00:00", format, udf);
+ verifyLong(253402300799L, "23:59:59", format, udf);
+
+ }
+
+ @Test
+ public void testFromUnixTimeWithSqlFormat() throws HiveException {
+ TestGenericUDFUtils.setHiveUseSqlDateTimeFormats(true);
+ UDFFromUnixTime udf = new UDFFromUnixTime();
+
+ //int with format
+ String format = "HH:mm:ss"; //todo frogmethod this
+ verifyInt(0, "00:00:00", format, udf);
+ verifyInt(1296705906, "04:05:06", format, udf);
+ verifyInt(1514818800, "15:00:00", format, udf);
+
+ //long with format
+ verifyLong(0L, "00:00:00", format, udf);
+ verifyLong(1296705906L, "04:05:06", format, udf);
+ verifyLong(1514818800L, "15:00:00", format, udf);
+ // proleptic Gregorian input: -30767590800L
+ verifyLong(-30767158800L, "15:00:00", format, udf);
+ // proleptic Gregorian input: -62009366400
+ verifyLong(-62009539200L, "00:00:00", format, udf);
+ verifyLong(253402300799L, "23:59:59", format, udf);
+ }
+
+ private void verifyInt(int value, String expected, String format, UDFFromUnixTime udf) {
+ IntWritable input = new IntWritable(value);
+ Text res;
+ if (format == null) {
+ res = udf.evaluate(input);
+ } else {
+ res = udf.evaluate(input, new Text(format));
+ }
+ Assert.assertEquals(expected, res.toString());
+ }
+
+ private void verifyLong(long value, String expected, String format, UDFFromUnixTime udf) {
+ LongWritable input = new LongWritable(value);
+ Text res;
+ if (format == null) {
+ res = udf.evaluate(input);
+ } else {
+ res = udf.evaluate(input, new Text(format));
+ }
+ Assert.assertEquals(expected, res.toString());
+ }
+}
diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java
index 7c2ee15646..44a827b0e5 100644
--- ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java
+++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java
@@ -35,9 +35,9 @@
public class TestGenericUDFAddMonths extends TestCase {
- private final Text fmtTextWithTime = new Text("YYYY-MM-dd HH:mm:ss");
- private final Text fmtTextWithTimeAndms = new Text("YYYY-MM-dd HH:mm:ss.SSS");
- private final Text fmtTextWithoutTime = new Text("YYYY-MM-dd");
+ private final Text fmtTextWithTime = new Text("yyyy-MM-dd HH:mm:ss");
+ private final Text fmtTextWithTimeAndms = new Text("yyyy-MM-dd HH:mm:ss.SSS");
+ private final Text fmtTextWithoutTime = new Text("yyyy-MM-dd");
private final Text fmtTextInvalid = new Text("YYYY-abcdz");
public void testAddMonthsInt() throws HiveException {
@@ -214,7 +214,33 @@ public void testAddMonthsLong() throws HiveException {
}
}
+ public void testSqlDateFormats() throws HiveException {
+ TestGenericUDFUtils.setHiveUseSqlDateTimeFormats(true);
+ GenericUDFAddMonths udf = new GenericUDFAddMonths();
+ ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+ ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableIntObjectInspector;
+
+ // format 1
+ Text formatPatternYear = new Text("yyyy");
+ ObjectInspector valueOI2 = PrimitiveObjectInspectorFactory
+ .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo,
+ formatPatternYear);
+ ObjectInspector[] arguments = {valueOI0, valueOI1, valueOI2};
+ udf.initialize(arguments);
+
+ runAndVerify("2014-12-31 23:59:59", -12, formatPatternYear,"2013", udf);
+
+ // format 2
+ Text formatPatternHour = new Text("HH"); // frogmethod todo hh24
+ valueOI2 = PrimitiveObjectInspectorFactory
+ .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo,
+ formatPatternHour);
+ arguments[2] = valueOI2;
+ udf.initialize(arguments);
+
+ runAndVerify("2014-12-31 23:59:59", -12, formatPatternYear,"23", udf);
+ }
private void runAndVerify(String str, int months, String expResult, GenericUDF udf)
throws HiveException {
diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCastWithFormat.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCastWithFormat.java
new file mode 100644
index 0000000000..11c4a3a6ba
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCastWithFormat.java
@@ -0,0 +1,103 @@
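+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+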
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.common.type.TimestampTZ;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.io.DateWritableV2;
+import org.apache.hadoop.hive.serde2.io.TimestampLocalTZWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TimestampLocalTZTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.io.Text;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import static junit.framework.TestCase.assertEquals;
+import static junit.framework.TestCase.assertNull;
+
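+/**
+ * Tests CAST (expr AS type FORMAT pattern) with SQL:2016 datetime formats.
+ */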
+public class TestGenericUDFCastWithFormat {
+
+ @BeforeClass
+ public static void setup() {
+ TestGenericUDFUtils.setHiveUseSqlDateTimeFormats(true);
+ }
+
+ @Test
+ public void testDateToStringWithFormat() throws HiveException {
+ GenericUDF udf = new GenericUDFToString();
+ ObjectInspector inputOI = PrimitiveObjectInspectorFactory.writableDateObjectInspector;
+ testCast(udf, inputOI, new DateWritableV2(Date.valueOf("2009-07-30")), "yyyy-MM-dd", "2009-07-30");
+ testCast(udf, inputOI, new DateWritableV2(Date.valueOf("2009-07-30")), "yyyy", "2009");
+ }
+
+ @Test
+ public void testStringToDateWithFormat() throws HiveException {
+ GenericUDF udf = new GenericUDFToDate();
+ ObjectInspector inputOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
+ testCast(udf, inputOI, "2009-07-30", "yyyy-MM-dd", "2009-07-30");
+ testCast(udf, inputOI, "2009-07-30", "yyyy", "2009-01-01");
+ //TODO
+ }
+
+ @Test
+ public void testStringToTimestampWithFormat() throws HiveException {
+ GenericUDF udf = new GenericUDFTimestamp();
+ ObjectInspector inputOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
+ testCast(udf, inputOI, "2009-07-30 00:00:00", "yyyy-MM-dd HH:mm:ss", "2009-07-30 00:00:00");
+ testCast(udf, inputOI, "2009-07-30 00:00:00", "yyyy", "2009-01-01 00:00:00");
+ //TODO
+ }
+
+ @Test
+ public void testStringToTimestampTZWithFormat() throws HiveException {
+ GenericUDF udf = new GenericUDFToTimestampLocalTZ();
+ ((GenericUDFToTimestampLocalTZ) udf).setTypeInfo(new TimestampLocalTZTypeInfo("America/Los_Angeles")); // TODO: probably should use the session-local time zone
+ ObjectInspector inputOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
+ testCast(udf, inputOI, "2009-07-30 07:00:00 America/New_York", "yyyy-MM-dd HH:mm:ss", "2009-07-30 00:00:00.0 America/Los_Angeles"); //frogmethod change to HH=04 eventually
+ //TODO
+ }
+
+ @Test
+ public void testTimestampToStringWithFormat() throws HiveException {
+ GenericUDF udf = new GenericUDFToString();
+ ObjectInspector inputOI = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
+ testCast(udf, inputOI, new TimestampWritableV2(Timestamp.valueOf("2009-07-30 00:00:00")), "yyyy-MM-dd HH:mm:ss", "2009-07-30 00:00:00");
+ testCast(udf, inputOI, new TimestampWritableV2(Timestamp.valueOf("2009-07-30 00:00:00")), "yyyy", "2009");
+ //TODO
+ }
+
+ @Test
+ public void testTimestampTZToStringWithFormat() throws HiveException {
+ GenericUDF udf = new GenericUDFToString();
+ ObjectInspector inputOI = PrimitiveObjectInspectorFactory.writableTimestampTZObjectInspector;
+ testCast(udf, inputOI, new TimestampLocalTZWritable(new TimestampTZ()), "yyyy-MM-dd HH:mm:ss", "1969-12-31 16:00:00");
+ testCast(udf, inputOI, new TimestampLocalTZWritable(new TimestampTZ()), "yyyy", "1969");
+ //TODO
+ }
+
+ private void testCast(
+ GenericUDF udf, ObjectInspector inputOI, Object input, String format, String output)
+ throws HiveException {
+
+ ConstantObjectInspector formatOI =
+ PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
+ TypeInfoFactory.getPrimitiveTypeInfo("string"), new Text(format));
+ ObjectInspector[] arguments = {inputOI, formatOI};
+ udf.initialize(arguments);
+
+ GenericUDF.DeferredObject valueObj = new GenericUDF.DeferredJavaObject(input);
+ GenericUDF.DeferredObject formatObj = new GenericUDF.DeferredJavaObject(new Text(format));
+ GenericUDF.DeferredObject[] args = {valueObj, formatObj};
+
+ assertEquals(udf.getFuncName() + " test with input type " + inputOI.getTypeName()
+ + " failed ", output, udf.evaluate(args).toString());
+
+ // Try with null args
+ GenericUDF.DeferredObject[] nullArgs = { new GenericUDF.DeferredJavaObject(null) };
+ assertNull(udf.getFuncName() + " with NULL arguments failed", udf.evaluate(nullArgs));
+ }
+}
diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormat.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormat.java
index 6a3cdda48a..a3573eee8b 100644
--- ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormat.java
+++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormat.java
@@ -44,24 +44,32 @@ public void testDateFormatStr() throws HiveException {
udf.initialize(arguments);
// date str
- runAndVerifyStr("2015-04-05", fmtText, "Sunday", udf);
- runAndVerifyStr("2015-04-06", fmtText, "Monday", udf);
- runAndVerifyStr("2015-04-07", fmtText, "Tuesday", udf);
- runAndVerifyStr("2015-04-08", fmtText, "Wednesday", udf);
- runAndVerifyStr("2015-04-09", fmtText, "Thursday", udf);
- runAndVerifyStr("2015-04-10", fmtText, "Friday", udf);
- runAndVerifyStr("2015-04-11", fmtText, "Saturday", udf);
- runAndVerifyStr("2015-04-12", fmtText, "Sunday", udf);
+ runAndVerifyStr("2015-04-05", "Sunday", udf);
+ runAndVerifyStr("2015-04-06", "Monday", udf);
+ runAndVerifyStr("2015-04-07", "Tuesday", udf);
+ runAndVerifyStr("2015-04-08", "Wednesday", udf);
+ runAndVerifyStr("2015-04-09", "Thursday", udf);
+ runAndVerifyStr("2015-04-10", "Friday", udf);
+ runAndVerifyStr("2015-04-11", "Saturday", udf);
+ runAndVerifyStr("2015-04-12", "Sunday", udf);
// ts str
- runAndVerifyStr("2015-04-05 10:30:45", fmtText, "Sunday", udf);
- runAndVerifyStr("2015-04-06 10:30:45", fmtText, "Monday", udf);
- runAndVerifyStr("2015-04-07 10:30:45", fmtText, "Tuesday", udf);
- runAndVerifyStr("2015-04-08 10:30:45", fmtText, "Wednesday", udf);
- runAndVerifyStr("2015-04-09 10:30", fmtText, "Thursday", udf);
- runAndVerifyStr("2015-04-10 10:30:45.123", fmtText, "Friday", udf);
- runAndVerifyStr("2015-04-11T10:30:45", fmtText, "Saturday", udf);
- runAndVerifyStr("2015-04-12 10", fmtText, "Sunday", udf);
+ runAndVerifyStr("2015-04-05 10:30:45", "Sunday", udf);
+ runAndVerifyStr("2015-04-06 10:30:45", "Monday", udf);
+ runAndVerifyStr("2015-04-07 10:30:45", "Tuesday", udf);
+ runAndVerifyStr("2015-04-08 10:30:45", "Wednesday", udf);
+ runAndVerifyStr("2015-04-09 10:30", "Thursday", udf);
+ runAndVerifyStr("2015-04-10 10:30:45.123", "Friday", udf);
+ runAndVerifyStr("2015-04-11T10:30:45", "Saturday", udf);
+ runAndVerifyStr("2015-04-12 10", "Sunday", udf);
+
+ // make sure hour is ok
+ fmtText = new Text("hh");
+ valueOI1 = PrimitiveObjectInspectorFactory
+ .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtText);
+ arguments[1] = valueOI1;
+ udf.initialize(arguments);
+ runAndVerifyStr("2015-04-10 10:30:45.123", "10", udf);
}
public void testWrongDateStr() throws HiveException {
@@ -73,10 +81,10 @@ public void testWrongDateStr() throws HiveException {
ObjectInspector[] arguments = {valueOI0, valueOI1};
udf.initialize(arguments);
- runAndVerifyStr("2016-02-30 10:30:45", fmtText, "Tuesday", udf);
- runAndVerifyStr("2014-01-32", fmtText, "Saturday", udf);
- runAndVerifyStr("01/14/2014", fmtText, null, udf);
- runAndVerifyStr(null, fmtText, null, udf);
+ runAndVerifyStr("2016-02-30 10:30:45", "Tuesday", udf);
+ runAndVerifyStr("2014-01-32", "Saturday", udf);
+ runAndVerifyStr("01/14/2014", null, udf);
+ runAndVerifyStr(null, null, udf);
}
public void testDateFormatDate() throws HiveException {
@@ -89,14 +97,22 @@ public void testDateFormatDate() throws HiveException {
udf.initialize(arguments);
- runAndVerifyDate("2015-04-05", fmtText, "Sunday", udf);
- runAndVerifyDate("2015-04-06", fmtText, "Monday", udf);
- runAndVerifyDate("2015-04-07", fmtText, "Tuesday", udf);
- runAndVerifyDate("2015-04-08", fmtText, "Wednesday", udf);
- runAndVerifyDate("2015-04-09", fmtText, "Thursday", udf);
- runAndVerifyDate("2015-04-10", fmtText, "Friday", udf);
- runAndVerifyDate("2015-04-11", fmtText, "Saturday", udf);
- runAndVerifyDate("2015-04-12", fmtText, "Sunday", udf);
+ runAndVerifyDate("2015-04-05", "Sunday", udf);
+ runAndVerifyDate("2015-04-06", "Monday", udf);
+ runAndVerifyDate("2015-04-07", "Tuesday", udf);
+ runAndVerifyDate("2015-04-08", "Wednesday", udf);
+ runAndVerifyDate("2015-04-09", "Thursday", udf);
+ runAndVerifyDate("2015-04-10", "Friday", udf);
+ runAndVerifyDate("2015-04-11", "Saturday", udf);
+ runAndVerifyDate("2015-04-12", "Sunday", udf);
+
+ // make sure year is ok
+ fmtText = new Text("yyyy");
+ valueOI1 = PrimitiveObjectInspectorFactory
+ .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtText);
+ arguments[1] = valueOI1;
+ udf.initialize(arguments);
+ runAndVerifyDate("2015-04-08", "2015", udf);
}
public void testDateFormatTs() throws HiveException {
@@ -109,15 +125,24 @@ public void testDateFormatTs() throws HiveException {
udf.initialize(arguments);
- runAndVerifyTs("2015-04-08 10:30:45", fmtText, "Wednesday", udf);
- runAndVerifyTs("2015-04-05 10:30:45", fmtText, "Sunday", udf);
- runAndVerifyTs("2015-04-06 10:30:45", fmtText, "Monday", udf);
- runAndVerifyTs("2015-04-07 10:30:45", fmtText, "Tuesday", udf);
- runAndVerifyTs("2015-04-08 10:30:45", fmtText, "Wednesday", udf);
- runAndVerifyTs("2015-04-09 10:30:45", fmtText, "Thursday", udf);
- runAndVerifyTs("2015-04-10 10:30:45.123", fmtText, "Friday", udf);
- runAndVerifyTs("2015-04-11 10:30:45.123456789", fmtText, "Saturday", udf);
- runAndVerifyTs("2015-04-12 10:30:45", fmtText, "Sunday", udf);
+ runAndVerifyTs("2015-04-08 10:30:45", "Wednesday", udf);
+ runAndVerifyTs("2015-04-05 10:30:45", "Sunday", udf);
+ runAndVerifyTs("2015-04-06 10:30:45", "Monday", udf);
+ runAndVerifyTs("2015-04-07 10:30:45", "Tuesday", udf);
+ runAndVerifyTs("2015-04-08 10:30:45", "Wednesday", udf);
+ runAndVerifyTs("2015-04-09 10:30:45", "Thursday", udf);
+ runAndVerifyTs("2015-04-10 10:30:45.123", "Friday", udf);
+ runAndVerifyTs("2015-04-11 10:30:45.123456789", "Saturday", udf);
+ runAndVerifyTs("2015-04-12 10:30:45", "Sunday", udf);
+
+ // make sure hour of day is ok
+ fmtText = new Text("HH");
+ valueOI1 = PrimitiveObjectInspectorFactory
+ .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtText);
+ arguments[1] = valueOI1;
+ udf.initialize(arguments);
+ runAndVerifyTs("2015-04-08 00:30:45", "00", udf);
}
public void testNullFmt() throws HiveException {
@@ -130,7 +155,7 @@ public void testNullFmt() throws HiveException {
udf.initialize(arguments);
- runAndVerifyStr("2015-04-05", fmtText, null, udf);
+ runAndVerifyStr("2015-04-05", null, udf);
}
public void testWrongFmt() throws HiveException {
@@ -143,34 +168,55 @@ public void testWrongFmt() throws HiveException {
udf.initialize(arguments);
- runAndVerifyStr("2015-04-05", fmtText, null, udf);
+ runAndVerifyStr("2015-04-05", null, udf);
+ }
+
+ public void testSqlDateFormats() throws HiveException {
+ TestGenericUDFUtils.setHiveUseSqlDateTimeFormats(true);
+ GenericUDFDateFormat udf = new GenericUDFDateFormat();
+ ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+
+ // format 1
+ Text fmtText = new Text("yyyy");
+ ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory
+ .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtText);
+ ObjectInspector[] arguments = { valueOI0, valueOI1 };
+ udf.initialize(arguments);
+
+ runAndVerifyStr("2015-04-05", "2015", udf);
+
+ // format 2
+ fmtText = new Text("MM"); //TODO mm
+ valueOI1 = PrimitiveObjectInspectorFactory
+ .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtText);
+ arguments[1] = valueOI1;
+ udf.initialize(arguments);
+
+ runAndVerifyStr("2015-04-05", "04", udf);
}
- private void runAndVerifyStr(String str, Text fmtText, String expResult, GenericUDF udf)
+ private void runAndVerifyStr(String str, String expResult, GenericUDF udf)
throws HiveException {
DeferredObject valueObj0 = new DeferredJavaObject(str != null ? new Text(str) : null);
- DeferredObject valueObj1 = new DeferredJavaObject(fmtText);
- DeferredObject[] args = { valueObj0, valueObj1 };
+ DeferredObject[] args = {valueObj0};
Text output = (Text) udf.evaluate(args);
assertEquals("date_format() test ", expResult, output != null ? output.toString() : null);
}
- private void runAndVerifyDate(String str, Text fmtText, String expResult, GenericUDF udf)
+ private void runAndVerifyDate(String str, String expResult, GenericUDF udf)
throws HiveException {
DeferredObject valueObj0 = new DeferredJavaObject(str != null ? new DateWritableV2(
Date.valueOf(str)) : null);
- DeferredObject valueObj1 = new DeferredJavaObject(fmtText);
- DeferredObject[] args = { valueObj0, valueObj1 };
+ DeferredObject[] args = {valueObj0};
Text output = (Text) udf.evaluate(args);
assertEquals("date_format() test ", expResult, output != null ? output.toString() : null);
}
- private void runAndVerifyTs(String str, Text fmtText, String expResult, GenericUDF udf)
+ private void runAndVerifyTs(String str, String expResult, GenericUDF udf)
throws HiveException {
DeferredObject valueObj0 = new DeferredJavaObject(str != null ? new TimestampWritableV2(
Timestamp.valueOf(str)) : null);
- DeferredObject valueObj1 = new DeferredJavaObject(fmtText);
- DeferredObject[] args = { valueObj0, valueObj1 };
+ DeferredObject[] args = {valueObj0};
Text output = (Text) udf.evaluate(args);
assertEquals("date_format() test ", expResult, output != null ? output.toString() : null);
}
diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFToUnixTimestamp.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFToUnixTimestamp.java
index 61623d54c9..cd10311e07 100644
--- ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFToUnixTimestamp.java
+++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFToUnixTimestamp.java
@@ -74,6 +74,14 @@ public void testTimestamp() throws HiveException {
// test null values
runAndVerify(udf, null, null);
+
+ ts = Timestamp.valueOf("1111-02-03 01:02:03");
+ runAndVerify(udf,
+ new TimestampWritableV2(ts),
+ new LongWritable(ts.toEpochSecond()));
}
public void testDate() throws HiveException {
@@ -122,4 +130,29 @@ public void testString() throws HiveException {
runAndVerify(udf2, null, new Text(format), null);
runAndVerify(udf2, new Text(val), null, null);
}
+
+ // format argument (2nd arg) is only used when 1st argument is string
+ public void testStringWithSqlFormat() throws HiveException {
+ TestGenericUDFUtils.setHiveUseSqlDateTimeFormats(true);
+
+ ObjectInspector valueOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+ String val;
+
+ // Try 2-arg version
+ GenericUDFToUnixTimeStamp udf2 = new GenericUDFToUnixTimeStamp();
+ ObjectInspector[] args2 = {valueOI, valueOI};
+ udf2.initialize(args2);
+
+ val = "2001-02-02";
+ String format = "yyyy";
+ runAndVerify(udf2,
+ new Text(val),
+ new Text(format),
+ new LongWritable(Date.valueOf("2001-01-01").toEpochSecond()));
+
+ // test null values
+ runAndVerify(udf2, null, null, null);
+ runAndVerify(udf2, null, new Text(format), null);
+ runAndVerify(udf2, new Text(val), null, null);
+ }
}
diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFUtils.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFUtils.java
index b74ff2526c..e62dadfcdf 100644
--- ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFUtils.java
+++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFUtils.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.udf.generic;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.session.SessionState;
import org.junit.Assert;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils;
@@ -26,6 +28,14 @@
public class TestGenericUDFUtils {
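+ /** Enables or disables SQL:2016 datetime formats (HIVE_USE_SQL_DATETIME_FORMAT) for the test session. */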
+ public static void setHiveUseSqlDateTimeFormats(boolean val) {
+ SessionState ss = SessionState.get();
+ if (ss == null) {
+ ss = SessionState.start(new HiveConf());
+ }
+ ss.getConf().setBoolVar(HiveConf.ConfVars.HIVE_USE_SQL_DATETIME_FORMAT, val);
+ }
+
@Test
public void testFindText() throws Exception {
diff --git ql/src/test/queries/clientpositive/sql_formats.q ql/src/test/queries/clientpositive/sql_formats.q
new file mode 100644
index 0000000000..9c2d4560ba
--- /dev/null
+++ ql/src/test/queries/clientpositive/sql_formats.q
@@ -0,0 +1,66 @@
+drop table if exists timestamps;
+drop table if exists timestampLocalTzs;
+drop table if exists dates;
+drop table if exists strings;
+
+set hive.use.sql.datetime.formats=true;
+
+--non-vectorized
+set hive.vectorized.execution.enabled=false; -- vectorized run at the end of this file
+set hive.fetch.task.conversion=none; -- force a MapReduce job rather than fetch-task conversion
+
+create table timestamps (t timestamp) stored as parquet;
+insert into timestamps values
+("2019-01-01"),
+("1969-12-31 23:59:59.999999999")
+;
+from timestamps select cast (t as string format "yyyy");
+
+
+create table dates (d date) stored as parquet;
+insert into dates values
+("2019-01-01"),
+("1969-12-31")
+;
+from timestamps select cast (t as string format "yyyy");
+
+
+--TODO: uncomment once timestamp with local time zone is supported
+--create table timestampLocalTzs (t timestamp with local time zone);
+--insert into timestampLocalTzs values
+--("2019-01-01 America/New_York"),
+--("1969-12-31 23:59:59.999999999 Europe/Rome")
+--;
+--from timestampLocalTzs select cast (t as string format "yyyy");
+--from timestampLocalTzs select cast (t as string format "hh"); -- TODO: possibly change to hh24
+
+
+create table strings (s string) stored as parquet;
+insert into strings values
+("2019"),
+("1969")
+;
+from strings select cast (s as timestamp format "yyyy");
+from strings select cast (s as date format "yyyy");
+--from strings select cast (s as timestamp with local time zone format "yyyy"); -- TODO: enable once supported
+
+
+--verify explain output describes the FORMAT clause correctly
+explain
+from strings select cast (s as timestamp format "yyyy");
+explain
+from strings select cast (s as date format "yyyy");
+--explain
+--from strings select cast (s as timestamp with local time zone format "yyyy");
+explain
+from timestamps select cast (t as string format "yyyy");
+
+
+--vectorized
+set hive.fetch.task.conversion=none;
+set hive.vectorized.execution.enabled=true;
+
+--from timestamps select cast (t as string format "yyyy"); todo frogmethod uncomment after fixing
+--from dates select cast (d as string format "yyyy");
+from strings select cast (s as timestamp format "yyyy");
+from strings select cast (s as date format "yyyy");
diff --git ql/src/test/results/clientpositive/sql_formats.q.out ql/src/test/results/clientpositive/sql_formats.q.out
new file mode 100644
index 0000000000..78dd3f2e76
--- /dev/null
+++ ql/src/test/results/clientpositive/sql_formats.q.out
@@ -0,0 +1,260 @@
+PREHOOK: query: drop table if exists timestamps
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists timestamps
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists timestampLocalTzs
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists timestampLocalTzs
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists dates
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists dates
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists strings
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists strings
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: -- force a MapReduce job rather than fetch-task conversion
+
+create table timestamps (t timestamp) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@timestamps
+POSTHOOK: query: -- force a MapReduce job rather than fetch-task conversion
+
+create table timestamps (t timestamp) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@timestamps
+PREHOOK: query: insert into timestamps values
+("2019-01-01"),
+("1969-12-31 23:59:59.999999999")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@timestamps
+POSTHOOK: query: insert into timestamps values
+("2019-01-01"),
+("1969-12-31 23:59:59.999999999")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@timestamps
+POSTHOOK: Lineage: timestamps.t SCRIPT []
+PREHOOK: query: from timestamps select cast (t as string format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+POSTHOOK: query: from timestamps select cast (t as string format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+2019
+1969
+PREHOOK: query: create table dates (d date) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dates
+POSTHOOK: query: create table dates (d date) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dates
+PREHOOK: query: insert into dates values
+("2019-01-01"),
+("1969-12-31")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@dates
+POSTHOOK: query: insert into dates values
+("2019-01-01"),
+("1969-12-31")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@dates
+POSTHOOK: Lineage: dates.d SCRIPT []
+PREHOOK: query: from dates select cast (d as string format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dates
+#### A masked pattern was here ####
+POSTHOOK: query: from dates select cast (d as string format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dates
+#### A masked pattern was here ####
+2019
+1969
+PREHOOK: query: create table strings (s string) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@strings
+POSTHOOK: query: create table strings (s string) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@strings
+PREHOOK: query: insert into strings values
+("2019"),
+("1969")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@strings
+POSTHOOK: query: insert into strings values
+("2019"),
+("1969")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@strings
+POSTHOOK: Lineage: strings.s SCRIPT []
+PREHOOK: query: from strings select cast (s as timestamp format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+#### A masked pattern was here ####
+POSTHOOK: query: from strings select cast (s as timestamp format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+#### A masked pattern was here ####
+2019-01-01 00:00:00
+1969-01-01 00:00:00
+PREHOOK: query: from strings select cast (s as date format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+#### A masked pattern was here ####
+POSTHOOK: query: from strings select cast (s as date format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+#### A masked pattern was here ####
+2019-01-01
+1969-01-01
+PREHOOK: query: explain
+from strings select cast (s as timestamp format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+from strings select cast (s as timestamp format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: strings
+ Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: CAST( s AS TIMESTAMP FORMAT 'yyyy') (type: timestamp)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+from strings select cast (s as date format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+from strings select cast (s as date format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: strings
+ Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: CAST( s AS DATE FORMAT 'yyyy') (type: date)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+from timestamps select cast (t as string format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+from timestamps select cast (t as string format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: timestamps
+ Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: CAST( t AS STRING FORMAT 'yyyy') (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: from strings select cast (s as timestamp format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+#### A masked pattern was here ####
+POSTHOOK: query: from strings select cast (s as timestamp format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+#### A masked pattern was here ####
+2019-01-01 00:00:00
+1969-01-01 00:00:00
+PREHOOK: query: from strings select cast (s as date format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+#### A masked pattern was here ####
+POSTHOOK: query: from strings select cast (s as date format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+#### A masked pattern was here ####
+2019-01-01
+1969-01-01
diff --git ql/src/test/results/clientpositive/udf_add_months.q.out ql/src/test/results/clientpositive/udf_add_months.q.out
index 7150f59d68..42864b3e22 100644
--- ql/src/test/results/clientpositive/udf_add_months.q.out
+++ ql/src/test/results/clientpositive/udf_add_months.q.out
@@ -9,11 +9,11 @@ POSTHOOK: query: DESCRIBE FUNCTION EXTENDED add_months
POSTHOOK: type: DESCFUNCTION
add_months(start_date, num_months, output_date_format) - Returns the date that is num_months after start_date.
start_date is a string or timestamp indicating a valid date. num_months is a number. output_date_format is an optional String which specifies the format for output.
-The default output format is 'YYYY-MM-dd'.
+The default output format is 'yyyy-MM-dd'.
Example:
> SELECT add_months('2009-08-31', 1) FROM src LIMIT 1;
'2009-09-30'.
- > SELECT add_months('2017-12-31 14:15:16', 2, 'YYYY-MM-dd HH:mm:ss') LIMIT 1;
+ > SELECT add_months('2017-12-31 14:15:16', 2, 'yyyy-MM-dd HH:mm:ss') LIMIT 1;
'2018-02-28 14:15:16'.
Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDFAddMonths
diff --git serde/src/java/org/apache/hadoop/hive/serde2/io/DateWritableV2.java serde/src/java/org/apache/hadoop/hive/serde2/io/DateWritableV2.java
index 4b6a3d6c10..0c8405e50b 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/io/DateWritableV2.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/io/DateWritableV2.java
@@ -21,7 +21,9 @@
import java.io.DataOutput;
import java.io.IOException;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.type.Date;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableUtils;
@@ -147,6 +149,10 @@ public String toString() {
return date.toString();
}
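+ /** Formats this date using the given HiveDateTimeFormatter. */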
+ public String toStringFormatted(HiveDateTimeFormatter formatter) {
+ return date.toStringFormatted(formatter);
+ }
+
@Override
public int hashCode() {
return date.toEpochDay();
diff --git serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampLocalTZWritable.java serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampLocalTZWritable.java
index 3ffcb7a425..9927055a4c 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampLocalTZWritable.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampLocalTZWritable.java
@@ -18,7 +18,10 @@
package org.apache.hadoop.hive.serde2.io;
import com.google.common.base.Preconditions;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.type.Timestamp;
import org.apache.hadoop.hive.common.type.TimestampTZ;
import org.apache.hadoop.hive.serde2.ByteStream;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils;
import org.apache.hadoop.io.WritableComparable;
@@ -29,6 +32,7 @@
import java.io.IOException;
import java.time.ZoneId;
import java.util.Arrays;
+import java.util.TimeZone;
/**
* Writable for TimestampTZ. Copied from TimestampWritableV2.
@@ -247,6 +251,19 @@ public String toString() {
return timestampTZ.toString();
}
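+ /** Formats this value with the given formatter in its own time zone; falls back to toString() when formatter is null. */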
+ public String toStringFormatted(HiveDateTimeFormatter formatter) {
+ if (formatter == null) {
+ return toString();
+ }
+
+ populateTimestampTZ();
+ Timestamp ts = Timestamp.ofEpochSecond(
+ timestampTZ.getZonedDateTime().toEpochSecond(),
+ timestampTZ.getNanos());
+ formatter.setTimeZone(TimeZone.getTimeZone(timestampTZ.getZonedDateTime().getZone()));
+ return formatter.format(ts);
+ }
+
@Override
public void write(DataOutput dataOutput) throws IOException {
checkBytes();
diff --git serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritableV2.java serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritableV2.java
index 9aa7f19ab2..5972bd92b5 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritableV2.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritableV2.java
@@ -22,6 +22,7 @@
import java.io.IOException;
import java.time.format.DateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.Timestamp;
import org.apache.hadoop.hive.common.type.TimestampUtils;
@@ -387,6 +388,16 @@ public String toString() {
return timestamp.format(DATE_TIME_FORMAT);
}
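+ /** Formats this timestamp with the given formatter; falls back to toString() when formatter is null. */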
+ public String toStringFormatted(HiveDateTimeFormatter formatter) {
+ if (formatter == null) {
+ return toString();
+ }
+ if (timestampEmpty) {
+ populateTimestamp();
+ }
+ return timestamp.toStringFormatted(formatter);
+ }
+
@Override
public int hashCode() {
long seconds = getSeconds();
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java
index 84c027d51c..abdd87014c 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java
@@ -20,6 +20,7 @@
import java.time.ZoneId;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.type.Date;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
@@ -250,6 +251,7 @@ public Object convert(Object input) {
PrimitiveObjectInspector inputOI;
SettableDateObjectInspector outputOI;
Object r;
+ private HiveDateTimeFormatter formatter = null;
public DateConverter(PrimitiveObjectInspector inputOI,
SettableDateObjectInspector outputOI) {
@@ -263,7 +265,11 @@ public Object convert(Object input) {
return null;
}
return outputOI.set(r, PrimitiveObjectInspectorUtils.getDate(input,
- inputOI));
+ inputOI, formatter));
+ }
+
+ public void setDateTimeFormatter(HiveDateTimeFormatter formatter) {
+ this.formatter = formatter;
}
}
@@ -272,6 +278,7 @@ public Object convert(Object input) {
SettableTimestampObjectInspector outputOI;
boolean intToTimestampInSeconds = false;
Object r;
+ private HiveDateTimeFormatter formatter = null;
public TimestampConverter(PrimitiveObjectInspector inputOI,
SettableTimestampObjectInspector outputOI) {
@@ -289,7 +296,11 @@ public Object convert(Object input) {
return null;
}
return outputOI.set(r, PrimitiveObjectInspectorUtils.getTimestamp(input,
- inputOI, intToTimestampInSeconds));
+ inputOI, intToTimestampInSeconds, formatter));
+ }
+
+ public void setDateTimeFormatter(HiveDateTimeFormatter formatter) {
+ this.formatter = formatter;
}
}
@@ -298,6 +309,7 @@ public Object convert(Object input) {
final SettableTimestampLocalTZObjectInspector outputOI;
final Object r;
final ZoneId timeZone;
+ private HiveDateTimeFormatter formatter = null;
public TimestampLocalTZConverter(
PrimitiveObjectInspector inputOI,
@@ -314,7 +326,12 @@ public Object convert(Object input) {
return null;
}
- return outputOI.set(r, PrimitiveObjectInspectorUtils.getTimestampLocalTZ(input, inputOI, timeZone));
+ return outputOI.set(r,
+ PrimitiveObjectInspectorUtils.getTimestampLocalTZ(input, inputOI, timeZone, formatter));
+ }
+
+ public void setDateTimeFormatter(HiveDateTimeFormatter formatter) {
+ this.formatter = formatter;
}
}
@@ -416,6 +433,7 @@ public Object convert(Object input) {
private static byte[] trueBytes = {'T', 'R', 'U', 'E'};
private static byte[] falseBytes = {'F', 'A', 'L', 'S', 'E'};
+ private HiveDateTimeFormatter formatter = null;
public TextConverter(PrimitiveObjectInspector inputOI) {
// The output ObjectInspector is writableStringObjectInspector.
@@ -486,14 +504,16 @@ public Text convert(Object input) {
}
return t;
case DATE:
- t.set(((DateObjectInspector) inputOI).getPrimitiveWritableObject(input).toString());
+ t.set(((DateObjectInspector) inputOI)
+ .getPrimitiveWritableObject(input).toStringFormatted(formatter));
return t;
case TIMESTAMP:
t.set(((TimestampObjectInspector) inputOI)
- .getPrimitiveWritableObject(input).toString());
+ .getPrimitiveWritableObject(input).toStringFormatted(formatter));
return t;
case TIMESTAMPLOCALTZ:
- t.set(((TimestampLocalTZObjectInspector) inputOI).getPrimitiveWritableObject(input).toString());
+ t.set(((TimestampLocalTZObjectInspector) inputOI)
+ .getPrimitiveWritableObject(input).toStringFormatted(formatter));
return t;
case INTERVAL_YEAR_MONTH:
t.set(((HiveIntervalYearMonthObjectInspector) inputOI)
@@ -520,6 +540,10 @@ public Text convert(Object input) {
throw new RuntimeException("Hive 2 Internal error: type = " + inputOI.getTypeName());
}
}
+
+ public void setDateTimeFormatter(HiveDateTimeFormatter formatter) {
+ this.formatter = formatter;
+ }
}
/**
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
index 3886b202c7..72d45d93f0 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
@@ -29,6 +29,9 @@
import org.apache.hadoop.hive.common.classification.InterfaceAudience;
import org.apache.hadoop.hive.common.classification.InterfaceStability;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.ParseException;
import org.apache.hadoop.hive.common.type.Date;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
@@ -1113,6 +1116,11 @@ public static HiveDecimal getHiveDecimal(Object o, PrimitiveObjectInspector oi)
}
public static Date getDate(Object o, PrimitiveObjectInspector oi) {
+ return getDate(o, oi, null);
+ }
+
+ public static Date getDate(
+ Object o, PrimitiveObjectInspector oi, HiveDateTimeFormatter formatter) {
if (o == null) {
return null;
}
@@ -1125,13 +1133,9 @@ public static Date getDate(Object o, PrimitiveObjectInspector oi) {
StringObjectInspector soi = (StringObjectInspector) oi;
String s = soi.getPrimitiveJavaObject(o).trim();
try {
- if (s.length() == DATE_LENGTH) {
- result = Date.valueOf(s);
- } else {
- Timestamp ts = getTimestampFromString(s);
- if (ts != null) {
- result = Date.ofEpochMilli(ts.toEpochMilli());
- }
+ Date date = getDateFromString(s, formatter);
+ if (date != null) {
+ result = date;
}
} catch (IllegalArgumentException e) {
// Do nothing
@@ -1141,13 +1145,9 @@ public static Date getDate(Object o, PrimitiveObjectInspector oi) {
case VARCHAR: {
String val = getString(o, oi).trim();
try {
- if (val.length() == DATE_LENGTH) {
- result = Date.valueOf(val);
- } else {
- Timestamp ts = getTimestampFromString(val);
- if (ts != null) {
- result = Date.ofEpochMilli(ts.toEpochMilli());
- }
+ Date date = getDateFromString(val, formatter);
+ if (date != null) {
+ result = date;
}
} catch (IllegalArgumentException e) {
// Do nothing
@@ -1177,11 +1177,46 @@ public static Date getDate(Object o, PrimitiveObjectInspector oi) {
return result;
}
+ private final static int DATE_LENGTH = "YYYY-MM-DD".length();
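+
+ /** Parses a Date from a string, using the SQL formatter when one is provided. */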
+ private static Date getDateFromString(String s, HiveDateTimeFormatter formatter) {
+ // with SQL formats
+ if (formatter != null) {
+ try {
+ return Date.valueOf(s, formatter);
+ } catch (ParseException e) {
+ return null;
+ }
+ }
+
+ // without SQL formats
+ if (s.length() == DATE_LENGTH) {
+ return Date.valueOf(s);
+ } else {
+ Timestamp ts = getTimestampFromString(s);
+ if (ts != null) {
+ return Date.ofEpochMilli(ts.toEpochMilli());
+ }
+ }
+ return null;
+ }
+
public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector oi) {
return getTimestamp(o, oi, false);
}
+ public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector oi, HiveDateTimeFormatter formatter) {
+ return getTimestamp(o, oi, false, formatter);
+ }
+
public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector inputOI, boolean intToTimestampInSeconds) {
+ return getTimestamp(o, inputOI, intToTimestampInSeconds, null);
+ }
+
+ public static Timestamp getTimestamp(Object o,
+ PrimitiveObjectInspector inputOI,
+ boolean intToTimestampInSeconds,
+ HiveDateTimeFormatter formatter) {
if (o == null) {
return null;
}
@@ -1225,11 +1260,11 @@ public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector inputOI,
case STRING:
StringObjectInspector soi = (StringObjectInspector) inputOI;
String s = soi.getPrimitiveJavaObject(o);
- result = getTimestampFromString(s);
+ result = getTimestampFromString(s, formatter);
break;
case CHAR:
case VARCHAR:
- result = getTimestampFromString(getString(o, inputOI));
+ result = getTimestampFromString(getString(o, inputOI), formatter);
break;
case DATE:
result = Timestamp.ofEpochMilli(
@@ -1254,15 +1289,17 @@ public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector inputOI,
return result;
}
- private final static int TS_LENGTH = "yyyy-mm-dd hh:mm:ss".length();
- private final static int DATE_LENGTH = "YYYY-MM-DD".length();
-
public static Timestamp getTimestampFromString(String s) {
+ return getTimestampFromString(s, null);
+ }
+
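+ /** Parses a timestamp from a string, using the given formatter when non-null. */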
+ public static Timestamp getTimestampFromString(String s, HiveDateTimeFormatter formatter) {
s = s.trim();
s = trimNanoTimestamp(s);
try {
- return TimestampUtils.stringToTimestamp(s);
+ return TimestampUtils.stringToTimestamp(s, formatter);
} catch (IllegalArgumentException e) {
return null;
}
@@ -1284,21 +1321,13 @@ private static String trimNanoTimestamp(String s) {
return s;
}
- private static boolean isValidTimeStamp(final String s) {
- if (s.length() == TS_LENGTH ||
- (s.contains(".") &&
- s.substring(0, s.indexOf('.')).length() == TS_LENGTH)) {
- // Possible timestamp
- if (s.charAt(DATE_LENGTH) == '-') {
- return false;
- }
- return true;
- }
- return false;
+ public static TimestampTZ getTimestampLocalTZ(Object o, PrimitiveObjectInspector oi,
+ ZoneId timeZone) {
+ return getTimestampLocalTZ(o, oi, timeZone, null);
}
public static TimestampTZ getTimestampLocalTZ(Object o, PrimitiveObjectInspector oi,
- ZoneId timeZone) {
+ ZoneId timeZone, HiveDateTimeFormatter formatter) {
if (o == null) {
return null;
}
@@ -1309,7 +1338,7 @@ public static TimestampTZ getTimestampLocalTZ(Object o, PrimitiveObjectInspector
case STRING: {
StringObjectInspector soi = (StringObjectInspector) oi;
String s = soi.getPrimitiveJavaObject(o).trim();
- return TimestampTZUtil.parseOrNull(trimNanoTimestamp(s), timeZone);
+ return TimestampTZUtil.parseOrNull(trimNanoTimestamp(s), timeZone, formatter);
}
case CHAR:
case VARCHAR: {