diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveDateTimeFormatter.java common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveDateTimeFormatter.java
new file mode 100644
index 0000000000..a158d4befd
--- /dev/null
+++ common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveDateTimeFormatter.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+
+
+/**
+ * Interface used for formatting and parsing timestamps and dates. Created so that callers can
+ * format datetime objects into strings and parse strings into datetime objects either with
+ * SQL:2016 semantics or with the legacy (java.text.SimpleDateFormat) semantics.
+ */
+public interface HiveDateTimeFormatter {
+ /**
+ * Format the given timestamp into a string.
+ *
+ * @throws IllegalArgumentException if timestamp cannot be formatted.
+ */
+ String format(Timestamp ts);
+
+ /**
+ * Format the given date into a string.
+ *
+ * @throws IllegalArgumentException if date cannot be formatted.
+ */
+ String format(Date date);
+
+ /**
+ * Parse the given string into a timestamp.
+ *
+ * @throws IllegalArgumentException if string cannot be parsed.
+ */
+ Timestamp parseTimestamp(String string);
+
+ /**
+   * Parse the given string into a date.
+ *
+ * @throws IllegalArgumentException if string cannot be parsed.
+ */
+ Date parseDate(String string);
+
+ /**
+ * Get the format pattern to be used for formatting datetime objects or parsing strings.
+ */
+ String getPattern();
+}
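
For orientation, a minimal usage sketch of the interface above. The class name `FormatterDemo` is hypothetical; `HiveSqlDateTimeFormatter`, introduced later in this patch, is one of the implementations:

```java
import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
import org.apache.hadoop.hive.common.type.Timestamp;

public class FormatterDemo {
  public static void main(String[] args) {
    // forParsing=true validates the pattern for string-to-datetime use
    HiveDateTimeFormatter formatter = new HiveSqlDateTimeFormatter("yyyy-mm-dd hh24:mi:ss", true);
    Timestamp ts = formatter.parseTimestamp("2019-01-01 23:59:59");
    System.out.println(ts); // 2019-01-01 23:59:59
  }
}
```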
diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveJavaDateTimeFormatter.java common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveJavaDateTimeFormatter.java
new file mode 100644
index 0000000000..409a902e65
--- /dev/null
+++ common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveJavaDateTimeFormatter.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+
+import java.time.Instant;
+import java.time.LocalDateTime;
+import java.time.ZoneId;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+
+/**
+ * Wrapper for DateTimeFormatter in the java.time package.
+ */
+public class HiveJavaDateTimeFormatter implements HiveDateTimeFormatter {
+
+  private final DateTimeFormatter formatter;
+
+ public HiveJavaDateTimeFormatter(DateTimeFormatter formatter) {
+ this.formatter = formatter;
+ }
+
+ @Override public String format(Timestamp ts) {
+ return formatter.format(
+ LocalDateTime.ofInstant(
+ Instant.ofEpochSecond(ts.toEpochSecond(), ts.getNanos()), ZoneId.of("UTC")));
+ }
+
+ @Override public String format(Date date) {
+ return format(Timestamp.ofEpochMilli(date.toEpochMilli()));
+ }
+
+ @Override public Timestamp parseTimestamp(String string) {
+ LocalDateTime ldt = LocalDateTime.parse(string, formatter);
+ return Timestamp.ofEpochSecond(ldt.toEpochSecond(ZoneOffset.UTC), ldt.getNano());
+ }
+
+ @Override public Date parseDate(String string) {
+ return Date.ofEpochMilli(parseTimestamp(string).toEpochMilli());
+ }
+
+  @Override public String getPattern() {
+    // DateTimeFormatter does not retain the original pattern string, so this is a best-effort
+    // description of the formatter rather than the exact pattern it was built from.
+    return formatter.toString();
+  }
+}
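
A sketch of wrapping a caller-supplied java.time formatter (the class name `JavaTimeDemo` is hypothetical); note that both parsing and formatting go through a UTC LocalDateTime:

```java
import java.time.format.DateTimeFormatter;

import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.format.datetime.HiveJavaDateTimeFormatter;
import org.apache.hadoop.hive.common.type.Timestamp;

public class JavaTimeDemo {
  public static void main(String[] args) {
    // the wrapped formatter must cover all date and time fields of a LocalDateTime
    DateTimeFormatter dtf = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
    HiveDateTimeFormatter formatter = new HiveJavaDateTimeFormatter(dtf);
    Timestamp ts = formatter.parseTimestamp("2019-01-01 23:59:59");
    System.out.println(formatter.format(ts)); // 2019-01-01 23:59:59
  }
}
```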
diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSimpleDateFormatter.java common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSimpleDateFormatter.java
new file mode 100644
index 0000000000..faec4891d1
--- /dev/null
+++ common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSimpleDateFormatter.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import org.apache.hadoop.hive.common.type.Timestamp;
+
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.TimeZone;
+
+/**
+ * Wrapper for java.text.SimpleDateFormat.
+ */
+public class HiveSimpleDateFormatter implements HiveDateTimeFormatter {
+
+  // Note: java.text.SimpleDateFormat is not thread-safe, so instances of this class must not be
+  // shared across threads.
+  private final SimpleDateFormat format = new SimpleDateFormat();
+ private String pattern;
+
+ public HiveSimpleDateFormatter(String pattern, TimeZone timeZone) {
+ setPattern(pattern);
+ format.setTimeZone(timeZone);
+ }
+
+ @Override public String format(Timestamp ts) {
+ Date date = new Date(ts.toEpochMilli());
+ return format.format(date);
+ }
+
+ @Override public String format(org.apache.hadoop.hive.common.type.Date d) {
+ Date date = new Date(d.toEpochMilli());
+ return format.format(date);
+ }
+
+ @Override public Timestamp parseTimestamp(String string) {
+ try {
+ Date date = format.parse(string);
+ return Timestamp.ofEpochMilli(date.getTime());
+    } catch (java.text.ParseException e) {
+      throw new IllegalArgumentException("String " + string
+          + " could not be parsed by java.text.SimpleDateFormat with pattern " + pattern, e);
+    }
+ }
+
+ @Override public org.apache.hadoop.hive.common.type.Date parseDate(String string) {
+ return org.apache.hadoop.hive.common.type.Date.ofEpochMilli(
+ parseTimestamp(string).toEpochMilli());
+ }
+
+ private void setPattern(String pattern) {
+ format.applyPattern(pattern);
+ this.pattern = pattern;
+ }
+
+ @Override public String getPattern() {
+ return pattern;
+ }
+}
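
A usage sketch of the legacy path (the class name `SimpleFormatterDemo` is hypothetical; the values mirror the unit tests added below). Because of the SimpleDateFormat caveat above, each thread should build its own instance:

```java
import java.time.ZoneOffset;
import java.util.TimeZone;

import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.format.datetime.HiveSimpleDateFormatter;
import org.apache.hadoop.hive.common.type.Timestamp;

public class SimpleFormatterDemo {
  public static void main(String[] args) {
    HiveDateTimeFormatter formatter =
        new HiveSimpleDateFormatter("yyyy-MM-dd HH:mm:ss", TimeZone.getTimeZone(ZoneOffset.UTC));
    Timestamp ts = formatter.parseTimestamp("1960-01-01 23:00:00"); // pre-epoch values work too
    System.out.println(formatter.format(ts)); // 1960-01-01 23:00:00
  }
}
```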
diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java
new file mode 100644
index 0000000000..601e38bfd5
--- /dev/null
+++ common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java
@@ -0,0 +1,876 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+
+import java.time.DateTimeException;
+import java.time.Duration;
+import java.time.Instant;
+import java.time.LocalDateTime;
+import java.time.ZoneOffset;
+import java.time.temporal.ChronoField;
+import java.time.temporal.ChronoUnit;
+import java.time.temporal.TemporalField;
+import java.time.temporal.TemporalUnit;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.TimeZone;
+
+/**
+ * Formatter using SQL:2016 datetime patterns.
+ *
+ * For all tokens:
+ * - Patterns are case-insensitive, except AM/PM and T/Z. See these sections for more details.
+ * - For string to datetime conversion, no duplicate format tokens are allowed, including tokens
+ * that have the same meaning but different lengths ("Y" and "YY" conflict) or different
+ * behaviors ("RR" and "YY" conflict).
+ *
+ * For all numeric tokens:
+ * - The "expected length" of input/output is the number of tokens in the character (e.g. "YYY": 3,
+ * "Y": 1, and so on), with some exceptions (see map SPECIAL_LENGTHS).
+ * - For string to datetime conversion, inputs of fewer digits than expected are accepted if
+ * followed by a delimiter, e.g. format="YYYY-MM-DD", input="19-1-1", output=2019-01-01 00:00:00.
+ * - For datetime to string conversion, output is left padded with zeros, e.g. format="DD SSSSS",
+ * input=2019-01-01 00:00:03, output="01 00003".
+ *
+ *
+ * Accepted format tokens:
+ * Note: "|" means "or". "Delimiter" means a separator, tokens T or Z, or end of input.
+ *
+ * A. Temporal tokens
+ * YYYY
+ * 4-digit year
+ * - For string to datetime conversion, prefix digits for 1, 2, and 3-digit inputs are obtained
+ * from current date
+ *     E.g. input="9-01-01", pattern="YYYY-MM-DD", current year=2020, output=2029-01-01 00:00:00
+ *
+ *
+ * YYY
+ * Last 3 digits of a year
+ * - Gets the prefix digit from current date.
+ * - Can accept fewer digits than 3, similarly to YYYY.
+ *
+ * YY
+ * Last 2 digits of a year
+ * - Gets the 2 prefix digits from current date.
+ * - Can accept fewer digits than 2, similarly to YYYY.
+ *
+ * Y
+ * Last digit of a year
+ * - Gets the 3 prefix digits from current date.
+ *
+ * RRRR
+ * 4-digit rounded year
+ * - String to datetime conversion:
+ * - If 2 digits are provided then acts like RR.
+ *     - If 1, 3, or 4 digits are provided, acts like YYYY.
+ * - For datetime to string conversion, acts like YYYY.
+ *
+ * RR
+ * 2-digit rounded year
+ *   - String to datetime conversion:
+ *     - Semantics:
+ *       Input:     Last 2 digits of current year:   First 2 digits of output:
+ *       0 to 49    00 to 49                         First 2 digits of current year
+ *       0 to 49    50 to 99                         First 2 digits of current year + 1
+ *       50 to 99   00 to 49                         First 2 digits of current year - 1
+ *       50 to 99   50 to 99                         First 2 digits of current year
+ * - If 1-digit year is provided followed by a delimiter, falls back to YYYY with 1-digit year
+ * input.
+ * - For datetime to string conversion, acts like YY.
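+ *     Worked example of the table above, assuming the current year is 2019 (last 2 digits 00-49):
+ *       input="49-01-01", pattern="RR-MM-DD", output=2049-01-01 00:00:00
+ *       input="50-01-01", pattern="RR-MM-DD", output=1950-01-01 00:00:00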
+ *
+ * MM
+ * Month (1-12)
+ * - For string to datetime conversion, conflicts with DDD.
+ *
+ * DD
+ * Day of month (1-31)
+ * - For string to datetime conversion, conflicts with DDD.
+ *
+ * DDD
+ * Day of year (1-366)
+ * - For string to datetime conversion, conflicts with DD and MM.
+ *
+ * HH
+ * Hour of day (1-12)
+ * - If no AM/PM provided then defaults to AM.
+ * - In string to datetime conversion, conflicts with SSSSS and HH24.
+ *
+ * HH12
+ * Hour of day (1-12)
+ * See HH.
+ *
+ * HH24
+ * Hour of day (0-23)
+ * - In string to datetime conversion, conflicts with SSSSS, HH12 and AM/PM.
+ *
+ * MI
+ * Minute of hour (0-59)
+ * - In string to datetime conversion, conflicts with SSSSS.
+ *
+ * SS
+ * Second of minute (0-59)
+ * - In string to datetime conversion, conflicts with SSSSS.
+ *
+ * SSSSS
+ * Second of Day (0-86399)
+ * - In string to datetime conversion, conflicts with SS, HH, HH12, HH24, MI, AM/PM.
+ *
+ * FF[1..9]
+ * Fraction of second
+ * - 1..9 indicates the number of decimal digits. "FF" (no number of digits specified) is also
+ * accepted.
+ * - In datetime to string conversion, "FF" will omit trailing zeros, or output "0" if subsecond
+ * value is 0.
+ * - In string to datetime conversion, fewer digits than expected are accepted if followed by a
+ * delimiter. "FF" acts like "FF9".
+ *
+ * AM|A.M.
+ * Meridiem indicator or AM/PM
+ * - Datetime to string conversion:
+ * - AM and PM mean the exact same thing in the pattern.
+ *     e.g. input=2019-01-01 20:00, format="AM", output="PM".
+ * - Retains the exact format (capitalization and length) provided in the pattern string. If p.m.
+ * is in the pattern, we expect a.m. or p.m. in the output; if AM is in the pattern, we expect
+ * AM or PM in the output.
+ * - String to datetime conversion:
+ * - Conflicts with HH24 and SSSSS.
+ *     - It doesn't matter which meridiem indicator is in the pattern.
+ * E.g. input="2019-01-01 11:00 p.m.", pattern="YYYY-MM-DD HH12:MI AM",
+ * output=2019-01-01 23:00:00
+ *
+ * PM|P.M.
+ * Meridiem indicator
+ * See AM|A.M.
+ *
+ * B. Time zone tokens
+ * TZH
+ * Time zone offset hour (-15 to +15)
+ * - 3-character-long input is expected: 1 character for the sign and 2 digits for the value.
+ *   e.g. "+10", "-05"
+ * - 2-digit input is accepted without the sign, e.g. "04".
+ * - Both the 2- and 3-character forms are accepted even if not followed by separators.
+ * - Disabled for timestamp to string and date to string conversion, as timestamp and date are time
+ * zone agnostic.
+ *
+ * TZM
+ * Time zone offset minute (0-59)
+ * - For string to datetime conversion:
+ * - TZH token is required.
+ * - Unsigned; sign comes from TZH.
+ *   - Therefore time zone offsets like "-30" minutes should be expressed thus: input="-00:30"
+ *     pattern="TZH:TZM".
+ * - Disabled for timestamp to string and date to string conversion, as timestamp and date are time
+ * zone agnostic.
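+ *   E.g. pattern="YYYY-MM-DD HH24:MI TZH:TZM", input="2019-1-1 14:00-1:30",
+ *   output=2019-01-01 12:30:00 (the lone "-" before "1:30" is a separator, not a negative sign,
+ *   so the offset is +01:30; see the separator caveat below).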
+ *
+ * C. Separators
+ * -|.|/|,|'|;|:|<space>
+ * Separator
+ * - Uses loose matching. Existence of a sequence of separators in the format should match the
+ * existence of a sequence of separators in the input regardless of the types of the separator or
+ *   the length of the sequence where length > 1. E.g. input="2019-. ;10/10", pattern="YYYY-MM-DD"
+ *   is valid; input="20191010", pattern="YYYY-MM-DD" is not valid.
+ * - If the last separator character in the separator substring is "-" and is immediately followed
+ * by a time zone hour (tzh) token, it's a negative sign and not counted as a separator, UNLESS
+ * this is the only possible separator character in the separator substring (in which case it is
+ * not counted as the tzh's negative sign).
+ *
+ * D. ISO 8601 delimiters
+ * T
+ * ISO 8601 delimiter
+ * - Serves as a delimiter.
+ * - Function is to support formats like "YYYY-MM-DDTHH24:MI:SS.FF9Z", "YYYY-MM-DD-HH24:MI:SSZ"
+ * - For datetime to string conversion, output is always capitalized ("T"), even if lowercase ("t")
+ * is provided in the pattern.
+ *
+ * Z
+ * ISO 8601 delimiter
+ * See T.
+ */
+
+public class HiveSqlDateTimeFormatter implements HiveDateTimeFormatter {
+
+ private static final int LONGEST_TOKEN_LENGTH = 5;
+ private static final int LONGEST_ACCEPTED_PATTERN = 100; // for sanity's sake
+ private static final long MINUTES_PER_HOUR = 60;
+ private static final int FIFTY = 50;
+ private static final int NANOS_MAX_LENGTH = 9;
+ public static final int AM = 0;
+ public static final int PM = 1;
+ private String pattern;
+  private List<Token> tokens = new ArrayList<>();
+
+  private static final Map<String, TemporalField> TEMPORAL_TOKENS =
+      ImmutableMap.<String, TemporalField>builder()
+ .put("yyyy", ChronoField.YEAR).put("yyy", ChronoField.YEAR)
+ .put("yy", ChronoField.YEAR).put("y", ChronoField.YEAR)
+ .put("rrrr", ChronoField.YEAR).put("rr", ChronoField.YEAR)
+ .put("mm", ChronoField.MONTH_OF_YEAR)
+ .put("dd", ChronoField.DAY_OF_MONTH)
+ .put("ddd", ChronoField.DAY_OF_YEAR)
+ .put("hh", ChronoField.HOUR_OF_AMPM)
+ .put("hh12", ChronoField.HOUR_OF_AMPM)
+ .put("hh24", ChronoField.HOUR_OF_DAY)
+ .put("mi", ChronoField.MINUTE_OF_HOUR)
+ .put("ss", ChronoField.SECOND_OF_MINUTE)
+ .put("sssss", ChronoField.SECOND_OF_DAY)
+ .put("ff1", ChronoField.NANO_OF_SECOND).put("ff2", ChronoField.NANO_OF_SECOND)
+ .put("ff3", ChronoField.NANO_OF_SECOND).put("ff4", ChronoField.NANO_OF_SECOND)
+ .put("ff5", ChronoField.NANO_OF_SECOND).put("ff6", ChronoField.NANO_OF_SECOND)
+ .put("ff7", ChronoField.NANO_OF_SECOND).put("ff8", ChronoField.NANO_OF_SECOND)
+ .put("ff9", ChronoField.NANO_OF_SECOND).put("ff", ChronoField.NANO_OF_SECOND)
+ .put("a.m.", ChronoField.AMPM_OF_DAY).put("am", ChronoField.AMPM_OF_DAY)
+ .put("p.m.", ChronoField.AMPM_OF_DAY).put("pm", ChronoField.AMPM_OF_DAY)
+ .build();
+
+  private static final Map<String, TemporalUnit> TIME_ZONE_TOKENS =
+      ImmutableMap.<String, TemporalUnit>builder()
+          .put("tzh", ChronoUnit.HOURS).put("tzm", ChronoUnit.MINUTES).build();
+
+  private static final List<String> VALID_ISO_8601_DELIMITERS =
+      ImmutableList.of("t", "z");
+
+  private static final List<String> VALID_SEPARATORS =
+      ImmutableList.of("-", ":", " ", ".", "/", ";", "\'", ",");
+
+  private static final Map<String, Integer> SPECIAL_LENGTHS =
+      ImmutableMap.<String, Integer>builder()
+ .put("hh12", 2).put("hh24", 2).put("tzm", 2).put("am", 4).put("pm", 4)
+ .put("ff1", 1).put("ff2", 2).put("ff3", 3).put("ff4", 4).put("ff5", 5)
+ .put("ff6", 6).put("ff7", 7).put("ff8", 8).put("ff9", 9).put("ff", 9)
+ .build();
+
+ /**
+ * Represents broad categories of tokens.
+ */
+ public enum TokenType {
+ TEMPORAL,
+ SEPARATOR,
+ TIMEZONE,
+ ISO_8601_DELIMITER
+ }
+
+ /**
+ * Token representation.
+ */
+ public static class Token {
+ TokenType type;
+ TemporalField temporalField; // for type TEMPORAL e.g. ChronoField.YEAR
+ TemporalUnit temporalUnit; // for type TIMEZONE e.g. ChronoUnit.HOURS
+ String string; // pattern string, e.g. "yyy"
+ int length; // length (e.g. YYY: 3, FF8: 8)
+
+ public Token(TemporalField temporalField, String string, int length) {
+ this(TokenType.TEMPORAL, temporalField, null, string, length);
+ }
+
+ public Token(TemporalUnit temporalUnit, String string, int length) {
+ this(TokenType.TIMEZONE, null, temporalUnit, string, length);
+ }
+
+ public Token(TokenType tokenType, String string) {
+ this(tokenType, null, null, string, string.length());
+ }
+
+ public Token(TokenType tokenType, TemporalField temporalField, TemporalUnit temporalUnit,
+ String string, int length) {
+ this.type = tokenType;
+ this.temporalField = temporalField;
+ this.temporalUnit = temporalUnit;
+ this.string = string;
+ this.length = length;
+ }
+
+ @Override public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append(string);
+ sb.append(" type: ");
+ sb.append(type);
+ if (temporalField != null) {
+ sb.append(" temporalField: ");
+ sb.append(temporalField);
+ } else if (temporalUnit != null) {
+ sb.append(" temporalUnit: ");
+ sb.append(temporalUnit);
+ }
+ return sb.toString();
+ }
+ }
+
+ public HiveSqlDateTimeFormatter(String pattern, boolean forParsing) {
+ setPattern(pattern, forParsing);
+ }
+
+ /**
+   * Parse the pattern into tokens and verify it for the intended use (parsing or formatting).
+ */
+ private void setPattern(String pattern, boolean forParsing) {
+    if (pattern.length() >= LONGEST_ACCEPTED_PATTERN) {
+      throw new IllegalArgumentException("The input format is too long");
+    }
+ this.pattern = pattern;
+
+ parsePatternToTokens(pattern);
+
+ // throw IllegalArgumentException if pattern is invalid
+ if (forParsing) {
+ verifyForParse();
+ } else {
+ verifyForFormat();
+ }
+ }
+
+ /**
+ * Parse pattern to list of tokens.
+ */
+  private void parsePatternToTokens(String pattern) {
+ tokens.clear();
+ String originalPattern = pattern;
+ pattern = pattern.toLowerCase();
+
+ // indexes of the substring we will check (includes begin, does not include end)
+    int begin = 0, end = 0;
+ String candidate;
+ Token lastAddedToken = null;
+
+ while (begin < pattern.length()) {
+ // if begin hasn't progressed, then pattern is not parsable
+ if (begin != end) {
+ tokens.clear();
+ throw new IllegalArgumentException("Bad date/time conversion pattern: " + pattern);
+ }
+
+ // find next token
+ for (int i = LONGEST_TOKEN_LENGTH; i > 0; i--) {
+ end = begin + i;
+ if (end > pattern.length()) { // don't go past the end of the pattern string
+ continue;
+ }
+ candidate = pattern.substring(begin, end);
+ if (isSeparator(candidate)) {
+ lastAddedToken = parseSeparatorToken(candidate, lastAddedToken);
+ begin = end;
+ break;
+ }
+ if (isIso8601Delimiter(candidate)) {
+ lastAddedToken = parseIso8601DelimiterToken(candidate);
+ begin = end;
+ break;
+ }
+ if (isTemporalToken(candidate)) {
+ lastAddedToken = parseTemporalToken(originalPattern, begin, candidate);
+ begin = end;
+ break;
+ }
+ if (isTimeZoneToken(candidate)) {
+ lastAddedToken = parseTimeZoneToken(candidate);
+ begin = end;
+ break;
+ }
+ }
+ }
+ }
+
+ private boolean isSeparator(String candidate) {
+ return candidate.length() == 1 && VALID_SEPARATORS.contains(candidate);
+ }
+
+ private boolean isIso8601Delimiter(String candidate) {
+ return candidate.length() == 1 && VALID_ISO_8601_DELIMITERS.contains(candidate);
+ }
+
+ private boolean isTemporalToken(String candidate) {
+ return TEMPORAL_TOKENS.containsKey(candidate);
+ }
+
+ private boolean isTimeZoneToken(String pattern) {
+ return TIME_ZONE_TOKENS.containsKey(pattern);
+ }
+
+ private Token parseSeparatorToken(String candidate, Token lastAddedToken) {
+ // try to clump separator with immediately preceding separators (e.g. "---" counts as one
+ // separator)
+ if (lastAddedToken != null && lastAddedToken.type == TokenType.SEPARATOR) {
+ lastAddedToken.string += candidate;
+ lastAddedToken.length += 1;
+ } else {
+ lastAddedToken = new Token(TokenType.SEPARATOR, candidate);
+ tokens.add(lastAddedToken);
+ }
+ return lastAddedToken;
+ }
+
+ private Token parseIso8601DelimiterToken(String candidate) {
+ Token lastAddedToken;
+ lastAddedToken = new Token(TokenType.ISO_8601_DELIMITER, candidate.toUpperCase());
+ tokens.add(lastAddedToken);
+ return lastAddedToken;
+ }
+
+ private Token parseTemporalToken(String originalPattern, int begin, String candidate) {
+ Token lastAddedToken;
+
+ // for AM/PM, keep original case
+ if (TEMPORAL_TOKENS.get(candidate) == ChronoField.AMPM_OF_DAY) {
+ int subStringEnd = begin + candidate.length();
+ candidate = originalPattern.substring(begin, subStringEnd);
+ }
+ lastAddedToken = new Token(TEMPORAL_TOKENS.get(candidate.toLowerCase()), candidate,
+ getTokenStringLength(candidate.toLowerCase()));
+ tokens.add(lastAddedToken);
+ return lastAddedToken;
+ }
+
+ private Token parseTimeZoneToken(String candidate) {
+ Token lastAddedToken;
+ lastAddedToken = new Token(TIME_ZONE_TOKENS.get(candidate), candidate,
+ getTokenStringLength(candidate));
+ tokens.add(lastAddedToken);
+ return lastAddedToken;
+ }
+
+ private int getTokenStringLength(String candidate) {
+ Integer length = SPECIAL_LENGTHS.get(candidate);
+ if (length != null) {
+ return length;
+ }
+ return candidate.length();
+ }
+
+ /**
+ * Make sure the generated list of tokens is valid for parsing strings to datetime objects.
+ */
+ private void verifyForParse() {
+
+ // create a list of tokens' temporal fields
+    ArrayList<TemporalField> temporalFields = new ArrayList<>();
+    ArrayList<TemporalUnit> timeZoneTemporalUnits = new ArrayList<>();
+    int roundYearCount = 0, yearCount = 0;
+ for (Token token : tokens) {
+ if (token.temporalField != null) {
+ temporalFields.add(token.temporalField);
+ if (token.temporalField == ChronoField.YEAR) {
+ if (token.string.startsWith("r")) {
+ roundYearCount += 1;
+ } else {
+ yearCount += 1;
+ }
+ }
+ } else if (token.temporalUnit != null) {
+ timeZoneTemporalUnits.add(token.temporalUnit);
+ }
+ }
+
+ if (roundYearCount > 0 && yearCount > 0) {
+ throw new IllegalArgumentException("Invalid duplication of format element: Both year and"
+ + "round year are provided");
+ }
+ for (TemporalField tokenType : temporalFields) {
+ if (Collections.frequency(temporalFields, tokenType) > 1) {
+ throw new IllegalArgumentException(
+ "Invalid duplication of format element: multiple " + tokenType.toString()
+ + " tokens provided.");
+ }
+ }
+ if (temporalFields.contains(ChronoField.AMPM_OF_DAY) &&
+ !(temporalFields.contains(ChronoField.HOUR_OF_DAY) ||
+ temporalFields.contains(ChronoField.HOUR_OF_AMPM))) {
+ throw new IllegalArgumentException("Missing hour token.");
+ }
+ if (temporalFields.contains(ChronoField.AMPM_OF_DAY) &&
+ temporalFields.contains(ChronoField.HOUR_OF_DAY)) {
+ throw new IllegalArgumentException("Conflict between median indicator and hour token.");
+ }
+ if (temporalFields.contains(ChronoField.HOUR_OF_AMPM) &&
+ temporalFields.contains(ChronoField.HOUR_OF_DAY)) {
+ throw new IllegalArgumentException("Conflict between hour of day and hour of am/pm token.");
+ }
+ if (temporalFields.contains(ChronoField.DAY_OF_YEAR) &&
+ (temporalFields.contains(ChronoField.DAY_OF_MONTH) ||
+ temporalFields.contains(ChronoField.MONTH_OF_YEAR))) {
+ throw new IllegalArgumentException("Day of year provided with day or month token.");
+ }
+ if (temporalFields.contains(ChronoField.SECOND_OF_DAY) &&
+ (temporalFields.contains(ChronoField.HOUR_OF_DAY) ||
+ temporalFields.contains(ChronoField.HOUR_OF_AMPM) ||
+ temporalFields.contains(ChronoField.MINUTE_OF_HOUR) ||
+ temporalFields.contains(ChronoField.SECOND_OF_MINUTE))) {
+ throw new IllegalArgumentException(
+ "Second of day token conflicts with other token(s).");
+ }
+ if (timeZoneTemporalUnits.contains(ChronoUnit.MINUTES) &&
+ !timeZoneTemporalUnits.contains(ChronoUnit.HOURS)) {
+ throw new IllegalArgumentException("Time zone minute token provided without time zone hour token.");
+ }
+ }
+
+ /**
+ * Make sure the generated list of tokens is valid for formatting datetime objects to strings.
+ */
+ private void verifyForFormat() {
+ for (Token token : tokens) {
+ if (token.type == TokenType.TIMEZONE) {
+ throw new IllegalArgumentException(token.string.toUpperCase() + " not a valid format for "
+ + "timestamp or date.");
+ }
+ }
+ }
+
+ @Override public String format(Timestamp ts) {
+ StringBuilder fullOutputSb = new StringBuilder();
+ String outputString = null;
+ int value;
+ LocalDateTime localDateTime =
+ LocalDateTime.ofEpochSecond(ts.toEpochSecond(), ts.getNanos(), ZoneOffset.UTC);
+ for (Token token : tokens) {
+ switch (token.type) {
+ case TEMPORAL:
+ try {
+ value = localDateTime.get(token.temporalField);
+ outputString = formatTemporal(value, token);
+ } catch (DateTimeException e) {
+ throw new IllegalArgumentException(token.temporalField + " couldn't be obtained from "
+ + "LocalDateTime " + localDateTime, e);
+ }
+ break;
+ case TIMEZONE: //invalid for timestamp and date
+ throw new IllegalArgumentException(token.string.toUpperCase() + " not a valid format for "
+ + "timestamp or date.");
+ case SEPARATOR:
+ outputString = token.string;
+ break;
+ case ISO_8601_DELIMITER:
+ outputString = token.string.toUpperCase();
+ break;
+ default:
+ // won't happen
+ }
+ fullOutputSb.append(outputString);
+ }
+ return fullOutputSb.toString();
+ }
+
+ @Override public String format(Date date) {
+ return format(Timestamp.ofEpochSecond(date.toEpochSecond()));
+ }
+
+ private String formatTemporal(int value, Token token) {
+ String output;
+ if (token.temporalField == ChronoField.AMPM_OF_DAY) {
+ output = value == 0 ? "a" : "p";
+ output += token.string.length() == 2 ? "m" : ".m.";
+ if (token.string.startsWith("A") || token.string.startsWith("P")) {
+ output = output.toUpperCase();
+ }
+ } else {
+ // it's a numeric value
+ try {
+ output = String.valueOf(value);
+ output = padOrTruncateNumericTemporal(token, output);
+ } catch (Exception e) {
+ throw new IllegalArgumentException("Value: " + value + " couldn't be cast to string.", e);
+ }
+ }
+ return output;
+ }
+
+ /**
+ * To match token.length, pad left with zeroes or truncate.
+ */
+ private String padOrTruncateNumericTemporal(Token token, String output) {
+ if (output.length() < token.length) {
+ output = StringUtils.leftPad(output, token.length, '0'); // pad left
+ } else if (output.length() > token.length) {
+ if (token.temporalField == ChronoField.NANO_OF_SECOND) {
+ output = output.substring(0, token.length); // truncate right
+ } else {
+ output = output.substring(output.length() - token.length); // truncate left
+ }
+ }
+ if (token.temporalField == ChronoField.NANO_OF_SECOND
+ && token.string.equalsIgnoreCase("ff")) {
+ output = output.replaceAll("0*$", ""); //truncate trailing 0's
+ if (output.isEmpty()) {
+ output = "0";
+ }
+ }
+ return output;
+ }
+
+ /**
+   * Currently unused; kept here for eventual timestamp with local time zone support.
+ */
+ private String formatTimeZone(TimeZone timeZone, LocalDateTime localDateTime, Token token) {
+ ZoneOffset offset = timeZone.toZoneId().getRules().getOffset(localDateTime);
+ Duration seconds = Duration.of(offset.get(ChronoField.OFFSET_SECONDS), ChronoUnit.SECONDS);
+ if (token.string.equals("tzh")) {
+ long hours = seconds.toHours();
+ String s = (hours >= 0) ? "+" : "-";
+ s += (Math.abs(hours) < 10) ? "0" : "";
+ s += String.valueOf(Math.abs(hours));
+ return s;
+ } else {
+ long minutes = Math.abs(seconds.toMinutes() % MINUTES_PER_HOUR);
+ String s = String.valueOf(minutes);
+ if (s.length() == 1) {
+ s = "0" + s;
+ }
+ return s;
+ }
+ }
+
+  @Override public Timestamp parseTimestamp(String fullInput) {
+ LocalDateTime ldt = LocalDateTime.ofInstant(Instant.EPOCH, ZoneOffset.UTC);
+ String substring;
+ int index = 0;
+ int value;
+ int timeZoneSign = 0, timeZoneHours = 0, timeZoneMinutes = 0;
+
+ for (Token token : tokens) {
+ switch (token.type) {
+ case TEMPORAL:
+ substring = getNextSubstring(fullInput, index, token); // e.g. yy-m -> yy
+ value = parseTemporal(substring, token); // e.g. 18->2018, July->07
+ try {
+ ldt = ldt.with(token.temporalField, value);
+        } catch (DateTimeException e) {
+          throw new IllegalArgumentException(
+              "Value " + value + " not valid for token " + token.toString(), e);
+        }
+ index += substring.length();
+ break;
+ case TIMEZONE:
+ if (token.temporalUnit == ChronoUnit.HOURS) {
+ String nextCharacter = fullInput.substring(index, index + 1);
+ timeZoneSign = "-".equals(nextCharacter) ? -1 : 1;
+ if ("-".equals(nextCharacter) || "+".equals(nextCharacter)) {
+ index++;
+ }
+ // parse next two digits
+ substring = getNextSubstring(fullInput, index, index + 2, token);
+ try {
+ timeZoneHours = Integer.parseInt(substring);
+ } catch (NumberFormatException e) {
+ throw new IllegalArgumentException("Couldn't parse substring \"" + substring +
+ "\" with token " + token + " to int. Pattern is " + pattern, e);
+ }
+ if (timeZoneHours < -15 || timeZoneHours > 15) {
+ throw new IllegalArgumentException("Couldn't parse substring \"" + substring +
+ "\" to TZH because TZH range is -15 to +15. Pattern is " + pattern);
+ }
+ } else { // time zone minutes
+ substring = getNextSubstring(fullInput, index, token);
+ try {
+ timeZoneMinutes = Integer.parseInt(substring);
+ } catch (NumberFormatException e) {
+ throw new IllegalArgumentException("Couldn't parse substring \"" + substring +
+ "\" with token " + token + " to int. Pattern is " + pattern, e);
+ }
+ if (timeZoneMinutes < 0 || timeZoneMinutes > 59) {
+ throw new IllegalArgumentException("Couldn't parse substring \"" + substring +
+ "\" to TZM because TZM range is 0 to 59. Pattern is " + pattern);
+ }
+ }
+ index += substring.length();
+ break;
+ case SEPARATOR:
+ index = parseSeparator(fullInput, index, token);
+ break;
+ case ISO_8601_DELIMITER:
+        index = parseIso8601Delimiter(fullInput, index, token);
+        break;
+      default:
+ //do nothing
+ }
+ }
+ // time zone hours -- process here because hh/hh24 may be parsed after tzh
+ ldt = ldt.minus(timeZoneSign * timeZoneHours, ChronoUnit.HOURS);
+ // time zone minutes -- process here because sign depends on tzh sign
+    ldt = ldt.minus(timeZoneSign * timeZoneMinutes, ChronoUnit.MINUTES);
+
+ // anything left unparsed at end of string? throw error
+ if (!fullInput.substring(index).isEmpty()) {
+ throw new IllegalArgumentException("Leftover input after parsing: " +
+ fullInput.substring(index) + " in string " + fullInput);
+ }
+
+ return Timestamp.ofEpochSecond(ldt.toEpochSecond(ZoneOffset.UTC), ldt.getNano());
+ }
+
+  @Override public Date parseDate(String input) {
+    return Date.ofEpochMilli(parseTimestamp(input).toEpochMilli());
+  }
+
+ /**
+ * Return the next substring to parse. Length is either specified or token.length, but a
+ * separator or an ISO-8601 delimiter can cut the substring short. (e.g. if the token pattern is
+ * "YYYY" we expect the next 4 characters to be 4 numbers. However, if it is "976/" then we
+ * return "976" because a separator cuts it short.)
+ */
+ private String getNextSubstring(String s, int begin, Token token) {
+ return getNextSubstring(s, begin, begin + token.length, token);
+ }
+
+ private String getNextSubstring(String s, int begin, int end, Token token) {
+ if (end > s.length()) {
+ end = s.length();
+ }
+ s = s.substring(begin, end);
+    if (token.temporalField == ChronoField.AMPM_OF_DAY) {
+      if (s.length() > 1 && (s.charAt(1) == 'm' || s.charAt(1) == 'M')) { // length 2 ("am"/"pm")
+ return s.substring(0, 2);
+ } else {
+ return s;
+ }
+ }
+ for (String sep : VALID_SEPARATORS) {
+ if (s.contains(sep)) {
+ s = s.substring(0, s.indexOf(sep));
+ }
+ }
+ // TODO this will cause problems with DAY (for example, Thursday starts with T)
+ for (String delimiter : VALID_ISO_8601_DELIMITERS) {
+ if (s.toLowerCase().contains(delimiter)) {
+ s = s.substring(0, s.toLowerCase().indexOf(delimiter));
+ }
+ }
+
+ return s;
+ }
+
+ /**
+ * Get the integer value of a temporal substring.
+ */
+  private int parseTemporal(String substring, Token token) {
+ // exceptions to the rule
+ if (token.temporalField == ChronoField.AMPM_OF_DAY) {
+ return substring.toLowerCase().startsWith("a") ? AM : PM;
+
+ } else if (token.temporalField == ChronoField.YEAR) {
+ String currentYearString = String.valueOf(LocalDateTime.now().getYear());
+ //deal with round years
+ if (token.string.startsWith("r") && substring.length() == 2) {
+ int currFirst2Digits = Integer.parseInt(currentYearString.substring(0, 2));
+ int currLast2Digits = Integer.parseInt(currentYearString.substring(2));
+ int valLast2Digits = Integer.parseInt(substring);
+ if (valLast2Digits < FIFTY && currLast2Digits >= FIFTY) {
+ currFirst2Digits += 1;
+ } else if (valLast2Digits >= FIFTY && currLast2Digits < FIFTY) {
+ currFirst2Digits -= 1;
+ }
+ substring = String.valueOf(currFirst2Digits) + substring;
+ } else { // fill in prefix digits with current date
+ substring = currentYearString.substring(0, 4 - substring.length()) + substring;
+ }
+
+ } else if (token.temporalField == ChronoField.NANO_OF_SECOND) {
+ int i = Integer.min(token.length, substring.length());
+ substring += StringUtils.repeat("0", NANOS_MAX_LENGTH - i);
+ }
+
+ // the rule
+ try {
+ return Integer.parseInt(substring);
+ } catch (NumberFormatException e) {
+ throw new IllegalArgumentException("Couldn't parse substring \"" + substring +
+ "\" with token " + token + " to integer. Pattern is " + pattern, e);
+ }
+ }
+
+ /**
+ * Parse the next separator(s). At least one separator character is expected. Separator
+ * characters are interchangeable.
+ *
+ * Caveat: If the last separator character in the separator substring is "-" and is immediately
+ * followed by a time zone hour (tzh) token, it's a negative sign and not counted as a
+ * separator, UNLESS this is the only separator character in the separator substring (in
+ * which case it is not counted as the negative sign).
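+   *
+   * E.g. with pattern "YYYY-MM-DD HH24:MI TZH:TZM": in input "2019-1-1 14:00-1:30" the lone "-"
+   * is a separator (so the offset is +01:30), while in "2019-1-1 14:00--1:-30" the first "-" is
+   * a separator and the second is tzh's negative sign (offset -01:30).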
+ *
+ * @throws IllegalArgumentException if separator is missing
+ */
+  private int parseSeparator(String fullInput, int index, Token token) {
+ int separatorsFound = 0;
+ int begin = index;
+
+ while (index < fullInput.length() &&
+ VALID_SEPARATORS.contains(fullInput.substring(index, index + 1))) {
+ if (!isLastCharacterOfSeparator(index, fullInput) || !(nextTokenIs("tzh", token))
+ || separatorsFound == 0) {
+ separatorsFound++;
+ }
+ index++;
+ }
+
+ if (separatorsFound == 0) {
+ throw new IllegalArgumentException("Missing separator at index " + index);
+ }
+ return begin + separatorsFound;
+ }
+
+  private int parseIso8601Delimiter(String fullInput, int index, Token token) {
+    // guard against running off the end of the input so we throw IllegalArgumentException
+    // rather than StringIndexOutOfBoundsException
+    if (index < fullInput.length()
+        && token.string.equalsIgnoreCase(fullInput.substring(index, index + 1))) {
+      return index + 1;
+    }
+    throw new IllegalArgumentException(
+        "Missing ISO 8601 delimiter " + token.string.toUpperCase());
+  }
+
+ /**
+ * Is the next character something other than a separator?
+ */
+ private boolean isLastCharacterOfSeparator(int index, String string) {
+ if (index == string.length() - 1) { // if we're at the end of the string, yes
+ return true;
+ }
+ return !VALID_SEPARATORS.contains(string.substring(index + 1, index + 2));
+ }
+
+ /**
+ * Does the temporalUnit/temporalField of the next token match the pattern's?
+ */
+ private boolean nextTokenIs(String pattern, Token currentToken) {
+ // make sure currentToken isn't the last one
+ if (tokens.indexOf(currentToken) == tokens.size() - 1) {
+ return false;
+ }
+ Token nextToken = tokens.get(tokens.indexOf(currentToken) + 1);
+ pattern = pattern.toLowerCase();
+ return (isTimeZoneToken(pattern) && TIME_ZONE_TOKENS.get(pattern) == nextToken.temporalUnit
+ || isTemporalToken(pattern) && TEMPORAL_TOKENS.get(pattern) == nextToken.temporalField);
+ }
+
+ @Override public String getPattern() {
+ return pattern;
+ }
+
+ /**
+ * @return a copy of token list
+ */
+  protected List<Token> getTokens() {
+ return new ArrayList<>(tokens);
+ }
+}
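
Putting the SQL:2016 formatter together, a small sketch whose expected outputs match the unit tests added below (the class name `SqlFormatterDemo` is hypothetical):

```java
import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
import org.apache.hadoop.hive.common.type.Timestamp;

public class SqlFormatterDemo {
  public static void main(String[] args) {
    // datetime to string: FF1 truncates the fraction to a single digit
    HiveSqlDateTimeFormatter out = new HiveSqlDateTimeFormatter("hh24:mi:ss.ff1", false);
    System.out.println(out.format(Timestamp.valueOf("2018-02-03 01:02:03.999999999")));
    // prints: 01:02:03.9

    // string to datetime: fields missing from the pattern default to the epoch (1970-01-01)
    HiveSqlDateTimeFormatter in = new HiveSqlDateTimeFormatter("hh:mi:ss A.M.", true);
    System.out.println(in.parseTimestamp("04:05:06 P.M."));
    // prints: 1970-01-01 16:05:06
  }
}
```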
diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/package-info.java common/src/java/org/apache/hadoop/hive/common/format/datetime/package-info.java
new file mode 100644
index 0000000000..1e838be886
--- /dev/null
+++ common/src/java/org/apache/hadoop/hive/common/format/datetime/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Deals with formatting and parsing of datetime objects.
+ */
+package org.apache.hadoop.hive.common.format.datetime;
diff --git common/src/java/org/apache/hadoop/hive/common/type/Date.java common/src/java/org/apache/hadoop/hive/common/type/Date.java
index 6ecfcf65c9..c1eb47153e 100644
--- common/src/java/org/apache/hadoop/hive/common/type/Date.java
+++ common/src/java/org/apache/hadoop/hive/common/type/Date.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -17,6 +17,9 @@
*/
package org.apache.hadoop.hive.common.type;
+
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+
import java.time.Instant;
import java.time.LocalDate;
import java.time.LocalDateTime;
@@ -72,6 +75,17 @@ public String toString() {
return localDate.format(PRINT_FORMATTER);
}
+ public String toStringFormatted(HiveDateTimeFormatter formatter) {
+ if (formatter == null) {
+ return toString();
+ }
+ try {
+ return formatter.format(this);
+ } catch (IllegalArgumentException e) {
+ return null;
+ }
+ }
+
public int hashCode() {
return localDate.hashCode();
}
@@ -137,6 +151,13 @@ public static Date valueOf(String s) {
return new Date(localDate);
}
+ public static Date valueOf(String s, HiveDateTimeFormatter formatter) {
+ if (formatter == null) {
+ return valueOf(s);
+ }
+ return formatter.parseDate(s);
+ }
+
public static Date ofEpochDay(int epochDay) {
return new Date(LocalDate.ofEpochDay(epochDay));
}
diff --git common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java
index a8b7b6d186..cea1e8c2e1 100644
--- common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java
+++ common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -17,6 +17,8 @@
*/
package org.apache.hadoop.hive.common.type;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
@@ -101,6 +103,17 @@ public String toString() {
return localDateTime.format(PRINT_FORMATTER);
}
+ public String toStringFormatted(HiveDateTimeFormatter formatter) {
+ if (formatter == null) {
+ return toString();
+ }
+ try {
+ return formatter.format(this);
+ } catch (IllegalArgumentException e) {
+ return null;
+ }
+ }
+
public int hashCode() {
return localDateTime.hashCode();
}
@@ -166,6 +179,13 @@ public static Timestamp valueOf(String s) {
return new Timestamp(localDateTime);
}
+ public static Timestamp valueOf(String s, HiveDateTimeFormatter formatter) {
+ if (formatter == null) {
+ return valueOf(s);
+ }
+ return formatter.parseTimestamp(s);
+ }
+
public static Timestamp ofEpochSecond(long epochSecond) {
return ofEpochSecond(epochSecond, 0);
}
diff --git common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java
index f26f8ae01e..525c95a63d 100644
--- common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java
+++ common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.common.type;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import java.math.BigDecimal;
@@ -171,6 +172,18 @@ public static long millisToSeconds(long millis) {
private static final int DATE_LENGTH = "YYYY-MM-DD".length();
+ public static Timestamp stringToTimestamp(String s, HiveDateTimeFormatter formatter) {
+ if (formatter == null) {
+ return stringToTimestamp(s);
+ }
+
+ try {
+ return Timestamp.valueOf(s, formatter);
+ } catch (IllegalArgumentException e) {
+ return null;
+ }
+ }
+
public static Timestamp stringToTimestamp(String s) {
s = s.trim();
// Handle simpler cases directly avoiding exceptions
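A quick sketch of the new TimestampUtils overload; unlike `Timestamp.valueOf`, it returns null instead of throwing when the input does not match the pattern (the class name `StringToTimestampDemo` is hypothetical):

```java
import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
import org.apache.hadoop.hive.common.type.Timestamp;
import org.apache.hadoop.hive.common.type.TimestampUtils;

public class StringToTimestampDemo {
  public static void main(String[] args) {
    HiveDateTimeFormatter f = new HiveSqlDateTimeFormatter("dd/mm/yyyy", true);
    System.out.println(TimestampUtils.stringToTimestamp("03/02/2018", f)); // 2018-02-03 00:00:00
    System.out.println(TimestampUtils.stringToTimestamp("99/99/2018", f)); // null (day 99 invalid)
  }
}
```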
diff --git common/src/java/org/apache/hive/common/util/DateParser.java common/src/java/org/apache/hive/common/util/DateParser.java
index 5db14f1906..22bcd98c1d 100644
--- common/src/java/org/apache/hive/common/util/DateParser.java
+++ common/src/java/org/apache/hive/common/util/DateParser.java
@@ -17,6 +17,7 @@
*/
package org.apache.hive.common.util;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.type.Date;
/**
@@ -36,9 +37,13 @@ public Date parseDate(String strValue) {
}
public boolean parseDate(String strValue, Date result) {
+ return parseDate(strValue, result, null);
+ }
+
+ public boolean parseDate(String strValue, Date result, HiveDateTimeFormatter formatter) {
Date parsedVal;
try {
- parsedVal = Date.valueOf(strValue);
+ parsedVal = Date.valueOf(strValue, formatter);
} catch (IllegalArgumentException e) {
parsedVal = null;
}
diff --git common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveJavaDateTimeFormatter.java common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveJavaDateTimeFormatter.java
new file mode 100644
index 0000000000..82009f08e1
--- /dev/null
+++ common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveJavaDateTimeFormatter.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.time.format.DateTimeFormatter;
+import java.time.format.DateTimeFormatterBuilder;
+import java.time.temporal.ChronoField;
+
+/**
+ * Test class for HiveJavaDateTimeFormatter.
+ */
+public class TestHiveJavaDateTimeFormatter {
+
+ private static final DateTimeFormatter DATE_TIME_FORMATTER;
+ static {
+ DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder();
+ builder.append(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
+ builder.optionalStart().appendFraction(ChronoField.NANO_OF_SECOND, 0, 9, true).optionalEnd();
+ DATE_TIME_FORMATTER = builder.toFormatter();
+ }
+ private HiveDateTimeFormatter formatter = new HiveJavaDateTimeFormatter(DATE_TIME_FORMATTER);
+
+ @Test
+ public void testFormat() {
+ Timestamp ts = Timestamp.valueOf("2019-01-01 00:00:00.99999");
+ Assert.assertEquals("2019-01-01 00:00:00.99999", formatter.format(ts));
+ }
+
+ @Test
+ public void testParse() {
+ String s = "2019-01-01 00:00:00.99999";
+ Assert.assertEquals(Timestamp.valueOf("2019-01-01 00:00:00.99999"),
+ formatter.parseTimestamp(s));
+ }
+
+}
diff --git common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSimpleDateFormatter.java common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSimpleDateFormatter.java
new file mode 100644
index 0000000000..d189c7b042
--- /dev/null
+++ common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSimpleDateFormatter.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.time.ZoneOffset;
+import java.util.TimeZone;
+
+/**
+ * Tests HiveSimpleDateFormatter.
+ */
+public class TestHiveSimpleDateFormatter {
+
+ private HiveDateTimeFormatter formatter =
+ new HiveSimpleDateFormatter("yyyy-MM-dd HH:mm:ss", TimeZone.getTimeZone(ZoneOffset.UTC));
+
+ @Test
+ public void testFormat() {
+ verifyFormat("2019-01-01 01:01:01");
+ verifyFormat("2019-01-01 00:00:00");
+ verifyFormat("1960-01-01 23:00:00");
+ }
+
+ private void verifyFormat(String s) {
+ Timestamp ts = Timestamp.valueOf(s);
+ Assert.assertEquals(s, formatter.format(ts));
+ }
+
+ @Test
+ public void testParse() {
+ verifyParse("2019-01-01 01:10:10");
+ verifyParse("1960-01-01 23:00:00");
+  }
+
+ private void verifyParse(String s) {
+ Timestamp ts = Timestamp.valueOf(s);
+ Assert.assertEquals(ts, formatter.parseTimestamp(s));
+ }
+}
diff --git common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java
new file mode 100644
index 0000000000..1557f41032
--- /dev/null
+++ common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java
@@ -0,0 +1,308 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import junit.framework.TestCase;
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+import java.time.format.DateTimeFormatterBuilder;
+import java.time.format.ResolverStyle;
+import java.time.format.SignStyle;
+import java.time.temporal.ChronoField;
+import java.time.temporal.TemporalField;
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import static java.time.temporal.ChronoField.DAY_OF_MONTH;
+import static java.time.temporal.ChronoField.HOUR_OF_DAY;
+import static java.time.temporal.ChronoField.MINUTE_OF_HOUR;
+import static java.time.temporal.ChronoField.MONTH_OF_YEAR;
+import static java.time.temporal.ChronoField.SECOND_OF_MINUTE;
+import static java.time.temporal.ChronoField.YEAR;
+
+/**
+ * Tests HiveSqlDateTimeFormatter.
+ */
+public class TestHiveSqlDateTimeFormatter extends TestCase {
+
+ private HiveSqlDateTimeFormatter formatter;
+
+ public void testSetPattern() {
+ verifyPatternParsing(" ---yyyy-\'-:- -,.;/MM-dd--", new ArrayList<>(List.of(
+ null, // represents separator, which has no temporal field
+ ChronoField.YEAR,
+ null,
+ ChronoField.MONTH_OF_YEAR,
+ null,
+ ChronoField.DAY_OF_MONTH,
+ null
+ )));
+
+ verifyPatternParsing("ymmdddhh24::mi:ss A.M. pm", 25, "ymmdddhh24::mi:ss A.M. pm",
+        new ArrayList<>(Arrays.asList(
+ ChronoField.YEAR,
+ ChronoField.MONTH_OF_YEAR,
+ ChronoField.DAY_OF_YEAR,
+ ChronoField.HOUR_OF_DAY,
+ null, ChronoField.MINUTE_OF_HOUR,
+ null, ChronoField.SECOND_OF_MINUTE,
+ null, ChronoField.AMPM_OF_DAY,
+ null, ChronoField.AMPM_OF_DAY
+ )));
+ }
+
+ public void testSetPatternWithBadPatterns() {
+ verifyBadPattern("e", true);
+ verifyBadPattern("yyyy-1", true);
+
+ verifyBadPattern("yyyy Y", true);
+ verifyBadPattern("yyyy R", true);
+ verifyBadPattern("yyyy-MM-DDD", true);
+ verifyBadPattern("yyyy-mm-DD DDD", true);
+ verifyBadPattern("yyyy-mm-dd HH24 HH12", true);
+ verifyBadPattern("yyyy-mm-dd HH24 AM", true);
+ verifyBadPattern("yyyy-mm-dd HH24 SSSSS", true);
+ verifyBadPattern("yyyy-mm-dd HH12 SSSSS", true);
+ verifyBadPattern("yyyy-mm-dd SSSSS AM", true);
+ verifyBadPattern("yyyy-mm-dd MI SSSSS", true);
+ verifyBadPattern("yyyy-mm-dd SS SSSSS", true);
+
+ verifyBadPattern("tzm", false);
+ verifyBadPattern("tzh", false);
+ }
+
+ public void testFormatTimestamp() {
+ checkFormatTs("rr rrrr ddd", "2018-01-03 00:00:00", "18 2018 003");
+ checkFormatTs("yyyy-mm-ddtsssss.ff4z", "2018-02-03 00:00:10.777777777", "2018-02-03T00010.7777Z");
+ checkFormatTs("hh24:mi:ss.ff1", "2018-02-03 01:02:03.999999999", "01:02:03.9");
+ checkFormatTs("y yyy hh:mi:ss.ffz", "2018-02-03 01:02:03.0070070", "8 018 01:02:03.007007Z");
+ checkFormatTs("am a.m. pm p.m. AM A.M. PM P.M.", "2018-02-03 01:02:03.0070070", "am a.m. am a.m. AM A.M. AM A.M.");
+ }
+
+ private void checkFormatTs(String pattern, String input, String expectedOutput) {
+ formatter = new HiveSqlDateTimeFormatter(pattern, false);
+ assertEquals(expectedOutput, formatter.format(toTimestamp(input)));
+ }
+
+ public void testFormatDate() {
+ checkFormatDate("rr rrrr ddd", "2018-01-03", "18 2018 003");
+ checkFormatDate("yyyy-mm-ddtsssss.ff4z", "2018-02-03", "2018-02-03T00000.0000Z");
+ checkFormatDate("hh24:mi:ss.ff1", "2018-02-03", "00:00:00.0");
+ checkFormatDate("y yyy T hh:mi:ss.ffz", "2018-02-03", "8 018 T 00:00:00.0Z");
+ checkFormatDate("am a.m. pm p.m. AM A.M. PM P.M.", "2018-02-03", "am a.m. am a.m. AM A.M. AM A.M.");
+ checkFormatDate("DDD", "2019-12-31", "365");
+ checkFormatDate("DDD", "2020-12-31", "366");
+ }
+
+ private void checkFormatDate(String pattern, String input, String expectedOutput) {
+ formatter = new HiveSqlDateTimeFormatter(pattern, false);
+ assertEquals(expectedOutput, formatter.format(toDate(input)));
+ }
+
+ public void testParseTimestamp() {
+ String thisYearString = String.valueOf(LocalDateTime.now().getYear());
+ int firstTwoDigits = getFirstTwoDigits();
+
+ //y
+ checkParseTimestamp("y-mm-dd", "0-02-03", thisYearString.substring(0, 3) + "0-02-03 00:00:00");
+ checkParseTimestamp("yy-mm-dd", "00-02-03", thisYearString.substring(0, 2) + "00-02-03 00:00:00");
+ checkParseTimestamp("yyy-mm-dd", "000-02-03", thisYearString.substring(0, 1) + "000-02-03 00:00:00");
+ checkParseTimestamp("yyyy-mm-dd", "000-02-03", thisYearString.substring(0, 1) + "000-02-03 00:00:00");
+ checkParseTimestamp("rr-mm-dd", "0-02-03", thisYearString.substring(0, 3) + "0-02-03 00:00:00");
+ checkParseTimestamp("rrrr-mm-dd", "000-02-03", thisYearString.substring(0, 1) + "000-02-03 00:00:00");
+
+ //rr, rrrr
+ checkParseTimestamp("rr-mm-dd", "00-02-03", firstTwoDigits + 1 + "00-02-03 00:00:00");
+ checkParseTimestamp("rr-mm-dd", "49-02-03", firstTwoDigits + 1 + "49-02-03 00:00:00");
+ checkParseTimestamp("rr-mm-dd", "50-02-03", firstTwoDigits + "50-02-03 00:00:00");
+ checkParseTimestamp("rr-mm-dd", "99-02-03", firstTwoDigits + "99-02-03 00:00:00");
+ checkParseTimestamp("rrrr-mm-dd", "00-02-03", firstTwoDigits + 1 + "00-02-03 00:00:00");
+ checkParseTimestamp("rrrr-mm-dd", "49-02-03", firstTwoDigits + 1 + "49-02-03 00:00:00");
+ checkParseTimestamp("rrrr-mm-dd", "50-02-03", firstTwoDigits + "50-02-03 00:00:00");
+ checkParseTimestamp("rrrr-mm-dd", "99-02-03", firstTwoDigits + "99-02-03 00:00:00");
+
+ //everything else
+ checkParseTimestamp("yyyy-mm-ddThh24:mi:ss.ff8z", "2018-02-03T04:05:06.5665Z", "2018-02-03 04:05:06.5665");
+ checkParseTimestamp("yyyy-mm-dd hh24:mi:ss.ff", "2018-02-03 04:05:06.555555555", "2018-02-03 04:05:06.555555555");
+ checkParseTimestamp("yyyy-mm-dd hh12:mi:ss", "2099-2-03 04:05:06", "2099-02-03 04:05:06");
+ checkParseTimestamp("yyyyddd", "2018284", "2018-10-11 00:00:00");
+ checkParseTimestamp("yyyyddd", "20184", "2018-01-04 00:00:00");
+ checkParseTimestamp("yyyy-mm-ddThh24:mi:ss.ffz", "2018-02-03t04:05:06.444Z", "2018-02-03 04:05:06.444");
+ checkParseTimestamp("hh:mi:ss A.M.", "04:05:06 P.M.", "1970-01-01 16:05:06");
+ checkParseTimestamp("YYYY-MM-DD HH24:MI TZH:TZM", "2019-1-1 14:00--1:-30", "2019-01-01 15:30:00");
+ checkParseTimestamp("YYYY-MM-DD HH24:MI TZH:TZM", "2019-1-1 14:00-1:30", "2019-01-01 12:30:00");
+ checkParseTimestamp("TZM:TZH", "1 -3", "1970-01-01 03:01:00");
+ checkParseTimestamp("TZH:TZM", "-0:30", "1970-01-01 00:30:00");
+ checkParseTimestamp("TZM/YYY-MM-TZH/DD", "0/333-01-11/02", "2333-01-01 13:00:00");
+ checkParseTimestamp("YYYY-MM-DD HH12:MI AM", "2019-01-01 11:00 p.m.", "2019-01-01 23:00:00");
+ checkParseTimestamp("YYYY-MM-DD HH12:MI A.M..", "2019-01-01 11:00 pm.", "2019-01-01 23:00:00");
+
+ //Test "day in year" token in a leap year scenario
+ checkParseTimestamp("YYYY DDD", "2000 60", "2000-02-29 00:00:00");
+ checkParseTimestamp("YYYY DDD", "2000 61", "2000-03-01 00:00:00");
+ checkParseTimestamp("YYYY DDD", "2000 366", "2000-12-31 00:00:00");
+ //Test timezone offset parsing without separators
+ checkParseTimestamp("YYYYMMDDHH12MIA.M.TZHTZM", "201812310800AM+0515", "2018-12-31 02:45:00");
+ checkParseTimestamp("YYYYMMDDHH12MIA.M.TZHTZM", "201812310800AM0515", "2018-12-31 02:45:00");
+ checkParseTimestamp("YYYYMMDDHH12MIA.M.TZHTZM", "201812310800AM-0515", "2018-12-31 13:15:00");
+ }
+
+ private int getFirstTwoDigits() {
+ int thisYear = LocalDateTime.now().getYear();
+ int firstTwoDigits = thisYear / 100;
+ if (thisYear % 100 < 50) {
+ firstTwoDigits -= 1;
+ }
+ return firstTwoDigits;
+ }
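+
+ // Worked example of the rr/rrrr rule above, assuming the current year is 2019:
+ // getFirstTwoDigits() returns 19 (2019 / 100 - 1, since 2019 % 100 = 19 < 50), so "00" and
+ // "49" parse to 2000 and 2049 via the 19 + 1 prefix, while "50" and "99" keep the 19 prefix
+ // and parse to 1950 and 1999.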
+
+ private void checkParseTimestamp(String pattern, String input, String expectedOutput) {
+ formatter = new HiveSqlDateTimeFormatter(pattern, true);
+ assertEquals(toTimestamp(expectedOutput), formatter.parseTimestamp(input));
+ }
+
+ public void testParseDate() {
+
+ String thisYearString = String.valueOf(LocalDateTime.now().getYear());
+ int firstTwoDigits = getFirstTwoDigits();
+ //y
+ checkParseDate("y-mm-dd", "0-02-03", thisYearString.substring(0, 3) + "0-02-03");
+ checkParseDate("yy-mm-dd", "00-02-03", thisYearString.substring(0, 2) + "00-02-03");
+ checkParseDate("yyy-mm-dd", "000-02-03", thisYearString.substring(0, 1) + "000-02-03");
+ checkParseDate("yyyy-mm-dd", "000-02-03", thisYearString.substring(0, 1) + "000-02-03");
+ checkParseDate("rr-mm-dd", "0-02-03", thisYearString.substring(0, 3) + "0-02-03");
+ checkParseDate("rrrr-mm-dd", "000-02-03", thisYearString.substring(0, 1) + "000-02-03");
+
+ //rr, rrrr
+ checkParseDate("rr-mm-dd", "00-02-03", firstTwoDigits + 1 + "00-02-03");
+ checkParseDate("rr-mm-dd", "49-02-03", firstTwoDigits + 1 + "49-02-03");
+ checkParseDate("rr-mm-dd", "50-02-03", firstTwoDigits + "50-02-03");
+ checkParseDate("rr-mm-dd", "99-02-03", firstTwoDigits + "99-02-03");
+ checkParseDate("rrrr-mm-dd", "00-02-03", firstTwoDigits + 1 + "00-02-03");
+ checkParseDate("rrrr-mm-dd", "49-02-03", firstTwoDigits + 1 + "49-02-03");
+ checkParseDate("rrrr-mm-dd", "50-02-03", firstTwoDigits + "50-02-03");
+ checkParseDate("rrrr-mm-dd", "99-02-03", firstTwoDigits + "99-02-03");
+
+ checkParseDate("yyyy-mm-dd hh mi ss.ff7", "2018/01/01 2.2.2.55", "2018-01-01");
+ }
+
+ private void checkParseDate(String pattern, String input, String expectedOutput) {
+ formatter = new HiveSqlDateTimeFormatter(pattern, true);
+ assertEquals(toDate(expectedOutput), formatter.parseDate(input));
+ }
+
+ public void testParseTimestampError() {
+ verifyBadParseString("yyyy", "2019-02-03");
+ verifyBadParseString("yyyy-mm-dd ", "2019-02-03"); //separator missing
+ verifyBadParseString("yyyy-mm-dd", "2019-02-03..."); //extra separators
+ verifyBadParseString("yyyy-mm-dd hh12:mi:ss", "2019-02-03 14:00:00"); //hh12 out of range
+ verifyBadParseString("yyyy-dddsssss", "2019-912345");
+ verifyBadParseString("yyyy-mm-dd", "2019-13-23"); //mm out of range
+ verifyBadParseString("yyyy-mm-dd tzh:tzm", "2019-01-01 +16:00"); //tzh out of range
+ verifyBadParseString("yyyy-mm-dd tzh:tzm", "2019-01-01 +14:60"); //tzm out of range
+ verifyBadParseString("YYYY DDD", "2000 367"); //ddd out of range
+ }
+
+ private void verifyBadPattern(String string, boolean forParsing) {
+ try {
+ formatter = new HiveSqlDateTimeFormatter(string, forParsing);
+ fail();
+ } catch (Exception e) {
+ assertEquals(IllegalArgumentException.class.getName(), e.getClass().getName());
+ }
+ }
+
+ /**
+ * Verify that the pattern is parsed correctly. Checks:
+ * - token.temporalField for each token
+ * - the sum of token lengths
+ * - the concatenation of token strings
+ */
+ private void verifyPatternParsing(String pattern, ArrayList<TemporalField> temporalFields) {
+ verifyPatternParsing(pattern, pattern.length(), pattern.toLowerCase(), temporalFields);
+ }
+
+ private void verifyPatternParsing(String pattern, int expectedPatternLength,
+ String expectedPattern, ArrayList<TemporalField> temporalFields) {
+ formatter = new HiveSqlDateTimeFormatter(pattern, false);
+ assertEquals(temporalFields.size(), formatter.getTokens().size());
+ StringBuilder sb = new StringBuilder();
+ int actualPatternLength = 0;
+ for (int i = 0; i < temporalFields.size(); i++) {
+ assertEquals("Generated list of tokens not correct", temporalFields.get(i),
+ formatter.getTokens().get(i).temporalField);
+ sb.append(formatter.getTokens().get(i).string);
+ actualPatternLength += formatter.getTokens().get(i).length;
+ }
+ assertEquals("Token strings concatenated don't match original pattern string",
+ expectedPattern, sb.toString());
+ assertEquals(expectedPatternLength, actualPatternLength);
+ }
+
+ private void verifyBadParseString(String pattern, String string) {
+ try {
+ formatter = new HiveSqlDateTimeFormatter(pattern, true);
+ formatter.parseTimestamp(string);
+ fail();
+ } catch (Exception e) {
+ assertEquals(IllegalArgumentException.class.getName(), e.getClass().getName());
+ }
+ }
+
+ // Methods that construct datetime objects using java.time.DateTimeFormatter.
+
+ public static Date toDate(String s) {
+ LocalDate localDate = LocalDate.parse(s, DATE_FORMATTER);
+ return Date.ofEpochDay((int) localDate.toEpochDay());
+ }
+
+ /**
+ * This is effectively the old Timestamp.valueOf method.
+ */
+ public static Timestamp toTimestamp(String s) {
+ LocalDateTime localDateTime = LocalDateTime.parse(s.trim(), TIMESTAMP_FORMATTER);
+ return Timestamp.ofEpochSecond(
+ localDateTime.toEpochSecond(ZoneOffset.UTC), localDateTime.getNano());
+ }
+
+ private static final DateTimeFormatter DATE_FORMATTER =
+ DateTimeFormatter.ofPattern("yyyy-MM-dd");
+ private static final DateTimeFormatter TIMESTAMP_FORMATTER;
+ static {
+ DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder();
+ builder.appendValue(YEAR, 1, 10, SignStyle.NORMAL).appendLiteral('-')
+ .appendValue(MONTH_OF_YEAR, 1, 2, SignStyle.NORMAL).appendLiteral('-')
+ .appendValue(DAY_OF_MONTH, 1, 2, SignStyle.NORMAL)
+ .optionalStart().appendLiteral(" ")
+ .appendValue(HOUR_OF_DAY, 1, 2, SignStyle.NORMAL).appendLiteral(':')
+ .appendValue(MINUTE_OF_HOUR, 1, 2, SignStyle.NORMAL).appendLiteral(':')
+ .appendValue(SECOND_OF_MINUTE, 1, 2, SignStyle.NORMAL)
+ .optionalStart().appendFraction(ChronoField.NANO_OF_SECOND, 1, 9, true).optionalEnd()
+ .optionalEnd();
+ TIMESTAMP_FORMATTER = builder.toFormatter().withResolverStyle(ResolverStyle.LENIENT);
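+ // With the optional sections and lenient resolver above, this accepts e.g.
+ // "2018-2-3", "2018-2-3 4:5:6" and "2018-02-03 04:05:06.123456789".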
+ }
+}
diff --git common/src/test/org/apache/hadoop/hive/common/format/datetime/package-info.java common/src/test/org/apache/hadoop/hive/common/format/datetime/package-info.java
new file mode 100644
index 0000000000..70ee4266f4
--- /dev/null
+++ common/src/test/org/apache/hadoop/hive/common/format/datetime/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests formatting and parsing of datetime objects.
+ */
+package org.apache.hadoop.hive.common.format.datetime;
diff --git common/src/test/org/apache/hive/common/util/TestTimestampParser.java common/src/test/org/apache/hive/common/util/TestTimestampParser.java
index 00a7904ecf..5bf1119cef 100644
--- common/src/test/org/apache/hive/common/util/TestTimestampParser.java
+++ common/src/test/org/apache/hive/common/util/TestTimestampParser.java
@@ -116,8 +116,7 @@ public void testPattern1() {
};
String[] invalidCases = {
- "1945-12-31-23:59:59",
- "12345",
+ "12345"
};
testValidCases(tp, validCases);
@@ -147,8 +146,7 @@ public void testMillisParser() {
};
String[] invalidCases = {
- "1945-12-31-23:59:59",
- "1420509274123-",
+ "1420509274123-"
};
testValidCases(tp, validCases);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index d08b05fb68..c09db9af65 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -528,6 +528,7 @@
system.registerGenericUDF("to_epoch_milli", GenericUDFEpochMilli.class);
system.registerGenericUDF("bucket_number", GenericUDFBucketNumber.class);
system.registerGenericUDF("tumbling_window", GenericUDFTumbledWindow.class);
+ system.registerGenericUDF("cast_format", GenericUDFCastFormat.class);
// Generic UDTF's
system.registerGenericUDTF("explode", GenericUDTFExplode.class);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index fa9d1e9783..465464167f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -42,8 +42,11 @@
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastBooleanToVarCharViaLongToVarChar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastCharToBinary;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToChar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToCharWithFormat;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToString;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToStringWithFormat;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToVarChar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToVarCharWithFormat;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToChar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToDecimal;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToString;
@@ -65,12 +68,17 @@
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringGroupToChar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringGroupToVarChar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToBoolean;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToDateWithFormat;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToDecimal;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToTimestampWithFormat;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToChar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToCharWithFormat;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToDecimal;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToDouble;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToString;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToStringWithFormat;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToVarChar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToVarCharWithFormat;
import org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.ConvertDecimal64ToDecimal;
import org.apache.hadoop.hive.ql.exec.vector.expressions.Decimal64ColumnInList;
@@ -155,6 +163,7 @@
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.TimestampColumnNotBetween;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.VarCharColumnBetween;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.VarCharColumnNotBetween;
+import org.apache.hadoop.hive.serde.serdeConstants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation;
@@ -2290,6 +2299,8 @@ private VectorExpression getGenericUdfVectorExpression(GenericUDF udf,
ve = new BucketNumExpression(outCol);
ve.setInputTypeInfos(returnType);
ve.setOutputTypeInfo(returnType);
+ } else if (udf instanceof GenericUDFCastFormat) {
+ ve = getCastWithFormat(udf, childExpr, returnType);
}
if (ve != null) {
return ve;
@@ -3300,6 +3311,54 @@ private VectorExpression getCastToLongExpression(List<ExprNodeDesc> childExpr, P
return null;
}
+ private VectorExpression getCastWithFormat(
+ GenericUDF udf, List<ExprNodeDesc> childExpr, TypeInfo returnType) throws HiveException {
+ String inputType = childExpr.get(1).getTypeString();
+ childExpr.remove(0); // drop argument 0 (the target type token): returnType already carries it
+
+ Class<?> veClass = getCastFormatVectorExpressionClass(childExpr, returnType, inputType);
+ return createVectorExpression(
+ veClass, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ }
+
+ private Class<?> getCastFormatVectorExpressionClass(List<ExprNodeDesc> childExpr,
+ TypeInfo returnType, String inputType) throws HiveException {
+ switch (inputType) {
+ case serdeConstants.TIMESTAMP_TYPE_NAME:
+ if (returnType.getTypeName().equals(serdeConstants.STRING_TYPE_NAME)) {
+ return CastTimestampToStringWithFormat.class;
+ }
+ if (returnType.getTypeName().startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
+ return CastTimestampToVarCharWithFormat.class;
+ }
+ if (returnType.getTypeName().startsWith(serdeConstants.CHAR_TYPE_NAME)) {
+ return CastTimestampToCharWithFormat.class;
+ }
+ break;
+ case serdeConstants.DATE_TYPE_NAME:
+ if (returnType.getTypeName().equals(serdeConstants.STRING_TYPE_NAME)) {
+ return CastDateToStringWithFormat.class;
+ }
+ if (returnType.getTypeName().startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
+ return CastDateToVarCharWithFormat.class;
+ }
+ if (returnType.getTypeName().startsWith(serdeConstants.CHAR_TYPE_NAME)) {
+ return CastDateToCharWithFormat.class;
+ }
+ break;
+ }
+ if (inputType.equals(serdeConstants.STRING_TYPE_NAME)
+ || inputType.startsWith(serdeConstants.CHAR_TYPE_NAME)
+ || inputType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
+ switch (returnType.getTypeName()) {
+ case serdeConstants.TIMESTAMP_TYPE_NAME:
+ return CastStringToTimestampWithFormat.class;
+ case serdeConstants.DATE_TYPE_NAME:
+ return CastStringToDateWithFormat.class;
+ }
+ }
+ throw new HiveException(
+ "Expression cast " + inputType + " to " + returnType + " with format is not vectorizable");
+ }
+
private VectorExpression tryDecimal64Between(VectorExpressionDescriptor.Mode mode, boolean isNot,
ExprNodeDesc colExpr, List<ExprNodeDesc> childrenAfterNot, TypeInfo returnTypeInfo)
throws HiveException {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToCharWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToCharWithFormat.java
new file mode 100644
index 0000000000..3093dd7ecf
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToCharWithFormat.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Vectorized UDF for CAST (<date> TO CHAR(<length>) WITH FORMAT <pattern>).
+ */
+public class CastDateToCharWithFormat extends CastDateToChar {
+
+ private static final long serialVersionUID = 1L;
+ private HiveDateTimeFormatter formatter;
+
+ public CastDateToCharWithFormat() {
+ super();
+ }
+
+ public CastDateToCharWithFormat(int inputColumn, byte[] patternBytes, int len, int outputColumnNum) {
+ super(inputColumn, outputColumnNum);
+
+ if (patternBytes == null) {
+ throw new IllegalStateException("Tried to cast ( to char with format ),"
+ + " but not found");
+ }
+ formatter =
+ new HiveSqlDateTimeFormatter(new String(patternBytes, StandardCharsets.UTF_8), false);
+ }
+
+ @Override
+ protected void func(BytesColumnVector outV, long[] vector, int i) {
+ super.func(outV, vector, i, formatter);
+ }
+
+ @Override
+ public String vectorExpressionParameters() {
+ return super.vectorExpressionParameters() + ", format pattern: " + formatter.getPattern();
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java
index dfa9f8a00d..d206bbb00a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java
@@ -18,28 +18,31 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSimpleDateFormatter;
+import org.apache.hadoop.hive.common.type.Timestamp;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.serde2.io.DateWritableV2;
import java.sql.Date;
-import java.text.SimpleDateFormat;
import java.util.TimeZone;
public class CastDateToString extends LongToStringUnaryUDF {
private static final long serialVersionUID = 1L;
protected transient Date dt = new Date(0);
- private transient SimpleDateFormat formatter;
+ private transient HiveDateTimeFormatter formatter;
public CastDateToString() {
super();
- formatter = new SimpleDateFormat("yyyy-MM-dd");
- formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+ initFormatter();
}
public CastDateToString(int inputColumn, int outputColumnNum) {
super(inputColumn, outputColumnNum);
- formatter = new SimpleDateFormat("yyyy-MM-dd");
- formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+ initFormatter();
+ }
+
+ private void initFormatter() {
+ formatter = new HiveSimpleDateFormatter("yyyy-MM-dd", TimeZone.getTimeZone("UTC"));
}
// The assign method will be overridden for CHAR and VARCHAR.
@@ -47,10 +50,23 @@ protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) {
outV.setVal(i, bytes, 0, length);
}
+ private void assignNull(BytesColumnVector outV, int i) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ }
+
@Override
protected void func(BytesColumnVector outV, long[] vector, int i) {
- dt.setTime(DateWritableV2.daysToMillis((int) vector[i]));
- byte[] temp = formatter.format(dt).getBytes();
- assign(outV, i, temp, temp.length);
+ func(outV, vector, i, formatter);
+ }
+
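+ // Shared by CastDateToString and its *WithFormat subclasses: formats the epoch-day value
+ // vector[i] with the supplied formatter; any formatting failure marks the output entry null
+ // instead of throwing.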
+ protected void func(BytesColumnVector outV, long[] vector, int i, HiveDateTimeFormatter formatter) {
+ try {
+ byte[] temp = formatter.format(
+ org.apache.hadoop.hive.common.type.Date.ofEpochDay((int) vector[i])).getBytes();
+ assign(outV, i, temp, temp.length);
+ } catch (Exception e) {
+ assignNull(outV, i);
+ }
}
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToStringWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToStringWithFormat.java
new file mode 100644
index 0000000000..e0321159df
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToStringWithFormat.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Vectorized UDF for CAST (<date> TO STRING WITH FORMAT <pattern>).
+ */
+public class CastDateToStringWithFormat extends CastDateToString {
+ private static final long serialVersionUID = 1L;
+ protected transient Date dt;
+ private HiveDateTimeFormatter formatter;
+
+ public CastDateToStringWithFormat() {
+ super();
+ }
+
+ public CastDateToStringWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) {
+ super(inputColumn, outputColumnNum);
+
+ if (patternBytes == null) {
+ throw new IllegalStateException("Tried to cast ( to string with format ),"
+ + " but not found");
+ }
+ formatter =
+ new HiveSqlDateTimeFormatter(new String(patternBytes, StandardCharsets.UTF_8), false);
+ }
+
+ // The assign method will be overridden for CHAR and VARCHAR.
+ protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) {
+ outV.setVal(i, bytes, 0, length);
+ }
+
+ @Override
+ protected void func(BytesColumnVector outV, long[] vector, int i) {
+ super.func(outV, vector, i, formatter);
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToVarCharWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToVarCharWithFormat.java
new file mode 100644
index 0000000000..c84f223b82
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToVarCharWithFormat.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Vectorized UDF for CAST (<date> TO VARCHAR(<length>) WITH FORMAT <pattern>).
+ */
+public class CastDateToVarCharWithFormat extends CastDateToVarChar {
+
+ private static final long serialVersionUID = 1L;
+ private HiveDateTimeFormatter formatter;
+
+ public CastDateToVarCharWithFormat() {
+ super();
+ }
+
+ public CastDateToVarCharWithFormat(int inputColumn, byte[] patternBytes, int len, int outputColumnNum) {
+ super(inputColumn, outputColumnNum);
+
+ if (patternBytes == null) {
+ throw new IllegalStateException("Tried to cast ( to varchar with format ),"
+ + " but not found");
+ }
+ formatter =
+ new HiveSqlDateTimeFormatter(new String(patternBytes, StandardCharsets.UTF_8), false);
+ }
+
+ @Override
+ protected void func(BytesColumnVector outV, long[] vector, int i) {
+ super.func(outV, vector, i, formatter);
+ }
+
+ @Override
+ public String vectorExpressionParameters() {
+ return super.vectorExpressionParameters() + ", format pattern: " + formatter.getPattern();
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
index a6dff12e1a..44a451b3bc 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.type.Date;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
@@ -151,10 +152,21 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException {
}
}
- private void evaluate(LongColumnVector outputColVector, BytesColumnVector inV, int i) {
+ /**
+ * Used by CastStringToDate.
+ */
+ protected void evaluate(LongColumnVector outputColVector, BytesColumnVector inV, int i) {
+ evaluate(outputColVector, inV, i, null);
+ }
+
+ /**
+ * Used by CastStringToDateWithFormat.
+ */
+ protected void evaluate(LongColumnVector outputColVector, BytesColumnVector inV, int i,
+ HiveDateTimeFormatter formatter) {
String dateString = new String(inV.vector[i], inV.start[i], inV.length[i], StandardCharsets.UTF_8);
Date hDate = new Date();
- if (dateParser.parseDate(dateString, hDate)) {
+ if (dateParser.parseDate(dateString, hDate, formatter)) {
outputColVector.vector[i] = DateWritableV2.dateToDays(hDate);
return;
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDateWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDateWithFormat.java
new file mode 100644
index 0000000000..e242ad5ea2
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDateWithFormat.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Vectorized UDF for CAST (<string> TO DATE WITH FORMAT <pattern>).
+ */
+public class CastStringToDateWithFormat extends CastStringToDate {
+
+ private static final long serialVersionUID = 1L;
+ private HiveDateTimeFormatter formatter;
+
+ public CastStringToDateWithFormat() {
+ super();
+ }
+
+ public CastStringToDateWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) {
+ super(inputColumn, outputColumnNum);
+
+ if (patternBytes == null) {
+ throw new IllegalStateException("Tried to cast ( to date with format ),"
+ + " but not found");
+ }
+ formatter =
+ new HiveSqlDateTimeFormatter(new String(patternBytes, StandardCharsets.UTF_8), true);
+ }
+
+ @Override
+ protected void evaluate(LongColumnVector outputColVector,
+ BytesColumnVector inputColVector, int i) {
+ super.evaluate(outputColVector, inputColVector, i, formatter);
+ }
+
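+ // Descriptor: the expression takes two arguments, the string-family input column and the
+ // constant (scalar) format pattern string.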
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+ b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+ .setNumArguments(2)
+ .setArgumentTypes(
+ VectorExpressionDescriptor.ArgumentType.STRING_FAMILY,
+ VectorExpressionDescriptor.ArgumentType.STRING)
+ .setInputExpressionTypes(
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.SCALAR);
+ return b.build();
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestamp.java
index b48b0136eb..f8d81cdb13 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestamp.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestamp.java
@@ -19,8 +19,9 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
import java.util.Arrays;
-import java.sql.Timestamp;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.type.Timestamp;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -143,21 +144,40 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException {
}
}
- private void evaluate(TimestampColumnVector outputColVector, BytesColumnVector inputColVector, int i) {
+ /**
+ * This is used by CastStringToTimestamp.
+ */
+ protected void evaluate(TimestampColumnVector outputColVector, BytesColumnVector inputColVector, int i) {
+ evaluate(outputColVector, inputColVector, i, null);
+ }
+
+ /**
+ * This is used by CastStringToTimestampWithFormat.
+ */
+ protected void evaluate(TimestampColumnVector outputColVector, BytesColumnVector inputColVector,
+ int i, HiveDateTimeFormatter formatter) {
try {
- org.apache.hadoop.hive.common.type.Timestamp timestamp =
- PrimitiveObjectInspectorUtils.getTimestampFromString(
+ Timestamp timestamp = PrimitiveObjectInspectorUtils.getTimestampFromString(
new String(
inputColVector.vector[i], inputColVector.start[i], inputColVector.length[i],
- "UTF-8"));
- outputColVector.set(i, timestamp.toSqlTimestamp());
+ "UTF-8"),
+ formatter);
+ if (timestamp != null) {
+ outputColVector.set(i, timestamp.toSqlTimestamp());
+ } else {
+ setNullValue(outputColVector, i);
+ }
} catch (Exception e) {
- outputColVector.setNullValue(i);
- outputColVector.isNull[i] = true;
- outputColVector.noNulls = false;
+ setNullValue(outputColVector, i);
}
}
+ private void setNullValue(TimestampColumnVector outputColVector, int i) {
+ outputColVector.setNullValue(i);
+ outputColVector.isNull[i] = true;
+ outputColVector.noNulls = false;
+ }
+
@Override
public String vectorExpressionParameters() {
return getColumnParamString(0, inputColumn);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestampWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestampWithFormat.java
new file mode 100644
index 0000000000..eeeaa5d935
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestampWithFormat.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Vectorized UDF for CAST (<string> TO TIMESTAMP WITH FORMAT <pattern>).
+ */
+public class CastStringToTimestampWithFormat extends CastStringToTimestamp {
+
+ private static final long serialVersionUID = 1L;
+ private HiveDateTimeFormatter formatter;
+
+ public CastStringToTimestampWithFormat() {
+ super();
+ }
+
+ public CastStringToTimestampWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) {
+ super(inputColumn, outputColumnNum);
+
+ if (patternBytes == null) {
+ throw new IllegalStateException("Tried to cast ( to timestamp with format"
+ + "), but not found");
+ }
+ formatter =
+ new HiveSqlDateTimeFormatter(new String(patternBytes, StandardCharsets.UTF_8), true);
+ }
+
+ @Override
+ protected void evaluate(TimestampColumnVector outputColVector,
+ BytesColumnVector inputColVector, int i) {
+ super.evaluate(outputColVector, inputColVector, i, formatter);
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+ b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+ .setNumArguments(2)
+ .setArgumentTypes(
+ VectorExpressionDescriptor.ArgumentType.STRING_FAMILY,
+ VectorExpressionDescriptor.ArgumentType.STRING)
+ .setInputExpressionTypes(
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.SCALAR);
+ return b.build();
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToCharWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToCharWithFormat.java
new file mode 100644
index 0000000000..5334200eba
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToCharWithFormat.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Vectorized UDF for CAST (<timestamp> TO CHAR(<length>) WITH FORMAT <pattern>).
+ */
+public class CastTimestampToCharWithFormat extends CastTimestampToChar {
+
+ private static final long serialVersionUID = 1L;
+ private HiveDateTimeFormatter formatter;
+
+ public CastTimestampToCharWithFormat() {
+ super();
+ }
+
+ public CastTimestampToCharWithFormat(int inputColumn, byte[] patternBytes, int len, int outputColumnNum) {
+ super(inputColumn, outputColumnNum);
+
+ if (patternBytes == null) {
+ throw new IllegalStateException("Tried to cast ( to char with format ),"
+ + " but not found");
+ }
+ formatter =
+ new HiveSqlDateTimeFormatter(new String(patternBytes, StandardCharsets.UTF_8), false);
+ }
+
+ @Override
+ protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i) {
+ super.func(outV, inV, i, formatter);
+ }
+
+ @Override
+ public String vectorExpressionParameters() {
+ return super.vectorExpressionParameters() + ", format pattern: " + formatter.getPattern();
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToString.java
index adc3a9d7b9..61da01fda0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToString.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToString.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveJavaDateTimeFormatter;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
@@ -42,12 +44,20 @@
PRINT_FORMATTER = builder.toFormatter();
}
+ private transient HiveDateTimeFormatter format;
+
public CastTimestampToString() {
super();
+ initFormatter();
}
public CastTimestampToString(int inputColumn, int outputColumnNum) {
super(inputColumn, outputColumnNum);
+ initFormatter();
+ }
+
+ private void initFormatter() {
+ format = new HiveJavaDateTimeFormatter(PRINT_FORMATTER);
}
// The assign method will be overridden for CHAR and VARCHAR.
@@ -55,14 +65,27 @@ protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) {
outV.setVal(i, bytes, 0, length);
}
+ private void assignNull(BytesColumnVector outV, int i) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ }
+
@Override
protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i) {
- byte[] temp = LocalDateTime.ofInstant(Instant.ofEpochMilli(inV.time[i]), ZoneOffset.UTC)
- .withNano(inV.nanos[i])
- .format(PRINT_FORMATTER).getBytes();
- assign(outV, i, temp, temp.length);
+ func(outV, inV, i, format);
}
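+ // Shared by CastTimestampToString and its *WithFormat subclasses: a formatting failure
+ // nulls out the output entry instead of throwing.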
+ protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i, HiveDateTimeFormatter formatter) {
+ try {
+ String formattedLocalDateTime = formatter.format(
+ org.apache.hadoop.hive.common.type.Timestamp.ofEpochMilli(inV.time[i], inV.nanos[i]));
+
+ byte[] temp = formattedLocalDateTime.getBytes();
+ assign(outV, i, temp, temp.length);
+ } catch (Exception e) {
+ assignNull(outV, i);
+ }
+ }
public static String getTimestampString(Timestamp ts) {
return
LocalDateTime.ofInstant(Instant.ofEpochMilli(ts.getTime()), ZoneOffset.UTC)
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToStringWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToStringWithFormat.java
new file mode 100644
index 0000000000..774fa6ff50
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToStringWithFormat.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Vectorized UDF for CAST (<timestamp> TO STRING WITH FORMAT <pattern>).
+ */
+public class CastTimestampToStringWithFormat extends CastTimestampToString {
+ private static final long serialVersionUID = 1L;
+ private HiveDateTimeFormatter formatter;
+
+ public CastTimestampToStringWithFormat() {
+ super();
+ }
+
+ public CastTimestampToStringWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) {
+ super(inputColumn, outputColumnNum);
+
+ if (patternBytes == null) {
+ throw new IllegalStateException("Tried to cast ( to string with format"
+ + " ), but not found");
+ }
+ formatter =
+ new HiveSqlDateTimeFormatter(new String(patternBytes, StandardCharsets.UTF_8), false);
+ }
+
+ @Override
+ protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i) {
+ super.func(outV, inV, i, formatter);
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToVarCharWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToVarCharWithFormat.java
new file mode 100644
index 0000000000..02c30f642d
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToVarCharWithFormat.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Vectorized UDF for CAST (<timestamp> TO VARCHAR(<length>) WITH FORMAT <pattern>).
+ */
+public class CastTimestampToVarCharWithFormat extends CastTimestampToVarChar {
+
+ private static final long serialVersionUID = 1L;
+ private HiveDateTimeFormatter formatter;
+
+ public CastTimestampToVarCharWithFormat() {
+ super();
+ }
+
+ public CastTimestampToVarCharWithFormat(int inputColumn, byte[] patternBytes, int len, int outputColumnNum) {
+ super(inputColumn, outputColumnNum);
+
+ if (patternBytes == null) {
+ throw new IllegalStateException("Tried to cast ( to varchar with format"
+ + "), but not found");
+ }
+ formatter =
+ new HiveSqlDateTimeFormatter(new String(patternBytes, StandardCharsets.UTF_8), false);
+ }
+
+ @Override
+ protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i) {
+ super.func(outV, inV, i, formatter);
+ }
+
+ @Override
+ public String vectorExpressionParameters() {
+ return super.vectorExpressionParameters() + ", format pattern: " + formatter.getPattern();
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index 58fe0cd32e..013079c3d2 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -247,8 +247,18 @@ castExpression
LPAREN
expression
KW_AS
- primitiveType
- RPAREN -> ^(TOK_FUNCTION primitiveType expression)
+ toType=primitiveType
+ (fmt=KW_FORMAT StringLiteral)?
+ RPAREN
+ // simple cast
+ -> {$fmt == null}? ^(TOK_FUNCTION $toType expression)
+
+ // cast ... format: children are a NumberLiteral holding the TOK_* type constant from HiveParser_IdentifiersParser, the expression, and the format pattern
+ -> {((CommonTree)toType.getTree()).getChild(0) == null}?
+ ^(TOK_FUNCTION {adaptor.create(Identifier, "cast_format")} NumberLiteral[Integer.toString(((CommonTree)toType.getTree()).token.getType())] expression StringLiteral)
+
+ // cast ... format to CHAR or VARCHAR: a fourth child carries the type's length
+ -> ^(TOK_FUNCTION {adaptor.create(Identifier, "cast_format")} NumberLiteral[Integer.toString(((CommonTree)toType.getTree()).token.getType())] expression StringLiteral NumberLiteral[((CommonTree)toType.getTree()).getChild(0).getText()])
;
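+ // e.g. CAST(e AS timestamp FORMAT 'yyyy') rewrites to ^(TOK_FUNCTION cast_format N e 'yyyy'),
+ // where N is the integer token type of TOK_TIMESTAMP; CHAR/VARCHAR targets append the length
+ // as a fourth child.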
caseExpression
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCastFormat.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCastFormat.java
new file mode 100644
index 0000000000..da6a26fcbf
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCastFormat.java
@@ -0,0 +1,188 @@
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.ImmutableMap;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.HiveParser_IdentifiersParser;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableDateObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveCharObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveVarcharObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableTimestampObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.Serializable;
+import java.util.Map;
+
+/**
+ * GenericUDF for CAST ... FORMAT. Vectorized counterparts: CastDateToCharWithFormat,
+ * CastDateToStringWithFormat, CastDateToVarCharWithFormat, CastTimestampToCharWithFormat,
+ * CastTimestampToStringWithFormat, CastTimestampToVarCharWithFormat.
+ * The @VectorizedExpressions annotation could not be used because, for example,
+ * CastXToCharWithFormat, CastXToStringWithFormat and CastXToVarCharWithFormat would share the
+ * same description.
+ */
+@Description(name = "cast_format",
+ value = "CAST( AS [FORMAT ]) - Converts a datetime value to string or"
+ + " string-type value to datetime based on the format pattern specified.",
+ extended = "If format is specified with FORMAT argument then SQL:2016 datetime formats will "
+ + "be used.\n"
+ + "Example:\n "
+ + " > SELECT CAST(\"2018-01-01 4 PM\" AS timestamp FORMAT \"yyyy-mm-dd hh12 AM\");\n"
+ + " 2018-01-01 16:00:00")
+public class GenericUDFCastFormat extends GenericUDF implements Serializable {
+
+ private static final Logger LOG = LoggerFactory.getLogger(GenericUDFCastFormat.class.getName());
+
+ @VisibleForTesting
+ protected static final Map<Integer, String> OUTPUT_TYPES = ImmutableMap.<Integer, String>builder()
+ .put(HiveParser_IdentifiersParser.TOK_STRING, serdeConstants.STRING_TYPE_NAME)
+ .put(HiveParser_IdentifiersParser.TOK_VARCHAR, serdeConstants.VARCHAR_TYPE_NAME)
+ .put(HiveParser_IdentifiersParser.TOK_CHAR, serdeConstants.CHAR_TYPE_NAME)
+ .put(HiveParser_IdentifiersParser.TOK_TIMESTAMP, serdeConstants.TIMESTAMP_TYPE_NAME)
+ .put(HiveParser_IdentifiersParser.TOK_DATE, serdeConstants.DATE_TYPE_NAME).build();
+
+ private transient ObjectInspectorConverters.ConverterWithFormatOption converter;
+
+ public GenericUDFCastFormat() {
+ }
+
+ /**
+ * @param arguments
+ * 0. const int, value of a HiveParser_IdentifiersParser constant which represents a TOK_[TYPE]
+ * 1. expression to convert
+ * 2. constant string, format pattern
+ * 3. (optional) constant int, output char/varchar length
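+ *
+ * For example, CAST(ts AS char(10) FORMAT 'yyyy') reaches this UDF as
+ * (<int token type of TOK_CHAR>, ts, 'yyyy', 10), per the castExpression grammar rewrite.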
+ */
+ @Override public ObjectInspector initialize(ObjectInspector[] arguments)
+ throws UDFArgumentException {
+ if (arguments.length != 3 && arguments.length != 4) {
+ throw new UDFArgumentException(
+ "Function cast_format requires 3 or 4 arguments (int, expression, StringLiteral"
+ + "[, var/char length]), got " + arguments.length);
+ }
+
+ PrimitiveObjectInspector outputOI = getOutputOI(arguments);
+ PrimitiveObjectInspector inputOI;
+ try {
+ inputOI = (PrimitiveObjectInspector) arguments[1];
+ } catch (ClassCastException e) {
+ throw new UDFArgumentException(
+ "Function CAST...as ... FORMAT ...takes only primitive types");
+ }
+ PrimitiveObjectInspectorUtils.PrimitiveGrouping inputPG =
+ PrimitiveObjectInspectorUtils.getPrimitiveGrouping(inputOI.getPrimitiveCategory());
+ PrimitiveObjectInspectorUtils.PrimitiveGrouping outputPG =
+ PrimitiveObjectInspectorUtils.getPrimitiveGrouping(outputOI.getPrimitiveCategory());
+
+ if (inputOI.getPrimitiveCategory()
+ == PrimitiveObjectInspector.PrimitiveCategory.TIMESTAMPLOCALTZ) {
+ throw new UDFArgumentException(
+ "Timestamp with local time zone not yet supported for cast ... format function");
+ }
+ if (!(inputPG == PrimitiveObjectInspectorUtils.PrimitiveGrouping.STRING_GROUP
+ && outputPG == PrimitiveObjectInspectorUtils.PrimitiveGrouping.DATE_GROUP
+ || inputPG == PrimitiveObjectInspectorUtils.PrimitiveGrouping.DATE_GROUP
+ && outputPG == PrimitiveObjectInspectorUtils.PrimitiveGrouping.STRING_GROUP
+ || inputPG == PrimitiveObjectInspectorUtils.PrimitiveGrouping.VOID_GROUP)) {
+ throw new UDFArgumentException(
+ "Function CAST...as ... FORMAT ... only converts datetime objects to string types"
+ + " and string or void objects to datetime types. Type of object provided: "
+ + outputOI.getPrimitiveCategory() + " in primitive grouping " + inputPG
+ + ", type provided: " + inputOI.getPrimitiveCategory() + " in primitive grouping "
+ + outputPG);
+ }
+
+ boolean forParsing = (outputPG == PrimitiveObjectInspectorUtils.PrimitiveGrouping.DATE_GROUP);
+ converter = getConverter(inputOI, outputOI);
+ if (converter == null) {
+ throw new UDFArgumentException("Function Function CAST...as ... FORMAT ... couldn't create "
+ + "converter from inputOI " + inputOI + " and outputOI " + outputOI);
+ }
+ converter.setDateTimeFormatter(
+ new HiveSqlDateTimeFormatter(getConstantStringValue(arguments, 2), forParsing));
+
+ return outputOI;
+ }
+
+ private PrimitiveObjectInspector getOutputOI(ObjectInspector[] arguments)
+ throws UDFArgumentException {
+ int key = getConstantIntValue(arguments, 0);
+ if (!OUTPUT_TYPES.containsKey(key)) {
+ throw new UDFArgumentException("Cast...format can only convert to DATE, TIMESTAMP, STRING,"
+ + " VARCHAR, CHAR. Can't convert to HiveParser_IdentifiersParser constant with value "
+ + key);
+ }
+ String typeString = OUTPUT_TYPES.get(key);
+ if (serdeConstants.VARCHAR_TYPE_NAME.equals(typeString)
+ || serdeConstants.CHAR_TYPE_NAME.equals(typeString)) {
+ if (arguments.length < 4 || arguments[3] == null) {
+ throw new UDFArgumentException(typeString + " missing length argument");
+ }
+ typeString += "(" + getConstantIntValue(arguments, 3) + ")";
+ }
+ PrimitiveTypeInfo typeInfo = TypeInfoFactory.getPrimitiveTypeInfo(typeString);
+ return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
+ }
+
+ private ObjectInspectorConverters.ConverterWithFormatOption getConverter(
+ PrimitiveObjectInspector inputOI, PrimitiveObjectInspector outputOI) {
+ switch (outputOI.getPrimitiveCategory()) {
+ case STRING:
+ return new PrimitiveObjectInspectorConverter.TextConverter(inputOI);
+ case CHAR:
+ return new PrimitiveObjectInspectorConverter.HiveCharConverter(inputOI,
+ (SettableHiveCharObjectInspector) outputOI);
+ case VARCHAR:
+ return new PrimitiveObjectInspectorConverter.HiveVarcharConverter(inputOI,
+ (SettableHiveVarcharObjectInspector) outputOI);
+ case TIMESTAMP:
+ return new PrimitiveObjectInspectorConverter.TimestampConverter(inputOI,
+ (SettableTimestampObjectInspector) outputOI);
+ case DATE:
+ return new PrimitiveObjectInspectorConverter.DateConverter(inputOI,
+ (SettableDateObjectInspector) outputOI);
+ }
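+ // Unsupported output category: initialize() reports a null converter as a UDFArgumentException.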
+ return null;
+ }
+
+ @Override public Object evaluate(DeferredObject[] arguments) throws HiveException {
+ Object input = arguments[1].get();
+ if (input == null) {
+ return null;
+ }
+ return converter.convert(input);
+ }
+
+ @Override public String getDisplayString(String[] children) {
+ assert children.length == 3 || children.length == 4;
+ StringBuilder sb = new StringBuilder();
+ sb.append("CAST( ");
+ sb.append(children[1]);
+ sb.append(" AS ");
+ int typeKey = Integer.parseInt(children[0]);
+    if (!OUTPUT_TYPES.containsKey(typeKey)) {
+      sb.append("HiveParser_IdentifiersParser index ").append(typeKey);
+ } else {
+ sb.append(OUTPUT_TYPES.get(typeKey));
+ if (children.length == 4) {
+ sb.append("(").append(children[3]).append(")");
+ }
+ }
+ sb.append(" FORMAT ");
+ sb.append(children[2]);
+ sb.append(" )");
+ return sb.toString();
+ }
+}
\ No newline at end of file
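For orientation, a minimal sketch of how this UDF is driven; the argument layout mirrors
TestGenericUDFCastWithFormat further down. typeCodeOI, valueOI, formatOI, lengthOI and
deferredArgs are placeholder names for this sketch, not identifiers from the patch:

    // arguments[0]: output type code, a HiveParser_IdentifiersParser TOK_* constant
    // arguments[1]: the value being cast
    // arguments[2]: the SQL:2016 format pattern, as a constant string
    // arguments[3]: optional length, required when the output type is CHAR or VARCHAR
    GenericUDFCastFormat udf = new GenericUDFCastFormat();
    ObjectInspector outputOI =
        udf.initialize(new ObjectInspector[] { typeCodeOI, valueOI, formatOI, lengthOI });
    Object result = udf.evaluate(deferredArgs); // deferred objects in the same order
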
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java
index bcc4114099..6c3c3349bb 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java
@@ -17,8 +17,6 @@
*/
package org.apache.hadoop.hive.ql.udf.generic;
-import java.text.SimpleDateFormat;
-
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateSubColCol;
@@ -46,7 +44,6 @@
+ " '2009-07-29'")
@VectorizedExpressions({VectorUDFDateSubColScalar.class, VectorUDFDateSubScalarCol.class, VectorUDFDateSubColCol.class})
public class GenericUDFDateSub extends GenericUDFDateAdd {
- private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
public GenericUDFDateSub() {
this.signModifier = -1;
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java
index 663237739e..092d320ecd 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java
@@ -22,6 +22,8 @@
import java.util.Arrays;
import java.util.Random;
+import java.nio.charset.StandardCharsets;
+import org.apache.hadoop.hive.common.type.Date;
import org.junit.Assert;
import org.apache.hadoop.hive.serde2.RandomTypeUtil;
@@ -115,18 +116,20 @@ public void testRoundToDecimalPlaces() throws HiveException {
Assert.assertEquals(1.2346d, resultV.vector[7], Double.MIN_VALUE);
}
- static int DAYS_LIMIT = 365 * 9999;
+ private static final int DAYS_LIMIT = 365 * 9999;
+  // approximate lower bound, so that some generated epoch-day values are negative:
+ private static final int SMALLEST_EPOCH_DAY = -365 * 1969;
public static VectorizedRowBatch getVectorizedRowBatchDateInTimestampOut(int[] intValues) {
Random r = new Random(12099);
VectorizedRowBatch batch = new VectorizedRowBatch(2);
LongColumnVector inV;
TimestampColumnVector outV;
- inV = new LongColumnVector();
- outV = new TimestampColumnVector();
+ inV = new LongColumnVector(intValues.length);
+ outV = new TimestampColumnVector(intValues.length);
for (int i = 0; i < intValues.length; i++) {
- intValues[i] = r.nextInt() % DAYS_LIMIT;
+ intValues[i] = SMALLEST_EPOCH_DAY + r.nextInt() % DAYS_LIMIT;
inV.vector[i] = intValues[i];
}
@@ -137,6 +140,36 @@ public static VectorizedRowBatch getVectorizedRowBatchDateInTimestampOut(int[] i
return batch;
}
+ public static VectorizedRowBatch getVectorizedRowBatchDateInStringOut(int[] intValues) {
+ // get date in timestamp out, and change timestamp out to string out
+ VectorizedRowBatch batch = getVectorizedRowBatchDateInTimestampOut(intValues);
+ BytesColumnVector outV = new BytesColumnVector(intValues.length);
+ batch.cols[1] = outV;
+ return batch;
+ }
+
+  // Creates input for CastDateToStringWithFormat, used by
+  // TestVectorTypeCastsWithFormat#testCastDateToStringWithFormat.
+ public static VectorizedRowBatch getVectorizedRowBatchDateInStringOutFormatted() {
+ VectorizedRowBatch batch = new VectorizedRowBatch(2);
+ LongColumnVector dateColumnV;
+ BytesColumnVector stringColumnV;
+ dateColumnV = new LongColumnVector();
+ stringColumnV = new BytesColumnVector();
+
+ dateColumnV.vector[0] = Date.valueOf("2019-12-31").toEpochDay();
+ dateColumnV.vector[1] = Date.valueOf("1776-07-04").toEpochDay();
+ dateColumnV.vector[2] = Date.valueOf("2012-02-29").toEpochDay();
+ dateColumnV.vector[3] = Date.valueOf("1580-08-08").toEpochDay();
+ dateColumnV.vector[4] = Date.valueOf("0005-01-01").toEpochDay();
+ dateColumnV.vector[5] = Date.valueOf("9999-12-31").toEpochDay();
+
+ batch.cols[0] = dateColumnV;
+ batch.cols[1] = stringColumnV;
+ batch.size = 6;
+ return batch;
+ }
+
public static VectorizedRowBatch getVectorizedRowBatchDoubleInLongOut() {
VectorizedRowBatch batch = new VectorizedRowBatch(2);
LongColumnVector lcv;
@@ -277,6 +310,42 @@ public static VectorizedRowBatch getVectorizedRowBatchStringInLongOut() {
return batch;
}
+ public static VectorizedRowBatch getVectorizedRowBatchStringInTimestampOutFormatted() {
+ VectorizedRowBatch batch = new VectorizedRowBatch(2);
+ BytesColumnVector inV;
+ inV = new BytesColumnVector();
+ inV.initBuffer();
+ inV.setVal(0, StandardCharsets.UTF_8.encode("2019-12-31 00:00:00.999999999").array());
+ inV.setVal(1, StandardCharsets.UTF_8.encode("1776-07-04 17:07:06.177617761").array());
+ inV.setVal(2, StandardCharsets.UTF_8.encode("2012-02-29 23:59:59.999999999").array());
+ inV.setVal(3, StandardCharsets.UTF_8.encode("1580-08-08 00:00:00.0").array());
+ inV.setVal(4, StandardCharsets.UTF_8.encode("0005-01-01 00:00:00.0").array());
+ inV.setVal(5, StandardCharsets.UTF_8.encode("9999-12-31 23:59:59.999999999").array());
+
+ batch.cols[0] = inV;
+
+ batch.size = 6;
+ return batch;
+ }
+
+ public static VectorizedRowBatch getVectorizedRowBatchStringInDateOutFormatted() {
+ VectorizedRowBatch batch = new VectorizedRowBatch(2);
+ BytesColumnVector inV;
+ inV = new BytesColumnVector();
+ inV.initBuffer();
+ inV.setVal(0, StandardCharsets.UTF_8.encode("19/12/31").array());
+ inV.setVal(1, StandardCharsets.UTF_8.encode("1776--07--04").array());
+ inV.setVal(2, StandardCharsets.UTF_8.encode("2012/02/29").array());
+ inV.setVal(3, StandardCharsets.UTF_8.encode("1580/08/08").array());
+ inV.setVal(4, StandardCharsets.UTF_8.encode("0005/01/01").array());
+ inV.setVal(5, StandardCharsets.UTF_8.encode("9999/12/31").array());
+
+ batch.cols[0] = inV;
+
+ batch.size = 6;
+ return batch;
+ }
+
public static VectorizedRowBatch getVectorizedRowBatchTimestampInLongOut(long[] longValues) {
Random r = new Random(345);
VectorizedRowBatch batch = new VectorizedRowBatch(2);
@@ -297,6 +366,55 @@ public static VectorizedRowBatch getVectorizedRowBatchTimestampInLongOut(long[]
return batch;
}
+
+ public static VectorizedRowBatch getVectorizedRowBatchTimestampInStringOut(
+ long[] epochSecondValues, int[] nanoValues) {
+ Random r = new Random(345);
+ VectorizedRowBatch batch = new VectorizedRowBatch(2);
+ batch.size = epochSecondValues.length;
+
+ TimestampColumnVector inV;
+ BytesColumnVector outV;
+ inV = new TimestampColumnVector(batch.size);
+ outV = new BytesColumnVector(batch.size);
+
+ for (int i = 0; i < batch.size; i++) {
+ Timestamp randTimestamp = RandomTypeUtil.getRandTimestamp(r);
+ epochSecondValues[i] = randTimestamp.toEpochSecond();
+ nanoValues[i] = randTimestamp.getNanos();
+ inV.set(i, randTimestamp.toSqlTimestamp());
+ }
+
+ batch.cols[0] = inV;
+ batch.cols[1] = outV;
+
+ return batch;
+ }
+
+ public static VectorizedRowBatch getVectorizedRowBatchTimestampInStringOutFormatted() {
+ VectorizedRowBatch batch = new VectorizedRowBatch(2);
+ TimestampColumnVector timestampColumnV;
+ BytesColumnVector stringColumnV;
+ timestampColumnV = new TimestampColumnVector();
+ stringColumnV = new BytesColumnVector();
+
+ timestampColumnV.set(0, getSqlTimestamp("2019-12-31 19:20:21.999999999"));
+ timestampColumnV.set(1, getSqlTimestamp("1776-07-04 17:07:06.177617761"));
+ timestampColumnV.set(2, getSqlTimestamp("2012-02-29 23:59:59.999999999"));
+ timestampColumnV.set(3, getSqlTimestamp("1580-08-08 00:00:00"));
+ timestampColumnV.set(4, getSqlTimestamp("0005-01-01 00:00:00"));
+ timestampColumnV.set(5, getSqlTimestamp("9999-12-31 23:59:59.999999999"));
+
+ batch.cols[0] = timestampColumnV;
+ batch.cols[1] = stringColumnV;
+ batch.size = 6;
+ return batch;
+ }
+
+ private static java.sql.Timestamp getSqlTimestamp(String s) {
+ return Timestamp.valueOf(s).toSqlTimestamp();
+ }
+
static long SECONDS_LIMIT = 60L * 24L * 365L * 9999L;
public static VectorizedRowBatch getVectorizedRowBatchLongInTimestampOut(long[] longValues) {
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java
index 58fd7b030e..a449ea143d 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java
@@ -23,7 +23,9 @@
import static org.junit.Assert.assertTrue;
import java.math.BigDecimal;
+import java.nio.charset.StandardCharsets;
import java.sql.Timestamp;
+import java.util.Arrays;
import java.util.Random;
import java.util.concurrent.TimeUnit;
@@ -72,6 +74,30 @@ public void testVectorCastDoubleToLong() throws HiveException {
Assert.assertEquals(1, resultV.vector[6]);
}
+  // +8 hours from PST to GMT (8 * 60 * 60 * 1000 ms), needed because java.sql.Date subtracts
+  // 8 hours from the final value when the test VM's time zone is PST.
+  private static final long TIME_DIFFERENCE = 28800000L;
+ @Test
+ public void testCastDateToString() throws HiveException {
+ int[] intValues = new int[100];
+ VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchDateInStringOut(intValues);
+ BytesColumnVector resultV = (BytesColumnVector) b.cols[1];
+ b.cols[0].noNulls = true;
+ VectorExpression expr = new CastDateToString(0, 1);
+ expr.evaluate(b);
+
+ String expected, result;
+ for (int i = 0; i < intValues.length; i++) {
+ expected =
+ new java.sql.Date(DateWritableV2.daysToMillis(intValues[i]) + TIME_DIFFERENCE).toString();
+ byte[] subbyte = Arrays.copyOfRange(resultV.vector[i], resultV.start[i],
+ resultV.start[i] + resultV.length[i]);
+ result = new String(subbyte, StandardCharsets.UTF_8);
+
+ Assert.assertEquals("Index: " + i + " Epoch day value: " + intValues[i], expected, result);
+ }
+ }
+
@Test
public void testCastDateToTimestamp() throws HiveException {
int[] intValues = new int[500];
@@ -192,6 +218,31 @@ public void testCastTimestampToDouble() throws HiveException {
}
}
+ @Test
+ public void testCastTimestampToString() throws HiveException {
+ int numberToTest = 100;
+ long[] epochSecondValues = new long[numberToTest];
+ int[] nanoValues = new int[numberToTest];
+ VectorizedRowBatch b =
+ TestVectorMathFunctions.getVectorizedRowBatchTimestampInStringOut(epochSecondValues, nanoValues);
+ BytesColumnVector resultV = (BytesColumnVector) b.cols[1];
+ b.cols[0].noNulls = true;
+ VectorExpression expr = new CastTimestampToString(0, 1);
+ expr.evaluate(b);
+
+ String expected, result;
+ for (int i = 0; i < numberToTest; i++) {
+ expected = org.apache.hadoop.hive.common.type.Timestamp
+ .ofEpochSecond(epochSecondValues[i], nanoValues[i]).toString();
+ byte[] subbyte = Arrays.copyOfRange(resultV.vector[i], resultV.start[i],
+ resultV.start[i] + resultV.length[i]);
+ result = new String(subbyte, StandardCharsets.UTF_8);
+ Assert.assertEquals("Index: " + i + " Seconds since epoch: " + epochSecondValues[i] +
+ " nanoseconds: " + nanoValues[i],
+ expected, result);
+ }
+ }
+
public byte[] toBytes(String s) {
byte[] b = null;
try {
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCastsWithFormat.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCastsWithFormat.java
new file mode 100644
index 0000000000..11faa41d50
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCastsWithFormat.java
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+
+/**
+ * Tests vectorized type cast udfs CastDateToStringWithFormat, CastTimestampToStringWithFormat,
+ * CastStringToDateWithFormat, CastStringToTimestampWithFormat.
+ */
+public class TestVectorTypeCastsWithFormat {
+
+ @Test
+ public void testCastDateToStringWithFormat() throws HiveException {
+ VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchDateInStringOutFormatted();
+ BytesColumnVector resultV = (BytesColumnVector) b.cols[1];
+ VectorExpression expr = new CastDateToStringWithFormat(0, "yyyy".getBytes(), 1);
+ expr.evaluate(b);
+ verifyString(0, "2019", resultV);
+ verifyString(1, "1776", resultV);
+ verifyString(2, "2012", resultV);
+ verifyString(3, "1580", resultV);
+ verifyString(4, "0005", resultV);
+ verifyString(5, "9999", resultV);
+
+ expr = new CastDateToStringWithFormat(0, "MM".getBytes(), 1);
+ resultV = new BytesColumnVector();
+ b.cols[1] = resultV;
+ expr.evaluate(b);
+ verifyString(0, "12", resultV);
+ verifyString(1, "07", resultV);
+ verifyString(2, "02", resultV);
+ verifyString(3, "08", resultV);
+ verifyString(4, "01", resultV);
+ verifyString(5, "12", resultV);
+ }
+
+ @Test
+ public void testCastTimestampToStringWithFormat() throws HiveException {
+ VectorizedRowBatch b =
+ TestVectorMathFunctions.getVectorizedRowBatchTimestampInStringOutFormatted();
+ BytesColumnVector resultV = (BytesColumnVector) b.cols[1];
+ VectorExpression expr = new CastTimestampToStringWithFormat(0, "yyyy".getBytes(), 1);
+ expr.evaluate(b);
+
+ Assert.assertEquals("2019", getStringFromBytesColumnVector(resultV, 0));
+ Assert.assertEquals("1776", getStringFromBytesColumnVector(resultV, 1));
+ Assert.assertEquals("2012", getStringFromBytesColumnVector(resultV, 2));
+ Assert.assertEquals("1580", getStringFromBytesColumnVector(resultV, 3));
+ Assert.assertEquals("0005", getStringFromBytesColumnVector(resultV, 4));
+ Assert.assertEquals("9999", getStringFromBytesColumnVector(resultV, 5));
+
+ resultV = new BytesColumnVector();
+ b.cols[1] = resultV;
+ expr = new CastTimestampToStringWithFormat(0, "HH24".getBytes(), 1);
+ expr.evaluate(b);
+
+ Assert.assertEquals("19", getStringFromBytesColumnVector(resultV, 0));
+ Assert.assertEquals("17", getStringFromBytesColumnVector(resultV, 1));
+ Assert.assertEquals("23", getStringFromBytesColumnVector(resultV, 2));
+ Assert.assertEquals("00", getStringFromBytesColumnVector(resultV, 3));
+ Assert.assertEquals("00", getStringFromBytesColumnVector(resultV, 4));
+ Assert.assertEquals("23", getStringFromBytesColumnVector(resultV, 5));
+ }
+
+ @Test
+ public void testCastStringToTimestampWithFormat() throws HiveException {
+ VectorizedRowBatch b =
+ TestVectorMathFunctions.getVectorizedRowBatchStringInTimestampOutFormatted();
+ TimestampColumnVector resultV;
+ resultV = new TimestampColumnVector();
+ b.cols[1] = resultV;
+ VectorExpression expr =
+ new CastStringToTimestampWithFormat(0, "yyyy.mm.dd HH24.mi.ss.ff".getBytes(), 1);
+ expr.evaluate(b);
+
+ verifyTimestamp("2019-12-31 00:00:00.999999999", resultV, 0);
+ verifyTimestamp("1776-07-04 17:07:06.177617761", resultV, 1);
+ verifyTimestamp("2012-02-29 23:59:59.999999999", resultV, 2);
+ verifyTimestamp("1580-08-08 00:00:00", resultV, 3);
+ verifyTimestamp("0005-01-01 00:00:00", resultV, 4);
+ verifyTimestamp("9999-12-31 23:59:59.999999999", resultV, 5);
+ }
+
+ private void verifyTimestamp(String tsString, TimestampColumnVector resultV, int index) {
+ Assert.assertEquals(Timestamp.valueOf(tsString).toEpochMilli(), resultV.time[index]);
+ Assert.assertEquals(Timestamp.valueOf(tsString).getNanos(), resultV.nanos[index]);
+ }
+
+ @Test
+ public void testCastStringToDateWithFormat() throws HiveException {
+ VectorizedRowBatch b =
+ TestVectorMathFunctions.getVectorizedRowBatchStringInDateOutFormatted();
+ LongColumnVector resultV;
+ resultV = new LongColumnVector();
+ b.cols[1] = resultV;
+ VectorExpression expr = new CastStringToDateWithFormat(0, "yyyy.mm.dd".getBytes(), 1);
+ expr.evaluate(b);
+
+ Assert.assertEquals(Date.valueOf("2019-12-31").toEpochDay(), resultV.vector[0]); // frogmethod why does this work - it doesn't have the extra char
+ Assert.assertEquals(Date.valueOf("1776-07-04").toEpochDay(), resultV.vector[1]);
+ Assert.assertEquals(Date.valueOf("2012-02-29").toEpochDay(), resultV.vector[2]);
+ Assert.assertEquals(Date.valueOf("1580-08-08").toEpochDay(), resultV.vector[3]);
+ Assert.assertEquals(Date.valueOf("0005-01-01").toEpochDay(), resultV.vector[4]);
+ Assert.assertEquals(Date.valueOf("9999-12-31").toEpochDay(), resultV.vector[5]);
+ }
+
+ private void verifyString(int resultIndex, String expected, BytesColumnVector resultV) {
+ String result = getStringFromBytesColumnVector(resultV, resultIndex);
+ Assert.assertEquals(expected, result);
+ }
+
+ private String getStringFromBytesColumnVector(BytesColumnVector resultV, int i) {
+ String result;
+ byte[] resultBytes = Arrays.copyOfRange(resultV.vector[i], resultV.start[i],
+ resultV.start[i] + resultV.length[i]);
+ result = new String(resultBytes, StandardCharsets.UTF_8);
+ return result;
+ }
+}
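Two parsing behaviors of the SQL:2016 formatter are worth calling out, since the expected
values above depend on them: separator characters are interchangeable at parse time, and a
year written with fewer digits than the pattern is completed from the leading digits of the
current year (2019 when this test data was written; compare the "969" to "2969" cases in
TestGenericUDFCastWithFormat below). A hedged sketch, using only inputs and expected values
taken from this test class:

    HiveDateTimeFormatter f = new HiveSqlDateTimeFormatter("yyyy.mm.dd", true); // forParsing
    f.parseDate("2012/02/29");   // -> 2012-02-29: '/' accepted where the pattern has '.'
    f.parseDate("1776--07--04"); // -> 1776-07-04: doubled separators also accepted
    f.parseDate("19/12/31");     // -> 2019-12-31: short year filled from the current year
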
diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCastWithFormat.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCastWithFormat.java
new file mode 100644
index 0000000000..ab82b8a8df
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCastWithFormat.java
@@ -0,0 +1,189 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.HiveParser_IdentifiersParser;
+import org.apache.hadoop.hive.serde2.io.DateWritableV2;
+import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.junit.Test;
+
+import static junit.framework.TestCase.assertEquals;
+import static junit.framework.TestCase.assertNull;
+import static org.junit.Assert.fail;
+
+/**
+ * Tests CAST (... AS STRING/CHAR/VARCHAR FORMAT ...) and
+ * CAST (... AS TIMESTAMP/DATE FORMAT ...).
+ */
+public class TestGenericUDFCastWithFormat {
+
+ //type codes
+ public static final int CHAR = HiveParser_IdentifiersParser.TOK_CHAR;
+ public static final int VARCHAR = HiveParser_IdentifiersParser.TOK_VARCHAR;
+ public static final int STRING = HiveParser_IdentifiersParser.TOK_STRING;
+ public static final int DATE = HiveParser_IdentifiersParser.TOK_DATE;
+ public static final int TIMESTAMP = HiveParser_IdentifiersParser.TOK_TIMESTAMP;
+
+ @Test
+ public void testDateToStringWithFormat() throws HiveException {
+ ObjectInspector inputOI = PrimitiveObjectInspectorFactory.writableDateObjectInspector;
+ testCast(STRING, inputOI, date("2009-07-30"), "yyyy-MM-dd", "2009-07-30");
+ testCast(STRING, inputOI, date("2009-07-30"), "yyyy", "2009");
+ testCast(STRING, inputOI, date("1969-07-30"), "dd", "30");
+
+ testCast(CHAR, 3, inputOI, date("2009-07-30"),"yyyy-MM-dd", "200");
+ testCast(CHAR, 3, inputOI, date("2009-07-30"), "yyyy", "200");
+ testCast(CHAR, 3, inputOI, date("1969-07-30"), "dd", "30 ");
+
+ testCast(VARCHAR, 3, inputOI, date("2009-07-30"),"yyyy-MM-dd", "200");
+ testCast(VARCHAR, 3, inputOI, date("2009-07-30"), "yyyy", "200");
+ testCast(VARCHAR, 3, inputOI, date("1969-07-30"), "dd", "30");
+ }
+
+ @Test public void testTimestampToStringTypesWithFormat() throws HiveException {
+ ObjectInspector inputOI = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
+ testCast(STRING, inputOI, timestamp("2009-07-30 00:00:08"),
+ "yyyy-MM-dd HH24:mi:ss", "2009-07-30 00:00:08");
+ testCast(STRING, inputOI, timestamp("2009-07-30 11:02:00"),
+ "MM/dd/yyyy hh24miss", "07/30/2009 110200");
+ testCast(STRING, inputOI, timestamp("2009-07-30 01:02:03"), "MM", "07");
+ testCast(STRING, inputOI, timestamp("1969-07-30 00:00:00"), "yy", "69");
+
+ testCast(CHAR, 3, inputOI, timestamp("2009-07-30 00:00:08"),
+ "yyyy-MM-dd HH24:mi:ss", "200");
+ testCast(CHAR, 3, inputOI, timestamp("2009-07-30 11:02:00"),
+ "MM/dd/yyyy hh24miss", "07/");
+ testCast(CHAR, 3, inputOI, timestamp("2009-07-30 01:02:03"), "MM", "07 ");
+ testCast(CHAR, 3, inputOI, timestamp("1969-07-30 00:00:00"), "yy", "69 ");
+
+ testCast(VARCHAR, 3, inputOI, timestamp("2009-07-30 00:00:08"),
+ "yyyy-MM-dd HH24:mi:ss", "200");
+ testCast(VARCHAR, 3, inputOI, timestamp("2009-07-30 11:02:00"),
+ "MM/dd/yyyy hh24miss", "07/");
+ testCast(VARCHAR, 3, inputOI, timestamp("2009-07-30 01:02:03"), "MM", "07");
+ testCast(VARCHAR, 3, inputOI, timestamp("1969-07-30 00:00:00"), "yy", "69");
+ }
+
+ @Test public void testStringTypesToDateWithFormat() throws HiveException {
+ ObjectInspector inputOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
+ testCast(DATE, inputOI, "2009-07-30", "yyyy-MM-dd", "2009-07-30");
+ testCast(DATE, inputOI, "2009", "yyyy", "2009-01-01");
+ testCast(DATE, inputOI, "30", "dd", "1970-01-30");
+
+ inputOI = PrimitiveObjectInspectorFactory.javaHiveCharObjectInspector;
+ testCast(DATE, inputOI, new HiveChar("2009-07-30", 7), "yyyy-MM", "2009-07-01");
+ testCast(DATE, inputOI, new HiveChar("2009", 7), "yyyy", "2009-01-01");
+ testCast(DATE, inputOI, new HiveChar("30", 7), "dd", "1970-01-30");
+
+ inputOI = PrimitiveObjectInspectorFactory.javaHiveVarcharObjectInspector;
+ testCast(DATE, inputOI, new HiveVarchar("2009-07-30", 7), "yyyy-MM", "2009-07-01");
+ testCast(DATE, inputOI, new HiveVarchar("2009", 7), "yyyy", "2009-01-01");
+ testCast(DATE, inputOI, new HiveVarchar("30", 7), "dd", "1970-01-30");
+ }
+
+ @Test public void testStringTypesToTimestampWithFormat() throws HiveException {
+ ObjectInspector inputOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
+ testCast(TIMESTAMP, inputOI, "2009-07-30 01:02:03", "yyyy-MM-dd HH24:mi:ss",
+ "2009-07-30 01:02:03");
+ testCast(TIMESTAMP, inputOI, "2009", "yyyy", "2009-01-01 00:00:00");
+ testCast(TIMESTAMP, inputOI, "07/30/2009 11:0200", "MM/dd/yyyy hh24:miss",
+ "2009-07-30 11:02:00");
+ testCast(TIMESTAMP, inputOI, "969.07.30.", "yyy.MM.dd.", "2969-07-30 00:00:00");
+
+ inputOI = PrimitiveObjectInspectorFactory.javaHiveCharObjectInspector;
+ testCast(TIMESTAMP, 13, inputOI, new HiveChar("2009-07-30 01:02:03", 13), "yyyy-MM-dd HH24",
+ "2009-07-30 01:00:00");
+ testCast(TIMESTAMP, 7, inputOI, new HiveChar("2009", 7), "yyyy", "2009-01-01 00:00:00");
+ testCast(TIMESTAMP, 18, inputOI, new HiveChar("07/30/2009 11:0200", 18), "MM/dd/yyyy hh24:miss",
+ "2009-07-30 11:02:00");
+ testCast(TIMESTAMP, 7, inputOI, new HiveChar("969.07.30.", 7), "yyy.MM.",
+ "2969-07-01 00:00:00");
+
+ inputOI = PrimitiveObjectInspectorFactory.javaHiveVarcharObjectInspector;
+ testCast(TIMESTAMP, 13, inputOI, new HiveVarchar("2009-07-30 01:02:03", 13), "yyyy-MM-dd HH24",
+ "2009-07-30 01:00:00");
+ testCast(TIMESTAMP, 7, inputOI, new HiveVarchar("2009", 7), "yyyy", "2009-01-01 00:00:00");
+ testCast(TIMESTAMP, 18, inputOI, new HiveVarchar("07/30/2009 11:0200", 18),
+ "MM/dd/yyyy hh24:miss", "2009-07-30 11:02:00");
+ testCast(TIMESTAMP, 7, inputOI, new HiveVarchar("969.07.30.", 7), "yyy.MM.",
+ "2969-07-01 00:00:00");
+ }
+
+ private TimestampWritableV2 timestamp(String s) {
+ return new TimestampWritableV2(Timestamp.valueOf(s));
+ }
+
+ private DateWritableV2 date(String s) {
+ return new DateWritableV2(Date.valueOf(s));
+ }
+
+ private void testCast(int typeCode, ObjectInspector inputOI, Object input, String format,
+ String expOutput) throws HiveException {
+ testCast(typeCode, 0, inputOI, input, format, expOutput);
+ }
+
+  private void testCast(int typeCode, int length, ObjectInspector inputOI, Object input,
+      String format, String expOutput) throws HiveException {
+ // initialize
+ GenericUDFCastFormat udf = new GenericUDFCastFormat();
+ ConstantObjectInspector typeCodeOI =
+ PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
+ TypeInfoFactory.getPrimitiveTypeInfo("int"), new IntWritable(typeCode));
+ ConstantObjectInspector formatOI =
+ PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
+ TypeInfoFactory.getPrimitiveTypeInfo("string"), new Text(format));
+ ConstantObjectInspector lengthOI =
+ PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
+ TypeInfoFactory.getPrimitiveTypeInfo("int"), new IntWritable(length));
+ ObjectInspector[] initArgs = {typeCodeOI, inputOI, formatOI, lengthOI};
+ udf.initialize(initArgs);
+
+ // evaluate
+ GenericUDF.DeferredObject typeCodeObj = new GenericUDF.DeferredJavaObject(typeCode);
+ GenericUDF.DeferredObject inputObj = new GenericUDF.DeferredJavaObject(input);
+ GenericUDF.DeferredObject formatObj = new GenericUDF.DeferredJavaObject(new Text(format));
+ GenericUDF.DeferredObject lengthObj = new GenericUDF.DeferredJavaObject(length);
+ GenericUDF.DeferredObject[] evalArgs = { typeCodeObj, inputObj, formatObj, lengthObj };
+ Object output = udf.evaluate(evalArgs);
+ if (output == null) {
+      fail("Cast " + inputOI.getTypeName() + " \"" + input + "\" to "
+          + GenericUDFCastFormat.OUTPUT_TYPES.get(typeCode) + " failed, output null");
+ }
+    assertEquals("Cast " + inputOI.getTypeName() + " \"" + input + "\" to "
+        + GenericUDFCastFormat.OUTPUT_TYPES.get(typeCode) + " failed", expOutput,
+        output.toString());
+
+ // Try with null input
+ GenericUDF.DeferredObject[] nullArgs =
+ { typeCodeObj, new GenericUDF.DeferredJavaObject(null), formatObj, lengthObj };
+ assertNull(udf.getFuncName() + " with NULL arguments failed", udf.evaluate(nullArgs));
+ }
+}
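One detail the assertions above pin down: when the formatted value does not fit the declared
length, CHAR and VARCHAR both truncate, and a short CHAR result is additionally space-padded
to its declared length while VARCHAR is not:

    // From the expected values in testTimestampToStringTypesWithFormat:
    //   timestamp 2009-07-30 01:02:03, FORMAT 'MM', as char(3)    -> "07 "  (padded)
    //   timestamp 2009-07-30 01:02:03, FORMAT 'MM', as varchar(3) -> "07"   (not padded)
    //   FORMAT 'yyyy-MM-dd HH24:mi:ss', as char(3)                -> "200"  (truncated)
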
diff --git ql/src/test/queries/clientpositive/cast_datetime_with_sql_2016_format.q ql/src/test/queries/clientpositive/cast_datetime_with_sql_2016_format.q
new file mode 100644
index 0000000000..daeb61c975
--- /dev/null
+++ ql/src/test/queries/clientpositive/cast_datetime_with_sql_2016_format.q
@@ -0,0 +1,63 @@
+--non-vectorized
+set hive.vectorized.execution.enabled=false;
+set hive.fetch.task.conversion=more;
+
+create table timestamp1 (t timestamp) stored as parquet;
+insert into timestamp1 values
+("2020-02-03"),
+("1969-12-31 23:59:59.999999999")
+;
+from timestamp1 select cast (t as string format "yyyy hh24...PM ff");
+from timestamp1 select cast (t as char(11) format "yyyy hh24...PM ff"); -- will be truncated
+from timestamp1 select cast (t as varchar(11) format "yyyy hh24...PM ff"); -- will be truncated
+
+create table dates (d date) stored as parquet;
+insert into dates values
+("2020-02-03"),
+("1969-12-31")
+;
+from dates select cast (d as string format "yyyy mm dd , hh24 mi ss ff9");
+from dates select cast (d as char(10) format "yyyy mm dd , hh24 mi ss ff9"); -- will be truncated
+from dates select cast (d as varchar(10) format "yyyy mm dd , hh24 mi ss ff9"); -- will be truncated
+
+create table strings (s string) stored as parquet;
+create table varchars (s varchar(11)) stored as parquet;
+create table chars (s char(11)) stored as parquet;
+insert into strings values
+("20 / 2 / 3"),
+("1969 12 31")
+;
+insert into varchars select * from strings;
+insert into chars select * from strings;
+
+from strings select cast (s as timestamp format "yyyy.mm.dd");
+from strings select cast (s as date format "yyyy.mm.dd");
+from varchars select cast (s as timestamp format "yyyy.mm.dd");
+from varchars select cast (s as date format "yyyy.mm.dd");
+from chars select cast (s as timestamp format "yyyy.mm.dd");
+from chars select cast (s as date format "yyyy.mm.dd");
+
+
+--correct descriptions
+explain from strings select cast (s as timestamp format "yyy.mm.dd");
+explain from strings select cast (s as date format "yyy.mm.dd");
+explain from timestamp1 select cast (t as string format "yyyy");
+explain from timestamp1 select cast (t as varchar(12) format "yyyy");
+
+
+--vectorized
+set hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
+from timestamp1 select cast (t as string format "yyyy");
+from dates select cast (d as string format "yyyy");
+from timestamp1 select cast (t as varchar(11) format "yyyy");
+from dates select cast (d as varchar(11) format "yyyy");
+from timestamp1 select cast (t as char(11) format "yyyy");
+from dates select cast (d as char(11) format "yyyy");
+from strings select cast (s as timestamp format "yyyy.mm.dd");
+from varchars select cast (s as timestamp format "yyyy.mm.dd");
+from chars select cast (s as timestamp format "yyyy.mm.dd");
+from strings select cast (s as date format "yyyy.mm.dd");
+from varchars select cast (s as date format "yyyy.mm.dd");
+from chars select cast (s as date format "yyyy.mm.dd");
diff --git ql/src/test/results/clientpositive/cast_datetime_with_sql_2016_format.q.out ql/src/test/results/clientpositive/cast_datetime_with_sql_2016_format.q.out
new file mode 100644
index 0000000000..1456466fcb
--- /dev/null
+++ ql/src/test/results/clientpositive/cast_datetime_with_sql_2016_format.q.out
@@ -0,0 +1,449 @@
+PREHOOK: query: create table timestamp1 (t timestamp) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@timestamp1
+POSTHOOK: query: create table timestamp1 (t timestamp) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@timestamp1
+PREHOOK: query: insert into timestamp1 values
+("2020-02-03"),
+("1969-12-31 23:59:59.999999999")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@timestamp1
+POSTHOOK: query: insert into timestamp1 values
+("2020-02-03"),
+("1969-12-31 23:59:59.999999999")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@timestamp1
+POSTHOOK: Lineage: timestamp1.t SCRIPT []
+PREHOOK: query: from timestamp1 select cast (t as string format "yyyy hh24...PM ff")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamp1
+#### A masked pattern was here ####
+POSTHOOK: query: from timestamp1 select cast (t as string format "yyyy hh24...PM ff")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamp1
+#### A masked pattern was here ####
+2020 00...AM 0
+1969 23...PM 999999999
+PREHOOK: query: from timestamp1 select cast (t as char(11) format "yyyy hh24...PM ff")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamp1
+#### A masked pattern was here ####
+POSTHOOK: query: from timestamp1 select cast (t as char(11) format "yyyy hh24...PM ff")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamp1
+#### A masked pattern was here ####
+2020 00...A
+1969 23...P
+PREHOOK: query: -- will be truncated
+from timestamp1 select cast (t as varchar(11) format "yyyy hh24...PM ff")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamp1
+#### A masked pattern was here ####
+POSTHOOK: query: -- will be truncated
+from timestamp1 select cast (t as varchar(11) format "yyyy hh24...PM ff")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamp1
+#### A masked pattern was here ####
+2020 00...A
+1969 23...P
+PREHOOK: query: -- will be truncated
+
+create table dates (d date) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dates
+POSTHOOK: query: -- will be truncated
+
+create table dates (d date) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dates
+PREHOOK: query: insert into dates values
+("2020-02-03"),
+("1969-12-31")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@dates
+POSTHOOK: query: insert into dates values
+("2020-02-03"),
+("1969-12-31")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@dates
+POSTHOOK: Lineage: dates.d SCRIPT []
+PREHOOK: query: from dates select cast (d as string format "yyyy mm dd , hh24 mi ss ff9")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dates
+#### A masked pattern was here ####
+POSTHOOK: query: from dates select cast (d as string format "yyyy mm dd , hh24 mi ss ff9")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dates
+#### A masked pattern was here ####
+2020 02 03 , 00 00 00 000000000
+1969 12 31 , 00 00 00 000000000
+PREHOOK: query: from dates select cast (d as char(10) format "yyyy mm dd , hh24 mi ss ff9")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dates
+#### A masked pattern was here ####
+POSTHOOK: query: from dates select cast (d as char(10) format "yyyy mm dd , hh24 mi ss ff9")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dates
+#### A masked pattern was here ####
+2020 02 03
+1969 12 31
+PREHOOK: query: -- will be truncated
+from dates select cast (d as varchar(10) format "yyyy mm dd , hh24 mi ss ff9")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dates
+#### A masked pattern was here ####
+POSTHOOK: query: -- will be truncated
+from dates select cast (d as varchar(10) format "yyyy mm dd , hh24 mi ss ff9")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dates
+#### A masked pattern was here ####
+2020 02 03
+1969 12 31
+PREHOOK: query: -- will be truncated
+
+create table strings (s string) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@strings
+POSTHOOK: query: -- will be truncated
+
+create table strings (s string) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@strings
+PREHOOK: query: create table varchars (s varchar(11)) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@varchars
+POSTHOOK: query: create table varchars (s varchar(11)) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@varchars
+PREHOOK: query: create table chars (s char(11)) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@chars
+POSTHOOK: query: create table chars (s char(11)) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@chars
+PREHOOK: query: insert into strings values
+("20 / 2 / 3"),
+("1969 12 31")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@strings
+POSTHOOK: query: insert into strings values
+("20 / 2 / 3"),
+("1969 12 31")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@strings
+POSTHOOK: Lineage: strings.s SCRIPT []
+PREHOOK: query: insert into varchars select * from strings
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+PREHOOK: Output: default@varchars
+POSTHOOK: query: insert into varchars select * from strings
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+POSTHOOK: Output: default@varchars
+POSTHOOK: Lineage: varchars.s EXPRESSION [(strings)strings.FieldSchema(name:s, type:string, comment:null), ]
+PREHOOK: query: insert into chars select * from strings
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+PREHOOK: Output: default@chars
+POSTHOOK: query: insert into chars select * from strings
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+POSTHOOK: Output: default@chars
+POSTHOOK: Lineage: chars.s EXPRESSION [(strings)strings.FieldSchema(name:s, type:string, comment:null), ]
+PREHOOK: query: from strings select cast (s as timestamp format "yyyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+#### A masked pattern was here ####
+POSTHOOK: query: from strings select cast (s as timestamp format "yyyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+#### A masked pattern was here ####
+2020-02-03 00:00:00
+1969-12-31 00:00:00
+PREHOOK: query: from strings select cast (s as date format "yyyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+#### A masked pattern was here ####
+POSTHOOK: query: from strings select cast (s as date format "yyyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+#### A masked pattern was here ####
+2020-02-03
+1969-12-31
+PREHOOK: query: from varchars select cast (s as timestamp format "yyyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchars
+#### A masked pattern was here ####
+POSTHOOK: query: from varchars select cast (s as timestamp format "yyyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchars
+#### A masked pattern was here ####
+2020-02-03 00:00:00
+1969-12-31 00:00:00
+PREHOOK: query: from varchars select cast (s as date format "yyyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchars
+#### A masked pattern was here ####
+POSTHOOK: query: from varchars select cast (s as date format "yyyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchars
+#### A masked pattern was here ####
+2020-02-03
+1969-12-31
+PREHOOK: query: from chars select cast (s as timestamp format "yyyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@chars
+#### A masked pattern was here ####
+POSTHOOK: query: from chars select cast (s as timestamp format "yyyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@chars
+#### A masked pattern was here ####
+2020-02-03 00:00:00
+1969-12-31 00:00:00
+PREHOOK: query: from chars select cast (s as date format "yyyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@chars
+#### A masked pattern was here ####
+POSTHOOK: query: from chars select cast (s as date format "yyyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@chars
+#### A masked pattern was here ####
+2020-02-03
+1969-12-31
+PREHOOK: query: explain from strings select cast (s as timestamp format "yyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+#### A masked pattern was here ####
+POSTHOOK: query: explain from strings select cast (s as timestamp format "yyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: strings
+ Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: CAST( s AS timestamp FORMAT 'yyy.mm.dd' ) (type: timestamp)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+ ListSink
+
+PREHOOK: query: explain from strings select cast (s as date format "yyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+#### A masked pattern was here ####
+POSTHOOK: query: explain from strings select cast (s as date format "yyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: strings
+ Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: CAST( s AS date FORMAT 'yyy.mm.dd' ) (type: date)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
+ ListSink
+
+PREHOOK: query: explain from timestamp1 select cast (t as string format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamp1
+#### A masked pattern was here ####
+POSTHOOK: query: explain from timestamp1 select cast (t as string format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamp1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: timestamp1
+ Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: CAST( t AS string FORMAT 'yyyy' ) (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE
+ ListSink
+
+PREHOOK: query: explain from timestamp1 select cast (t as varchar(12) format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamp1
+#### A masked pattern was here ####
+POSTHOOK: query: explain from timestamp1 select cast (t as varchar(12) format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamp1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: timestamp1
+ Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: CAST( t AS varchar(12) FORMAT 'yyyy' ) (type: varchar(12))
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
+ ListSink
+
+PREHOOK: query: from timestamp1 select cast (t as string format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamp1
+#### A masked pattern was here ####
+POSTHOOK: query: from timestamp1 select cast (t as string format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamp1
+#### A masked pattern was here ####
+2020
+1969
+PREHOOK: query: from dates select cast (d as string format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dates
+#### A masked pattern was here ####
+POSTHOOK: query: from dates select cast (d as string format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dates
+#### A masked pattern was here ####
+2020
+1969
+PREHOOK: query: from timestamp1 select cast (t as varchar(11) format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamp1
+#### A masked pattern was here ####
+POSTHOOK: query: from timestamp1 select cast (t as varchar(11) format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamp1
+#### A masked pattern was here ####
+2020
+1969
+PREHOOK: query: from dates select cast (d as varchar(11) format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dates
+#### A masked pattern was here ####
+POSTHOOK: query: from dates select cast (d as varchar(11) format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dates
+#### A masked pattern was here ####
+2020
+1969
+PREHOOK: query: from timestamp1 select cast (t as char(11) format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamp1
+#### A masked pattern was here ####
+POSTHOOK: query: from timestamp1 select cast (t as char(11) format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamp1
+#### A masked pattern was here ####
+2020
+1969
+PREHOOK: query: from dates select cast (d as char(11) format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dates
+#### A masked pattern was here ####
+POSTHOOK: query: from dates select cast (d as char(11) format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dates
+#### A masked pattern was here ####
+2020
+1969
+PREHOOK: query: from strings select cast (s as timestamp format "yyyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+#### A masked pattern was here ####
+POSTHOOK: query: from strings select cast (s as timestamp format "yyyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+#### A masked pattern was here ####
+2020-02-03 00:00:00
+1969-12-31 00:00:00
+PREHOOK: query: from varchars select cast (s as timestamp format "yyyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchars
+#### A masked pattern was here ####
+POSTHOOK: query: from varchars select cast (s as timestamp format "yyyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchars
+#### A masked pattern was here ####
+2020-02-03 00:00:00
+1969-12-31 00:00:00
+PREHOOK: query: from chars select cast (s as timestamp format "yyyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@chars
+#### A masked pattern was here ####
+POSTHOOK: query: from chars select cast (s as timestamp format "yyyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@chars
+#### A masked pattern was here ####
+2020-02-03 00:00:00
+1969-12-31 00:00:00
+PREHOOK: query: from strings select cast (s as date format "yyyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+#### A masked pattern was here ####
+POSTHOOK: query: from strings select cast (s as date format "yyyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+#### A masked pattern was here ####
+2020-02-03
+1969-12-31
+PREHOOK: query: from varchars select cast (s as date format "yyyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchars
+#### A masked pattern was here ####
+POSTHOOK: query: from varchars select cast (s as date format "yyyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchars
+#### A masked pattern was here ####
+2020-02-03
+1969-12-31
+PREHOOK: query: from chars select cast (s as date format "yyyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@chars
+#### A masked pattern was here ####
+POSTHOOK: query: from chars select cast (s as date format "yyyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@chars
+#### A masked pattern was here ####
+2020-02-03
+1969-12-31
diff --git ql/src/test/results/clientpositive/show_functions.q.out ql/src/test/results/clientpositive/show_functions.q.out
index 374e9c4fce..84a9243da9 100644
--- ql/src/test/results/clientpositive/show_functions.q.out
+++ ql/src/test/results/clientpositive/show_functions.q.out
@@ -62,6 +62,7 @@ bucket_number
buildversion
cardinality_violation
case
+cast_format
cbrt
ceil
ceiling
@@ -349,6 +350,7 @@ POSTHOOK: query: SHOW FUNCTIONS '^c.*'
POSTHOOK: type: SHOWFUNCTIONS
cardinality_violation
case
+cast_format
cbrt
ceil
ceiling
diff --git serde/src/java/org/apache/hadoop/hive/serde2/io/DateWritableV2.java serde/src/java/org/apache/hadoop/hive/serde2/io/DateWritableV2.java
index 4b6a3d6c10..4ff4732324 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/io/DateWritableV2.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/io/DateWritableV2.java
@@ -21,6 +21,7 @@
import java.io.DataOutput;
import java.io.IOException;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.type.Date;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableUtils;
@@ -147,6 +148,10 @@ public String toString() {
return date.toString();
}
+  public String toStringFormatted(HiveDateTimeFormatter formatter) {
+    if (formatter == null) {
+      return toString();
+    }
+    return date.toStringFormatted(formatter);
+  }
+
@Override
public int hashCode() {
return date.toEpochDay();
diff --git serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritableV2.java serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritableV2.java
index 9aa7f19ab2..5972bd92b5 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritableV2.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritableV2.java
@@ -22,6 +22,7 @@
import java.io.IOException;
import java.time.format.DateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.Timestamp;
import org.apache.hadoop.hive.common.type.TimestampUtils;
@@ -387,6 +388,16 @@ public String toString() {
return timestamp.format(DATE_TIME_FORMAT);
}
+ public String toStringFormatted(HiveDateTimeFormatter formatter) {
+ if (formatter == null) {
+ return toString();
+ }
+ if (timestampEmpty) {
+ populateTimestamp();
+ }
+ return timestamp.toStringFormatted(formatter);
+ }
+
@Override
public int hashCode() {
long seconds = getSeconds();
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java
index 9129177375..b654dc36a3 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java
@@ -24,6 +24,7 @@
import org.apache.hadoop.hive.common.classification.InterfaceAudience;
import org.apache.hadoop.hive.common.classification.InterfaceStability;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaStringObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
@@ -45,7 +46,6 @@
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableTimestampLocalTZObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
/**
* ObjectInspectorConverters.
@@ -61,6 +61,9 @@
public static interface Converter {
Object convert(Object input);
}
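+  /**
+   * A Converter that can additionally be handed a HiveDateTimeFormatter, so that
+   * datetime values are formatted or parsed with a user-supplied pattern
+   * (used by CAST ... FORMAT).
+   */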
+ public interface ConverterWithFormatOption extends Converter {
+ void setDateTimeFormatter(HiveDateTimeFormatter formatter);
+ }
/**
* IdentityConverter.
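The converters in PrimitiveObjectInspectorConverter below implement this interface; the
intended call pattern, mirroring GenericUDFCastFormat.initialize() above, is roughly the
following sketch (inputOI, outputOI and stringInput are placeholder names):

    ObjectInspectorConverters.ConverterWithFormatOption converter =
        new PrimitiveObjectInspectorConverter.TimestampConverter(inputOI, outputOI);
    converter.setDateTimeFormatter(new HiveSqlDateTimeFormatter("yyyy.mm.dd", true));
    Object timestampWritable = converter.convert(stringInput); // parsed with the pattern
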
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java
index 84c027d51c..1ff338ddc4 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java
@@ -20,6 +20,7 @@
import java.time.ZoneId;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.type.Date;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
@@ -32,6 +33,7 @@
import org.apache.hadoop.hive.serde2.lazy.LazyInteger;
import org.apache.hadoop.hive.serde2.lazy.LazyLong;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.ConverterWithFormatOption;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TimestampLocalTZTypeInfo;
import org.apache.hadoop.io.BytesWritable;
@@ -246,10 +248,11 @@ public Object convert(Object input) {
}
}
- public static class DateConverter implements Converter {
+ public static class DateConverter implements ConverterWithFormatOption {
PrimitiveObjectInspector inputOI;
SettableDateObjectInspector outputOI;
Object r;
+ private HiveDateTimeFormatter formatter = null;
public DateConverter(PrimitiveObjectInspector inputOI,
SettableDateObjectInspector outputOI) {
@@ -263,15 +266,20 @@ public Object convert(Object input) {
return null;
}
return outputOI.set(r, PrimitiveObjectInspectorUtils.getDate(input,
- inputOI));
+ inputOI, formatter));
+ }
+
+ public void setDateTimeFormatter(HiveDateTimeFormatter formatter) {
+ this.formatter = formatter;
}
}
- public static class TimestampConverter implements Converter {
+ public static class TimestampConverter implements ConverterWithFormatOption {
PrimitiveObjectInspector inputOI;
SettableTimestampObjectInspector outputOI;
boolean intToTimestampInSeconds = false;
Object r;
+ private HiveDateTimeFormatter formatter = null;
public TimestampConverter(PrimitiveObjectInspector inputOI,
SettableTimestampObjectInspector outputOI) {
@@ -289,7 +297,11 @@ public Object convert(Object input) {
return null;
}
return outputOI.set(r, PrimitiveObjectInspectorUtils.getTimestamp(input,
- inputOI, intToTimestampInSeconds));
+ inputOI, intToTimestampInSeconds, formatter));
+ }
+
+ public void setDateTimeFormatter(HiveDateTimeFormatter formatter) {
+ this.formatter = formatter;
}
}
@@ -409,13 +421,14 @@ public Object convert(Object input) {
/**
* A helper class to convert any primitive to Text.
*/
- public static class TextConverter implements Converter {
+ public static class TextConverter implements ConverterWithFormatOption {
private final PrimitiveObjectInspector inputOI;
private final Text t = new Text();
private final ByteStream.Output out = new ByteStream.Output();
private static byte[] trueBytes = {'T', 'R', 'U', 'E'};
private static byte[] falseBytes = {'F', 'A', 'L', 'S', 'E'};
+ private HiveDateTimeFormatter formatter = null;
public TextConverter(PrimitiveObjectInspector inputOI) {
// The output ObjectInspector is writableStringObjectInspector.
@@ -486,11 +499,12 @@ public Text convert(Object input) {
}
return t;
case DATE:
- t.set(((DateObjectInspector) inputOI).getPrimitiveWritableObject(input).toString());
+ t.set(((DateObjectInspector) inputOI)
+ .getPrimitiveWritableObject(input).toStringFormatted(formatter));
return t;
case TIMESTAMP:
t.set(((TimestampObjectInspector) inputOI)
- .getPrimitiveWritableObject(input).toString());
+ .getPrimitiveWritableObject(input).toStringFormatted(formatter));
return t;
case TIMESTAMPLOCALTZ:
t.set(((TimestampLocalTZObjectInspector) inputOI).getPrimitiveWritableObject(input).toString());
@@ -520,6 +534,10 @@ public Text convert(Object input) {
throw new RuntimeException("Hive 2 Internal error: type = " + inputOI.getTypeName());
}
}
+
+ public void setDateTimeFormatter(HiveDateTimeFormatter formatter) {
+ this.formatter = formatter;
+ }
}
/**
@@ -540,11 +558,12 @@ public Object convert(Object input) {
}
- public static class HiveVarcharConverter implements Converter {
+ public static class HiveVarcharConverter implements ConverterWithFormatOption {
PrimitiveObjectInspector inputOI;
SettableHiveVarcharObjectInspector outputOI;
Object hc;
+ private HiveDateTimeFormatter formatter;
public HiveVarcharConverter(PrimitiveObjectInspector inputOI,
SettableHiveVarcharObjectInspector outputOI) {
@@ -567,21 +586,26 @@ public Object convert(Object input) {
return null;
}
switch (inputOI.getPrimitiveCategory()) {
- case BOOLEAN:
- return outputOI.set(hc,
- ((BooleanObjectInspector) inputOI).get(input) ?
- new HiveVarchar("TRUE", -1) : new HiveVarchar("FALSE", -1));
- default:
- return outputOI.set(hc, PrimitiveObjectInspectorUtils.getHiveVarchar(input, inputOI));
+      case BOOLEAN:
+        return outputOI.set(hc,
+            ((BooleanObjectInspector) inputOI).get(input)
+                ? new HiveVarchar("TRUE", -1) : new HiveVarchar("FALSE", -1));
+      default:
+        return outputOI.set(hc,
+            PrimitiveObjectInspectorUtils.getHiveVarchar(input, inputOI, formatter));
}
}
+ public void setDateTimeFormatter(HiveDateTimeFormatter formatter) {
+ this.formatter = formatter;
+ }
}
- public static class HiveCharConverter implements Converter {
+ public static class HiveCharConverter implements ConverterWithFormatOption {
PrimitiveObjectInspector inputOI;
SettableHiveCharObjectInspector outputOI;
Object hc;
+ private HiveDateTimeFormatter formatter;
public HiveCharConverter(PrimitiveObjectInspector inputOI,
SettableHiveCharObjectInspector outputOI) {
@@ -601,8 +625,13 @@ public Object convert(Object input) {
((BooleanObjectInspector) inputOI).get(input) ?
new HiveChar("TRUE", -1) : new HiveChar("FALSE", -1));
default:
- return outputOI.set(hc, PrimitiveObjectInspectorUtils.getHiveChar(input, inputOI));
+ return outputOI.set(hc,
+ PrimitiveObjectInspectorUtils.getHiveChar(input, inputOI, formatter));
}
}
+
+ public void setDateTimeFormatter(HiveDateTimeFormatter formatter) {
+ this.formatter = formatter;
+ }
}
}
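The ConverterWithFormatOption interface that these converters now implement is not declared in this excerpt; from the call sites it is presumably the existing Converter contract plus the one setter. A sketch of the assumed shape:

    import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;

    // Assumed shape of the new interface (its declaration is elsewhere in the patch):
    public interface ConverterWithFormatOption extends ObjectInspectorConverters.Converter {
      // Attach an optional formatter; implementations default it to null,
      // which preserves the pre-patch formatting and parsing behavior.
      void setDateTimeFormatter(HiveDateTimeFormatter formatter);
    }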
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
index 3886b202c7..6cf231e7ae 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
@@ -29,6 +29,7 @@
import org.apache.hadoop.hive.common.classification.InterfaceAudience;
import org.apache.hadoop.hive.common.classification.InterfaceStability;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.type.Date;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
@@ -891,12 +892,18 @@ public static float getFloat(Object o, PrimitiveObjectInspector oi) {
return (float) getDouble(o, oi);
}
+ public static String getString(Object o, PrimitiveObjectInspector oi) {
+ return getString(o, oi, null);
+ }
+
/**
* Get the String value out of a primitive object. Returns null if o is null.
* Note that RuntimeException will be thrown if o is not a valid string.
+ * The HiveDateTimeFormatter argument is optional; when it is null, dates and
+ * timestamps are rendered in their default (legacy) string form.
*/
- public static String getString(Object o, PrimitiveObjectInspector oi) {
+ public static String getString(Object o, PrimitiveObjectInspector oi,
+ HiveDateTimeFormatter formatter) {
if (o == null) {
return null;
@@ -951,10 +958,12 @@ public static String getString(Object o, PrimitiveObjectInspector oi) {
result = hcoi.getPrimitiveJavaObject(o).toString();
break;
case DATE:
- result = ((DateObjectInspector) oi).getPrimitiveWritableObject(o).toString();
+ result = ((DateObjectInspector) oi).getPrimitiveWritableObject(o)
+ .toStringFormatted(formatter);
break;
case TIMESTAMP:
- result = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o).toString();
+ result = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o)
+ .toStringFormatted(formatter);
break;
case TIMESTAMPLOCALTZ:
result = ((TimestampLocalTZObjectInspector) oi).getPrimitiveWritableObject(o).toString();
@@ -978,25 +987,35 @@ public static String getString(Object o, PrimitiveObjectInspector oi) {
}
public static HiveChar getHiveChar(Object o, PrimitiveObjectInspector oi) {
+ return getHiveChar(o, oi, null);
+ }
+
+ public static HiveChar getHiveChar(Object o, PrimitiveObjectInspector oi,
+ HiveDateTimeFormatter formatter) {
if (o == null) {
return null;
}
HiveChar result = null;
switch (oi.getPrimitiveCategory()) {
- case CHAR:
- result = ((HiveCharObjectInspector) oi).getPrimitiveJavaObject(o);
- break;
- default:
- // No char length available, copy whole string value here.
- result = new HiveChar();
- result.setValue(getString(o, oi));
- break;
+ case CHAR:
+ result = ((HiveCharObjectInspector) oi).getPrimitiveJavaObject(o);
+ break;
+ default:
+ // No char length available, copy whole string value here.
+ result = new HiveChar();
+ result.setValue(getString(o, oi, formatter));
+ break;
}
return result;
}
public static HiveVarchar getHiveVarchar(Object o, PrimitiveObjectInspector oi) {
+ return getHiveVarchar(o, oi, null);
+ }
+
+ public static HiveVarchar getHiveVarchar(Object o, PrimitiveObjectInspector oi,
+ HiveDateTimeFormatter formatter) {
if (o == null) {
return null;
@@ -1004,16 +1023,16 @@ public static HiveVarchar getHiveVarchar(Object o, PrimitiveObjectInspector oi)
HiveVarchar result = null;
switch (oi.getPrimitiveCategory()) {
- case VARCHAR:
- result = ((HiveVarcharObjectInspector)oi).getPrimitiveJavaObject(o);
- break;
- default:
- // Is there a way to provide char length here?
- // It might actually be ok as long as there is an object inspector (with char length)
- // receiving this value.
- result = new HiveVarchar();
- result.setValue(getString(o, oi));
- break;
+ case VARCHAR:
+ result = ((HiveVarcharObjectInspector) oi).getPrimitiveJavaObject(o);
+ break;
+ default:
+ // Is there a way to provide char length here?
+ // It might actually be ok as long as there is an object inspector (with char length)
+ // receiving this value.
+ result = new HiveVarchar();
+ result.setValue(getString(o, oi, formatter));
+ break;
}
return result;
}
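Each widened utility in this file keeps its old arity as a thin delegate that passes a null formatter, so every existing call site compiles and behaves unchanged; only callers that supply a formatter get the new rendering. For example:

    // Both calls reach the same implementation; only the second can apply a
    // custom format when a date/timestamp input is rendered as text.
    HiveVarchar plain = PrimitiveObjectInspectorUtils.getHiveVarchar(o, oi);
    HiveVarchar formatted =
        PrimitiveObjectInspectorUtils.getHiveVarchar(o, oi, sqlFormatter); // assumed formatter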
@@ -1113,6 +1132,11 @@ public static HiveDecimal getHiveDecimal(Object o, PrimitiveObjectInspector oi)
}
public static Date getDate(Object o, PrimitiveObjectInspector oi) {
+ return getDate(o, oi, null);
+ }
+
+ public static Date getDate(
+ Object o, PrimitiveObjectInspector oi, HiveDateTimeFormatter formatter) {
if (o == null) {
return null;
}
@@ -1125,13 +1149,9 @@ public static Date getDate(Object o, PrimitiveObjectInspector oi) {
StringObjectInspector soi = (StringObjectInspector) oi;
String s = soi.getPrimitiveJavaObject(o).trim();
try {
- if (s.length() == DATE_LENGTH) {
- result = Date.valueOf(s);
- } else {
- Timestamp ts = getTimestampFromString(s);
- if (ts != null) {
- result = Date.ofEpochMilli(ts.toEpochMilli());
- }
+ Date date = getDateFromString(s, formatter);
+ if (date != null) {
+ result = date;
}
} catch (IllegalArgumentException e) {
// Do nothing
@@ -1141,13 +1161,9 @@ public static Date getDate(Object o, PrimitiveObjectInspector oi) {
case VARCHAR: {
String val = getString(o, oi).trim();
try {
- if (val.length() == DATE_LENGTH) {
- result = Date.valueOf(val);
- } else {
- Timestamp ts = getTimestampFromString(val);
- if (ts != null) {
- result = Date.ofEpochMilli(ts.toEpochMilli());
- }
+ Date date = getDateFromString(val, formatter);
+ if (date != null) {
+ result = date;
}
} catch (IllegalArgumentException e) {
// Do nothing
@@ -1177,11 +1193,46 @@ public static Date getDate(Object o, PrimitiveObjectInspector oi) {
return result;
}
+ private static final int DATE_LENGTH = "YYYY-MM-DD".length();
+ private static Date getDateFromString(String s, HiveDateTimeFormatter formatter) {
+ // with SQL formats
+ if (formatter != null) {
+ try {
+ return Date.valueOf(s, formatter);
+ } catch (IllegalArgumentException e) {
+ return null;
+ }
+ }
+
+ // without SQL formats
+ if (s.length() == DATE_LENGTH) {
+ return Date.valueOf(s);
+ } else {
+ Timestamp ts = getTimestampFromString(s);
+ if (ts != null) {
+ return Date.ofEpochMilli(ts.toEpochMilli());
+ }
+ }
+ return null;
+ }
+
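Note the asymmetry in getDateFromString: when a formatter is supplied it is authoritative, and a parse failure returns null with no fallback; without one, the legacy heuristic applies, where a string of exactly DATE_LENGTH characters is parsed as a date and anything else is first parsed as a timestamp and then truncated. Illustrated through the public getDate entry point:

    PrimitiveObjectInspector soi =
        PrimitiveObjectInspectorFactory.javaStringObjectInspector;

    // Legacy heuristic (no formatter): length check, then timestamp fallback.
    Date d1 = PrimitiveObjectInspectorUtils.getDate("2019-02-03", soi);
    Date d2 = PrimitiveObjectInspectorUtils.getDate("2019-02-03 10:11:12", soi);

    // With a formatter, its pattern alone decides; an unparseable string gives
    // null rather than falling back. sqlFormatter is an assumed instance.
    Date d3 = PrimitiveObjectInspectorUtils.getDate("2019-02-03", soi, sqlFormatter);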
public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector oi) {
return getTimestamp(o, oi, false);
}
+ public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector oi,
+     HiveDateTimeFormatter formatter) {
+ return getTimestamp(o, oi, false, formatter);
+ }
+
public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector inputOI, boolean intToTimestampInSeconds) {
+ return getTimestamp(o, inputOI, intToTimestampInSeconds, null);
+ }
+
+ public static Timestamp getTimestamp(Object o,
+ PrimitiveObjectInspector inputOI,
+ boolean intToTimestampInSeconds,
+ HiveDateTimeFormatter formatter) {
if (o == null) {
return null;
}
@@ -1225,11 +1276,11 @@ public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector inputOI,
case STRING:
StringObjectInspector soi = (StringObjectInspector) inputOI;
String s = soi.getPrimitiveJavaObject(o);
- result = getTimestampFromString(s);
+ result = getTimestampFromString(s, formatter);
break;
case CHAR:
case VARCHAR:
- result = getTimestampFromString(getString(o, inputOI));
+ result = getTimestampFromString(getString(o, inputOI), formatter);
break;
case DATE:
result = Timestamp.ofEpochMilli(
@@ -1254,15 +1305,17 @@ public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector inputOI,
return result;
}
- private final static int TS_LENGTH = "yyyy-mm-dd hh:mm:ss".length();
- private final static int DATE_LENGTH = "YYYY-MM-DD".length();
-
public static Timestamp getTimestampFromString(String s) {
+ return getTimestampFromString(s, null);
+ }
+
+ public static Timestamp getTimestampFromString(String s, HiveDateTimeFormatter formatter) {
s = s.trim();
s = trimNanoTimestamp(s);
try {
- return TimestampUtils.stringToTimestamp(s);
+ return TimestampUtils.stringToTimestamp(s, formatter);
} catch (IllegalArgumentException e) {
return null;
}
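getTimestampFromString keeps its trimming steps and now hands the optional formatter down to TimestampUtils.stringToTimestamp; the two-argument stringToTimestamp overload is assumed to be added elsewhere in this patch, with a null formatter again meaning the legacy parse. For example:

    // Same input, two paths: the legacy parse and the formatter-aware one.
    Timestamp t1 =
        PrimitiveObjectInspectorUtils.getTimestampFromString("2019-02-03 10:11:12");
    Timestamp t2 = PrimitiveObjectInspectorUtils.getTimestampFromString(
        "2019-02-03 10:11:12", sqlFormatter); // sqlFormatter: assumed instance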
@@ -1284,19 +1337,6 @@ private static String trimNanoTimestamp(String s) {
return s;
}
- private static boolean isValidTimeStamp(final String s) {
- if (s.length() == TS_LENGTH ||
- (s.contains(".") &&
- s.substring(0, s.indexOf('.')).length() == TS_LENGTH)) {
- // Possible timestamp
- if (s.charAt(DATE_LENGTH) == '-') {
- return false;
- }
- return true;
- }
- return false;
- }
-
public static TimestampTZ getTimestampLocalTZ(Object o, PrimitiveObjectInspector oi,
ZoneId timeZone) {
if (o == null) {