diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveDateTimeFormatter.java common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveDateTimeFormatter.java
new file mode 100644
index 0000000000..a158d4befd
--- /dev/null
+++ common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveDateTimeFormatter.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+
+/**
+ * Interface for formatting and parsing datetime objects. Initially created so that users can
+ * optionally format datetime objects into strings and parse strings into datetime objects with
+ * SQL:2016 semantics, as well as with the legacy (java.text.SimpleDateFormat) patterns.
+ */
+public interface HiveDateTimeFormatter {
+ /**
+ * Format the given timestamp into a string.
+ *
+ * @throws IllegalArgumentException if timestamp cannot be formatted.
+ */
+ String format(Timestamp ts);
+
+ /**
+ * Format the given date into a string.
+ *
+ * @throws IllegalArgumentException if date cannot be formatted.
+ */
+ String format(Date date);
+
+ /**
+ * Parse the given string into a timestamp.
+ *
+ * @throws IllegalArgumentException if string cannot be parsed.
+ */
+ Timestamp parseTimestamp(String string);
+
+ /**
+   * Parse the given string into a date.
+ *
+ * @throws IllegalArgumentException if string cannot be parsed.
+ */
+ Date parseDate(String string);
+
+ /**
+ * Get the format pattern to be used for formatting datetime objects or parsing strings.
+ */
+ String getPattern();
+}
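A minimal usage sketch of this contract (illustrative only; HiveSqlDateTimeFormatter is the
SQL:2016 implementation added later in this patch):

    HiveDateTimeFormatter formatter = new HiveSqlDateTimeFormatter("yyyy-mm-dd hh24:mi:ss", true);
    Timestamp ts = formatter.parseTimestamp("2018-02-03 04:05:06");
    String s = formatter.format(ts); // "2018-02-03 04:05:06"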
diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveJavaDateTimeFormatter.java common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveJavaDateTimeFormatter.java
new file mode 100644
index 0000000000..409a902e65
--- /dev/null
+++ common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveJavaDateTimeFormatter.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+
+import java.time.Instant;
+import java.time.LocalDateTime;
+import java.time.ZoneId;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+
+/**
+ * Wrapper for DateTimeFormatter in the java.time package.
+ */
+public class HiveJavaDateTimeFormatter implements HiveDateTimeFormatter {
+
+  private final DateTimeFormatter formatter;
+
+ public HiveJavaDateTimeFormatter(DateTimeFormatter formatter) {
+ this.formatter = formatter;
+ }
+
+ @Override public String format(Timestamp ts) {
+ return formatter.format(
+ LocalDateTime.ofInstant(
+ Instant.ofEpochSecond(ts.toEpochSecond(), ts.getNanos()), ZoneId.of("UTC")));
+ }
+
+ @Override public String format(Date date) {
+ return format(Timestamp.ofEpochMilli(date.toEpochMilli()));
+ }
+
+ @Override public Timestamp parseTimestamp(String string) {
+ LocalDateTime ldt = LocalDateTime.parse(string, formatter);
+ return Timestamp.ofEpochSecond(ldt.toEpochSecond(ZoneOffset.UTC), ldt.getNano());
+ }
+
+ @Override public Date parseDate(String string) {
+ return Date.ofEpochMilli(parseTimestamp(string).toEpochMilli());
+ }
+
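+  // Note: DateTimeFormatter does not retain the original pattern string, so this returns
+  // the formatter's description (toString()) rather than the exact pattern it was built from.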
+ @Override public String getPattern() {
+ return formatter.toString();
+ }
+}
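A sketch of typical use, mirroring TestHiveJavaDateTimeFormatter below. All conversions are
pinned to UTC, so results do not depend on the JVM's default time zone:

    DateTimeFormatter dtf = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
    HiveDateTimeFormatter formatter = new HiveJavaDateTimeFormatter(dtf);
    String s = formatter.format(Timestamp.valueOf("2019-01-01 23:30:00")); // "2019-01-01 23:30:00"
    Timestamp ts = formatter.parseTimestamp("2019-01-01 23:30:00");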
diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSimpleDateFormatter.java common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSimpleDateFormatter.java
new file mode 100644
index 0000000000..2f8070d613
--- /dev/null
+++ common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSimpleDateFormatter.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import org.apache.hadoop.hive.common.type.Timestamp;
+
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.TimeZone;
+
+/**
+ * Wrapper for java.text.SimpleDateFormat.
+ */
+public class HiveSimpleDateFormatter implements HiveDateTimeFormatter {
+
+  private final SimpleDateFormat format = new SimpleDateFormat();
+ private String pattern;
+
+ public HiveSimpleDateFormatter(String pattern, TimeZone timeZone) {
+ setPattern(pattern);
+ format.setTimeZone(timeZone);
+ }
+
+ @Override public String format(Timestamp ts) {
+ Date date = new Date(ts.toEpochMilli());
+ return format.format(date);
+ }
+
+ @Override public String format(org.apache.hadoop.hive.common.type.Date date) {
+    return format(Timestamp.ofEpochMilli(date.toEpochMilli()));
+ }
+
+ @Override public Timestamp parseTimestamp(String string) {
+ try {
+ Date date = format.parse(string);
+ return Timestamp.ofEpochMilli(date.getTime());
+ } catch (java.text.ParseException e) {
+ throw new IllegalArgumentException(
+ "String " + string + " could not be parsed by java.text.SimpleDateFormat: " + format);
+ }
+ }
+
+ @Override public org.apache.hadoop.hive.common.type.Date parseDate(String string) {
+ return org.apache.hadoop.hive.common.type.Date.ofEpochMilli(
+ parseTimestamp(string).toEpochMilli());
+ }
+
+ private void setPattern(String pattern) {
+ format.applyPattern(pattern);
+ this.pattern = pattern;
+ }
+
+ @Override public String getPattern() {
+ return pattern;
+ }
+}
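Usage sketch, mirroring TestHiveSimpleDateFormatter below. Since java.text.SimpleDateFormat is
not thread-safe, instances of this wrapper should not be shared between threads:

    HiveDateTimeFormatter formatter =
        new HiveSimpleDateFormatter("yyyy-MM-dd HH:mm:ss", TimeZone.getTimeZone("UTC"));
    Timestamp ts = formatter.parseTimestamp("1960-01-01 23:00:00");
    String s = formatter.format(ts); // "1960-01-01 23:00:00"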
diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java
new file mode 100644
index 0000000000..d0e2b805e9
--- /dev/null
+++ common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java
@@ -0,0 +1,831 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+
+import java.time.DateTimeException;
+import java.time.Duration;
+import java.time.Instant;
+import java.time.LocalDateTime;
+import java.time.ZoneOffset;
+import java.time.temporal.ChronoField;
+import java.time.temporal.ChronoUnit;
+import java.time.temporal.TemporalField;
+import java.time.temporal.TemporalUnit;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.TimeZone;
+
+/**
+ * Formatter using SQL:2016 datetime patterns.
+ *
+ * For all tokens:
+ * - Patterns are case-insensitive, except AM/PM and T/Z. See these sections for more details.
+ * - For string to datetime conversion, no duplicate format tokens are allowed, including tokens
+ * that have the same meaning but different lengths ("Y" and "YY" conflict) or different
+ * behaviors ("RR" and "YY" conflict).
+ *
+ * For all numeric tokens:
+ * - The "expected length" of input/output is the number of tokens in the character (e.g. "YYY": 3,
+ * "Y": 1, and so on), with some exceptions (see map SPECIAL_LENGTHS).
+ * - For string to datetime conversion, inputs of fewer digits than expected are accepted if
+ * followed by a delimiter, e.g. format="YYYY-MM-DD", input="19-1-1", output=2019-01-01 00:00:00.
+ * - For datetime to string conversion, output is left padded with zeros, e.g. format="DD SSSSS",
+ * input=2019-01-01 00:00:03, output="01 00003".
+ *
+ *
+ * Accepted format tokens:
+ * Note: "|" means "or". "Delimiter" means a separator, tokens T or Z, or end of input.
+ *
+ * A. Temporal tokens
+ * YYYY
+ * 4-digit year
+ * - For string to datetime conversion, prefix digits for 1, 2, and 3-digit inputs are obtained
+ * from current date
+ *     E.g. input="9-01-01", pattern="YYYY-MM-DD", current year=2020, output=2029-01-01 00:00:00
+ *
+ *
+ * YYY
+ * Last 3 digits of a year
+ * - Gets the prefix digit from current date.
+ * - Can accept fewer digits than 3, similarly to YYYY.
+ *
+ * YY
+ * Last 2 digits of a year
+ * - Gets the 2 prefix digits from current date.
+ * - Can accept fewer digits than 2, similarly to YYYY.
+ *
+ * Y
+ * Last digit of a year
+ * - Gets the 3 prefix digits from current date.
+ *
+ * RRRR
+ * 4-digit rounded year
+ * - String to datetime conversion:
+ *    - If 2 digits are provided, acts like RR.
+ *    - If 1, 3, or 4 digits are provided, acts like YYYY.
+ * - For datetime to string conversion, acts like YYYY.
+ *
+ * RR
+ * 2-digit rounded year
+ *   - String to datetime conversion:
+ * - Semantics:
+ * Input: Last 2 digits of current year: First 2 digits of output:
+ * 0 to 49 00 to 49 First 2 digits of current year
+ * 0 to 49 50 to 99 First 2 digits of current year + 1
+ * 50 to 99 00 to 49 First 2 digits of current year - 1
+ * 50 to 99 50 to 99 First 2 digits of current year
+ * - If 1-digit year is provided followed by a delimiter, falls back to YYYY with 1-digit year
+ * input.
+ * - For datetime to string conversion, acts like YY.
+ *
+ * MM
+ * Month (1-12)
+ * - For string to datetime conversion, conflicts with DDD.
+ *
+ * DD
+ * Day of month (1-31)
+ * - For string to datetime conversion, conflicts with DDD.
+ *
+ * DDD
+ * Day of year (1-366)
+ * - For string to datetime conversion, conflicts with DD and MM.
+ *
+ * HH
+ * Hour of day (1-12)
+ * - If no AM/PM provided then defaults to AM.
+ * - In string to datetime conversion, conflicts with SSSSS and HH24.
+ *
+ * HH12
+ * Hour of day (1-12)
+ * See HH.
+ *
+ * HH24
+ * Hour of day (0-23)
+ * - In string to datetime conversion, conflicts with SSSSS, HH12 and AM/PM.
+ *
+ * MI
+ * Minute of hour (0-59)
+ * - In string to datetime conversion, conflicts with SSSSS.
+ *
+ * SS
+ * Second of minute (0-59)
+ * - In string to datetime conversion, conflicts with SSSSS.
+ *
+ * SSSSS
+ * Second of Day (0-86399)
+ * - In string to datetime conversion, conflicts with SS, HH, HH12, HH24, MI, AM/PM.
+ *
+ * FF[1..9]
+ * Fraction of second
+ * - 1..9 indicates the number of decimal digits. "FF" (no number of digits specified) is also
+ * accepted.
+ * - In datetime to string conversion, "FF" will omit trailing zeros, or output "0" if subsecond
+ * value is 0.
+ * - In string to datetime conversion, fewer digits than expected are accepted if followed by a
+ * delimiter. "FF" acts like "FF9".
+ *
+ * AM|A.M.
+ * Meridiem indicator or AM/PM
+ * - Datetime to string conversion:
+ * - AM and PM mean the exact same thing in the pattern.
+ *     e.g. input=2019-01-01 20:00, format="AM", output="PM".
+ * - Retains the exact format (capitalization and length) provided in the pattern string. If p.m.
+ * is in the pattern, we expect a.m. or p.m. in the output; if AM is in the pattern, we expect
+ * AM or PM in the output.
+ * - String to datetime conversion:
+ * - Conflicts with HH24 and SSSSS.
+ *   - It doesn't matter which meridiem indicator is in the pattern.
+ * E.g. input="2019-01-01 11:00 p.m.", pattern="YYYY-MM-DD HH12:MI AM",
+ * output=2019-01-01 23:00:00
+ *
+ * PM|P.M.
+ * Meridiem indicator
+ * See AM|A.M.
+ *
+ * B. Time zone tokens
+ * TZH
+ * Time zone offset hour (-15 to +15)
+ * - 3-character-long input is expected: 1 character for the sign and 2 digits for the value.
+ *   e.g. "+10", "-05"
+ * - 2-digit input is accepted without the sign, e.g. "04".
+ * - Both these 2 and 3-digit versions are accepted even if not followed by separators.
+ * - Disabled for timestamp to string and date to string conversion, as timestamp and date are time
+ * zone agnostic.
+ *
+ * TZM
+ * Time zone offset minute (0-59)
+ * - For string to datetime conversion:
+ * - TZH token is required.
+ * - Unsigned; sign comes from TZH.
+ *   - Therefore time zone offsets like "-30" minutes should be expressed thus: input="-00:30"
+ *     pattern="TZH:TZM".
+ * - Disabled for timestamp to string and date to string conversion, as timestamp and date are time
+ * zone agnostic.
+ *
+ * C. Separators
+ * -|.|/|,|'|;|:|<space>
+ * Separator
+ * - Uses loose matching. Existence of a sequence of separators in the format should match the
+ * existence of a sequence of separators in the input regardless of the types of the separator or
+ *   the length of the sequence where length > 1. E.g. input="2019-. ;10/10", pattern="YYYY-MM-DD"
+ *   is valid; input="20191010", pattern="YYYY-MM-DD" is not valid.
+ * - If the last separator character in the separator substring is "-" and is immediately followed
+ * by a time zone hour (tzh) token, it's a negative sign and not counted as a separator, UNLESS
+ * this is the only possible separator character in the separator substring (in which case it is
+ * not counted as the tzh's negative sign).
+ *
+ * D. ISO 8601 delimiters
+ * T
+ * ISO 8601 delimiter
+ * - Serves as a delimiter.
+ * - Its function is to support formats like "YYYY-MM-DDTHH24:MI:SS.FF9Z", "YYYY-MM-DD-HH24:MI:SSZ"
+ * - For datetime to string conversion, output is always capitalized ("T"), even if lowercase ("t")
+ * is provided in the pattern.
+ *
+ * Z
+ * ISO 8601 delimiter
+ * See T.
+ */
+public class HiveSqlDateTimeFormatter implements HiveDateTimeFormatter {
+
+ private static final int LONGEST_TOKEN_LENGTH = 5;
+ private static final int LONGEST_ACCEPTED_PATTERN = 100; // for sanity's sake
+ private static final long MINUTES_PER_HOUR = 60;
+ private static final int FIFTY = 50;
+ private static final int NANOS_MAX_LENGTH = 9;
+ public static final int AM = 0;
+ public static final int PM = 1;
+ private String pattern;
+  protected List<Token> tokens = new ArrayList<>();
+
+  private static final Map<String, TemporalField> VALID_TEMPORAL_TOKENS =
+      ImmutableMap.<String, TemporalField>builder()
+ .put("yyyy", ChronoField.YEAR).put("yyy", ChronoField.YEAR)
+ .put("yy", ChronoField.YEAR).put("y", ChronoField.YEAR)
+ .put("rrrr", ChronoField.YEAR).put("rr", ChronoField.YEAR)
+ .put("mm", ChronoField.MONTH_OF_YEAR)
+ .put("dd", ChronoField.DAY_OF_MONTH)
+ .put("ddd", ChronoField.DAY_OF_YEAR)
+ .put("hh", ChronoField.HOUR_OF_AMPM)
+ .put("hh12", ChronoField.HOUR_OF_AMPM)
+ .put("hh24", ChronoField.HOUR_OF_DAY)
+ .put("mi", ChronoField.MINUTE_OF_HOUR)
+ .put("ss", ChronoField.SECOND_OF_MINUTE)
+ .put("sssss", ChronoField.SECOND_OF_DAY)
+ .put("ff1", ChronoField.NANO_OF_SECOND).put("ff2", ChronoField.NANO_OF_SECOND)
+ .put("ff3", ChronoField.NANO_OF_SECOND).put("ff4", ChronoField.NANO_OF_SECOND)
+ .put("ff5", ChronoField.NANO_OF_SECOND).put("ff6", ChronoField.NANO_OF_SECOND)
+ .put("ff7", ChronoField.NANO_OF_SECOND).put("ff8", ChronoField.NANO_OF_SECOND)
+ .put("ff9", ChronoField.NANO_OF_SECOND).put("ff", ChronoField.NANO_OF_SECOND)
+ .put("a.m.", ChronoField.AMPM_OF_DAY).put("am", ChronoField.AMPM_OF_DAY)
+ .put("p.m.", ChronoField.AMPM_OF_DAY).put("pm", ChronoField.AMPM_OF_DAY)
+ .build();
+
+  private static final Map<String, TemporalUnit> VALID_TIME_ZONE_TOKENS =
+      ImmutableMap.<String, TemporalUnit>builder()
+ .put("tzh", ChronoUnit.HOURS).put("tzm", ChronoUnit.MINUTES).build();
+
+  static final List<String> VALID_ISO_8601_DELIMITERS =
+ ImmutableList.of("t", "z");
+
+  private static final List<String> VALID_SEPARATORS =
+ ImmutableList.of("-", ":", " ", ".", "/", ";", "\'", ",");
+
+  private static final Map<String, Integer> SPECIAL_LENGTHS =
+      ImmutableMap.<String, Integer>builder()
+ .put("hh12", 2).put("hh24", 2).put("tzm", 2).put("am", 4).put("pm", 4)
+ .put("ff1", 1).put("ff2", 2).put("ff3", 3).put("ff4", 4).put("ff5", 5)
+ .put("ff6", 6).put("ff7", 7).put("ff8", 8).put("ff9", 9).put("ff", 9)
+ .build();
+
+ /**
+ * Represents broad categories of tokens.
+ */
+ public enum TokenType {
+ TEMPORAL,
+ SEPARATOR,
+ TIMEZONE,
+ ISO_8601_DELIMITER
+ }
+
+ /**
+ * Token representation.
+ */
+ public static class Token {
+ TokenType type;
+ TemporalField temporalField; // for type TEMPORAL e.g. ChronoField.YEAR
+ TemporalUnit temporalUnit; // for type TIMEZONE e.g. ChronoUnit.HOURS
+ String string; // pattern string, e.g. "yyy"
+ int length; // length (e.g. YYY: 3, FF8: 8)
+
+ public Token(TemporalField temporalField, String string, int length) {
+ this(TokenType.TEMPORAL, temporalField, null, string, length);
+ }
+
+ public Token(TemporalUnit temporalUnit, String string, int length) {
+ this(TokenType.TIMEZONE, null, temporalUnit, string, length);
+ }
+
+ public Token(TokenType tokenType, String string) {
+ this(tokenType, null, null, string, string.length());
+ }
+
+ public Token(TokenType tokenType, TemporalField temporalField, TemporalUnit temporalUnit,
+ String string, int length) {
+ this.type = tokenType;
+ this.temporalField = temporalField;
+ this.temporalUnit = temporalUnit;
+ this.string = string;
+ this.length = length;
+ }
+
+ @Override public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append(string);
+ sb.append(" type: ");
+ sb.append(type);
+ if (temporalField != null) {
+ sb.append(" temporalField: ");
+ sb.append(temporalField);
+ } else if (temporalUnit != null) {
+ sb.append(" temporalUnit: ");
+ sb.append(temporalUnit);
+ }
+ return sb.toString();
+ }
+ }
+
+ public HiveSqlDateTimeFormatter(String pattern, boolean forParsing) {
+ setPattern(pattern, forParsing);
+ }
+
+ /**
+ * Parse and perhaps verify the pattern.
+ */
+ private void setPattern(String pattern, boolean forParsing) {
+    if (pattern.length() > LONGEST_ACCEPTED_PATTERN) {
+      throw new IllegalArgumentException("The input format is too long: " + pattern);
+    }
+
+ this.pattern = parsePatternToTokens(pattern);
+
+ // throw Exception if list of tokens doesn't make sense for parsing. Formatting is less picky.
+ if (forParsing) {
+ verifyForParse();
+ } else {
+ verifyForFormat();
+ }
+ }
+
+ /**
+ * Parse pattern to list of tokens.
+ */
+ private String parsePatternToTokens(String pattern) {
+ tokens.clear();
+ String originalPattern = pattern;
+ pattern = pattern.toLowerCase();
+
+ // indexes of the substring we will check (includes begin, does not include end)
+    int begin = 0, end = 0;
+ String candidate;
+ Token lastAddedToken = null;
+
+ while (begin < pattern.length()) {
+
+ // if begin hasn't progressed, then something is unparseable
+ if (begin != end) {
+ tokens.clear();
+ throw new IllegalArgumentException("Bad date/time conversion format: " + pattern);
+ }
+
+ //process next token: start with substring
+ for (int i = LONGEST_TOKEN_LENGTH; i > 0; i--) {
+ end = begin + i;
+ if (end > pattern.length()) { // don't go past the end of the pattern string
+ continue;
+ }
+ candidate = pattern.substring(begin, end);
+ // if it's a separator, then clump it with immediately preceding separators (e.g. "---"
+ // counts as one separator).
+ if (candidate.length() == 1 && VALID_SEPARATORS.contains(candidate)) {
+ if (lastAddedToken != null && lastAddedToken.type == TokenType.SEPARATOR) {
+ lastAddedToken.string += candidate;
+ lastAddedToken.length += 1;
+ } else {
+ lastAddedToken = new Token(TokenType.SEPARATOR, candidate);
+ tokens.add(lastAddedToken);
+ }
+ begin = end;
+ break;
+ } else if (candidate.length() == 1 && VALID_ISO_8601_DELIMITERS.contains(candidate)) {
+ lastAddedToken = new Token(TokenType.ISO_8601_DELIMITER, candidate.toUpperCase());
+ tokens.add(lastAddedToken);
+ begin = end;
+ break;
+ //temporal token
+        } else if (VALID_TEMPORAL_TOKENS.containsKey(candidate)) {
+ // for AM/PM, keep original case
+ if (VALID_TEMPORAL_TOKENS.get(candidate) == ChronoField.AMPM_OF_DAY) {
+ int subStringEnd = begin + candidate.length();
+ candidate = originalPattern.substring(begin, subStringEnd);
+ //token string may be capitalized, update pattern
+ pattern = pattern.substring(0, begin) + candidate + pattern.substring(subStringEnd);
+ }
+ lastAddedToken = new Token(VALID_TEMPORAL_TOKENS.get(candidate.toLowerCase()), candidate,
+ getTokenStringLength(candidate.toLowerCase()));
+ tokens.add(lastAddedToken);
+ begin = end;
+ break;
+ //time zone
+        } else if (VALID_TIME_ZONE_TOKENS.containsKey(candidate)) {
+ lastAddedToken = new Token(VALID_TIME_ZONE_TOKENS.get(candidate), candidate,
+ getTokenStringLength(candidate));
+ tokens.add(lastAddedToken);
+ begin = end;
+ break;
+ }
+ }
+ }
+ return pattern;
+ }
+
+ private int getTokenStringLength(String candidate) {
+ if (SPECIAL_LENGTHS.containsKey(candidate)) {
+ return SPECIAL_LENGTHS.get(candidate);
+ }
+ return candidate.length();
+ }
+
+ /**
+ * Make sure the generated list of tokens is valid for parsing strings to datetime objects.
+ */
+ private void verifyForParse() {
+
+ // create a list of tokens' temporal fields
+    List<TemporalField> temporalFields = new ArrayList<>();
+    List<TemporalUnit> timeZoneTemporalUnits = new ArrayList<>();
+    int roundYearCount = 0, yearCount = 0;
+ for (Token token : tokens) {
+ if (token.temporalField != null) {
+ temporalFields.add(token.temporalField);
+ if (token.temporalField == ChronoField.YEAR) {
+ if (token.string.startsWith("r")) {
+ roundYearCount += 1;
+ } else {
+ yearCount += 1;
+ }
+ }
+ } else if (token.temporalUnit != null) {
+ timeZoneTemporalUnits.add(token.temporalUnit);
+ }
+ }
+
+ if (roundYearCount > 0 && yearCount > 0) {
+ throw new IllegalArgumentException("Invalid duplication of format element: Both year and"
+ + "round year are provided");
+ }
+ for (TemporalField tokenType : temporalFields) {
+ if (Collections.frequency(temporalFields, tokenType) > 1) {
+ throw new IllegalArgumentException(
+ "Invalid duplication of format element: multiple " + tokenType.toString()
+ + " tokens provided.");
+ }
+ }
+ if (temporalFields.contains(ChronoField.AMPM_OF_DAY) &&
+ !(temporalFields.contains(ChronoField.HOUR_OF_DAY) ||
+ temporalFields.contains(ChronoField.HOUR_OF_AMPM))) {
+ throw new IllegalArgumentException("Missing hour token.");
+ }
+ if (temporalFields.contains(ChronoField.AMPM_OF_DAY) &&
+ temporalFields.contains(ChronoField.HOUR_OF_DAY)) {
+ throw new IllegalArgumentException("Conflict between median indicator and hour token.");
+ }
+ if (temporalFields.contains(ChronoField.HOUR_OF_AMPM) &&
+ temporalFields.contains(ChronoField.HOUR_OF_DAY)) {
+ throw new IllegalArgumentException("Conflict between hour of day and hour of am/pm token.");
+ }
+ if (temporalFields.contains(ChronoField.DAY_OF_YEAR) &&
+ (temporalFields.contains(ChronoField.DAY_OF_MONTH) ||
+ temporalFields.contains(ChronoField.MONTH_OF_YEAR))) {
+ throw new IllegalArgumentException("Day of year provided with day or month token.");
+ }
+ if (temporalFields.contains(ChronoField.SECOND_OF_DAY) &&
+ (temporalFields.contains(ChronoField.HOUR_OF_DAY) ||
+ temporalFields.contains(ChronoField.HOUR_OF_AMPM) ||
+ temporalFields.contains(ChronoField.MINUTE_OF_HOUR) ||
+ temporalFields.contains(ChronoField.SECOND_OF_MINUTE))) {
+ throw new IllegalArgumentException(
+ "Second of day token conflicts with other token(s).");
+ }
+ if (timeZoneTemporalUnits.contains(ChronoUnit.MINUTES) &&
+ !timeZoneTemporalUnits.contains(ChronoUnit.HOURS)) {
+ throw new IllegalArgumentException("Time zone minute token provided without time zone hour token.");
+ }
+ }
+
+ /**
+ * Make sure the generated list of tokens is valid for formatting datetime objects to strings.
+ */
+ private void verifyForFormat() {
+ for (Token token : tokens) {
+ if (token.type == TokenType.TIMEZONE) {
+ throw new IllegalArgumentException(token.string.toUpperCase() + " not a valid format for "
+ + "timestamp or date.");
+ }
+ }
+ }
+
+ @Override public String format(Timestamp ts) {
+ StringBuilder fullOutputSb = new StringBuilder();
+ String outputString = null;
+ int value;
+ LocalDateTime localDateTime =
+ LocalDateTime.ofEpochSecond(ts.toEpochSecond(), ts.getNanos(), ZoneOffset.UTC);
+ for (Token token : tokens) {
+ switch (token.type) {
+ case TEMPORAL:
+ try {
+ value = localDateTime.get(token.temporalField);
+ outputString = formatTemporal(value, token);
+ } catch (DateTimeException e) {
+ throw new IllegalArgumentException(token.temporalField + " couldn't be obtained from "
+ + "LocalDateTime " + localDateTime, e);
+ }
+ break;
+ case TIMEZONE: //invalid for timestamp and date
+ throw new IllegalArgumentException(token.string.toUpperCase() + " not a valid format for "
+ + "timestamp or date.");
+ case SEPARATOR:
+ outputString = token.string;
+ break;
+ case ISO_8601_DELIMITER:
+ outputString = token.string.toUpperCase();
+ break;
+ default:
+ //do nothing
+ }
+ fullOutputSb.append(outputString);
+ }
+ return fullOutputSb.toString();
+ }
+
+ @Override public String format(Date date) {
+ return format(Timestamp.ofEpochSecond(date.toEpochSecond()));
+ }
+
+ private String formatTemporal(int value, Token token) {
+ String output;
+ if (token.temporalField == ChronoField.AMPM_OF_DAY) {
+ output = value == 0 ? "a" : "p";
+ output += token.string.length() == 2 ? "m" : ".m.";
+ if (token.string.startsWith("A") || token.string.startsWith("P")) {
+ output = output.toUpperCase();
+ }
+ } else {
+      // it's a numeric value; String.valueOf cannot fail, so convert then pad/truncate
+      output = padOrTruncateNumericTemporal(token, String.valueOf(value));
+ }
+ return output;
+ }
+
+ /**
+ * To match token.length, pad left with zeroes or truncate.
+ */
+ private String padOrTruncateNumericTemporal(Token token, String output) {
+ if (output.length() < token.length) {
+ output = StringUtils.leftPad(output, token.length, '0'); // pad left
+ } else if (output.length() > token.length) {
+ if (token.temporalField == ChronoField.NANO_OF_SECOND) {
+ output = output.substring(0, token.length); // truncate right
+ } else {
+ output = output.substring(output.length() - token.length); // truncate left
+ }
+ }
+ if (token.temporalField == ChronoField.NANO_OF_SECOND
+ && token.string.equalsIgnoreCase("ff")) {
+ output = output.replaceAll("0*$", ""); //truncate trailing 0's
+ if (output.isEmpty()) {
+ output = "0";
+ }
+ }
+ return output;
+ }
+
+ /**
+ * Left here for timestamp with local time zone.
+ */
+ private String formatTimeZone(TimeZone timeZone, LocalDateTime localDateTime, Token token) {
+ ZoneOffset offset = timeZone.toZoneId().getRules().getOffset(localDateTime);
+ Duration seconds = Duration.of(offset.get(ChronoField.OFFSET_SECONDS), ChronoUnit.SECONDS);
+ if (token.string.equals("tzh")) {
+ long hours = seconds.toHours();
+ String s = (hours >= 0) ? "+" : "-";
+ s += (Math.abs(hours) < 10) ? "0" : "";
+ s += String.valueOf(Math.abs(hours));
+ return s;
+ } else {
+ long minutes = Math.abs(seconds.toMinutes() % MINUTES_PER_HOUR);
+ String s = String.valueOf(minutes);
+ if (s.length() == 1) {
+ s = "0" + s;
+ }
+ return s;
+ }
+ }
+
+  @Override public Timestamp parseTimestamp(String fullInput) {
+ LocalDateTime ldt = LocalDateTime.ofInstant(Instant.EPOCH, ZoneOffset.UTC);
+ String substring;
+ int index = 0;
+ int value;
+ int timeZoneSign = 0, timeZoneHours = 0, timeZoneMinutes = 0;
+
+ for (Token token : tokens) {
+ switch (token.type) {
+ case TEMPORAL:
+ substring = getNextSubstring(fullInput, index, token); // e.g. yy-m -> yy
+ value = parseTemporal(substring, token); // e.g. 18->2018, July->07
+ try {
+ ldt = ldt.with(token.temporalField, value);
+          } catch (DateTimeException e) {
+            throw new IllegalArgumentException(
+                "Value " + value + " not valid for token " + token.toString(), e);
+ }
+ index += substring.length();
+ break;
+ case TIMEZONE:
+ if (token.temporalUnit == ChronoUnit.HOURS) {
+ String nextCharacter = fullInput.substring(index, index + 1);
+ timeZoneSign = "-".equals(nextCharacter) ? -1 : 1;
+ if ("-".equals(nextCharacter) || "+".equals(nextCharacter)) {
+ index++;
+ }
+ // parse next two digits
+ substring = getNextSubstring(fullInput, index, index + 2, token);
+ try {
+ timeZoneHours = Integer.parseInt(substring);
+ } catch (NumberFormatException e) {
+ throw new IllegalArgumentException("Couldn't parse substring \"" + substring +
+ "\" with token " + token + " to int. Pattern is " + pattern, e);
+ }
+ if (timeZoneHours < -15 || timeZoneHours > 15) {
+ throw new IllegalArgumentException("Couldn't parse substring \"" + substring +
+ "\" to TZH because TZH range is -15 to +15. Pattern is " + pattern);
+ }
+ } else { // time zone minutes
+ substring = getNextSubstring(fullInput, index, token);
+ try {
+ timeZoneMinutes = Integer.parseInt(substring);
+ } catch (NumberFormatException e) {
+ throw new IllegalArgumentException("Couldn't parse substring \"" + substring +
+ "\" with token " + token + " to int. Pattern is " + pattern, e);
+ }
+ if (timeZoneMinutes < 0 || timeZoneMinutes > 59) {
+ throw new IllegalArgumentException("Couldn't parse substring \"" + substring +
+ "\" to TZM because TZM range is 0 to 59. Pattern is " + pattern);
+ }
+ }
+ index += substring.length();
+ break;
+ case SEPARATOR:
+ index = parseSeparator(fullInput, index, token);
+ break;
+ case ISO_8601_DELIMITER:
+        index = parseIso8601Delimiter(fullInput, index, token);
+        break;
+      default:
+ //do nothing
+ }
+ }
+ // time zone hours -- process here because hh/hh24 may be parsed after tzh
+ ldt = ldt.minus(timeZoneSign * timeZoneHours, ChronoUnit.HOURS);
+ // time zone minutes -- process here because sign depends on tzh sign
+    ldt = ldt.minus(timeZoneSign * timeZoneMinutes, ChronoUnit.MINUTES);
+
+ // anything left unparsed at end of string? throw error
+ if (!fullInput.substring(index).isEmpty()) {
+ throw new IllegalArgumentException("Leftover input after parsing: " +
+ fullInput.substring(index) + " in string " + fullInput);
+ }
+
+ return Timestamp.ofEpochSecond(ldt.toEpochSecond(ZoneOffset.UTC), ldt.getNano());
+ }
+
+  @Override public Date parseDate(String input) {
+    return Date.ofEpochMilli(parseTimestamp(input).toEpochMilli());
+  }
+
+  /**
+ * Return the next substring to parse. Length is either specified or token.length, but a
+ * separator or an ISO-8601 delimiter can cut the substring short. (e.g. if the token pattern is
+ * "YYYY" we expect the next 4 characters to be 4 numbers. However, if it is "976/" then we
+ * return "976" because a separator cuts it short.)
+ */
+ private String getNextSubstring(String s, int begin, Token token) {
+ return getNextSubstring(s, begin, begin + token.length, token);
+ }
+
+ private String getNextSubstring(String s, int begin, int end, Token token) {
+ if (end > s.length()) {
+ end = s.length();
+ }
+ s = s.substring(begin, end);
+ if (token.temporalField == ChronoField.AMPM_OF_DAY) {
+ if (s.charAt(1) == 'm' || s.charAt(1) == 'M') { // length 2
+ return s.substring(0, 2);
+ } else {
+ return s;
+ }
+ }
+ for (String sep : VALID_SEPARATORS) {
+ if (s.contains(sep)) {
+ s = s.substring(0, s.indexOf(sep));
+ }
+ }
+ // TODO this will cause problems with DAY (for example, Thursday starts with T)
+ for (String delimiter : VALID_ISO_8601_DELIMITERS) {
+ if (s.toLowerCase().contains(delimiter)) {
+ s = s.substring(0, s.toLowerCase().indexOf(delimiter));
+ }
+ }
+
+ return s;
+ }
+
+ /**
+ * Get the integer value of a temporal substring.
+ */
+  private int parseTemporal(String substring, Token token) {
+ // exceptions to the rule
+ if (token.temporalField == ChronoField.AMPM_OF_DAY) {
+ return substring.toLowerCase().startsWith("a") ? AM : PM;
+
+ } else if (token.temporalField == ChronoField.YEAR) {
+ String currentYearString = String.valueOf(LocalDateTime.now().getYear());
+ //deal with round years
+ if (token.string.startsWith("r") && substring.length() == 2) {
+ int currFirst2Digits = Integer.parseInt(currentYearString.substring(0, 2));
+ int currLast2Digits = Integer.parseInt(currentYearString.substring(2));
+ int valLast2Digits = Integer.parseInt(substring);
+ if (valLast2Digits < FIFTY && currLast2Digits >= FIFTY) {
+ currFirst2Digits += 1;
+ } else if (valLast2Digits >= FIFTY && currLast2Digits < FIFTY) {
+ currFirst2Digits -= 1;
+ }
+ substring = String.valueOf(currFirst2Digits) + substring;
+ } else { // fill in prefix digits with current date
+ substring = currentYearString.substring(0, 4 - substring.length()) + substring;
+ }
+
+ } else if (token.temporalField == ChronoField.NANO_OF_SECOND) {
+ int i = Integer.min(token.length, substring.length());
+ substring += StringUtils.repeat("0", NANOS_MAX_LENGTH - i);
+ }
+
+ // the rule
+ try {
+ return Integer.parseInt(substring);
+ } catch (NumberFormatException e) {
+ throw new IllegalArgumentException("Couldn't parse substring \"" + substring +
+ "\" with token " + token + " to integer. Pattern is " + pattern, e);
+ }
+ }
+
+ /**
+ * Parse the next separator(s). At least one separator character is expected. Separator
+ * characters are interchangeable.
+ *
+ * Caveat: If the last separator character in the separator substring is "-" and is immediately
+ * followed by a time zone hour (tzh) token, it's a negative sign and not counted as a
+ * separator, UNLESS this is the only separator character in the separator substring (in
+ * which case it is not counted as the negative sign).
+ *
+ * @throws IllegalArgumentException if separator is missing
+ */
+  private int parseSeparator(String fullInput, int index, Token token) {
+ int separatorsFound = 0;
+ int begin = index;
+
+ while (index < fullInput.length() &&
+ VALID_SEPARATORS.contains(fullInput.substring(index, index + 1))) {
+ if (!isLastCharacterOfSeparator(index, fullInput) || !(nextTokenIs("tzh", token))
+ || separatorsFound == 0) {
+ separatorsFound++;
+ }
+ index++;
+ }
+
+ if (separatorsFound == 0) {
+ throw new IllegalArgumentException("Missing separator at index " + index);
+ }
+ return begin + separatorsFound;
+ }
+
+ private int parseIso8601Delimiter(String fullInput, int index, Token token) {
+ String substring;
+ substring = fullInput.substring(index, index + 1);
+ if (token.string.equalsIgnoreCase(substring)) {
+ index++;
+ } else {
+ throw new IllegalArgumentException(
+ "Missing ISO 8601 delimiter " + token.string.toUpperCase());
+ }
+ return index;
+ }
+
+ /**
+ * Is the next character something other than a separator?
+ */
+ private boolean isLastCharacterOfSeparator(int index, String string) {
+    if (index == string.length() - 1) { // if we're at the end of the string, yes
+ return true;
+ }
+ return !VALID_SEPARATORS.contains(string.substring(index + 1, index + 2));
+ }
+
+ /**
+ * Does the temporalUnit/temporalField of the next token match the pattern's?
+ */
+ private boolean nextTokenIs(String pattern, Token currentToken) {
+ // make sure currentToken isn't the last one
+ if (tokens.indexOf(currentToken) == tokens.size() - 1) {
+ return false;
+ }
+ Token nextToken = tokens.get(tokens.indexOf(currentToken) + 1);
+ pattern = pattern.toLowerCase();
+ return (VALID_TIME_ZONE_TOKENS.containsKey(pattern)
+ && VALID_TIME_ZONE_TOKENS.get(pattern) == nextToken.temporalUnit
+ || VALID_TEMPORAL_TOKENS.containsKey(pattern)
+ && VALID_TEMPORAL_TOKENS.get(pattern) == nextToken.temporalField);
+ }
+
+ @Override public String getPattern() {
+ return pattern;
+ }
+}
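A usage sketch of the SQL:2016 formatter, taken from cases exercised in
TestHiveSqlDateTimeFormatter below (the RR results assume the current year is between 2000 and
2049, per the rounding table in the class Javadoc):

    HiveSqlDateTimeFormatter parser = new HiveSqlDateTimeFormatter("rr-mm-dd", true);
    parser.parseTimestamp("49-02-03"); // 2049-02-03 00:00:00
    parser.parseTimestamp("50-02-03"); // 1950-02-03 00:00:00

    HiveSqlDateTimeFormatter printer = new HiveSqlDateTimeFormatter("hh24:mi:ss.ff1", false);
    printer.format(Timestamp.valueOf("2018-02-03 01:02:03.999999999")); // "01:02:03.9"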
diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/package-info.java common/src/java/org/apache/hadoop/hive/common/format/datetime/package-info.java
new file mode 100644
index 0000000000..1e838be886
--- /dev/null
+++ common/src/java/org/apache/hadoop/hive/common/format/datetime/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Deals with formatting and parsing of datetime objects.
+ */
+package org.apache.hadoop.hive.common.format.datetime;
diff --git common/src/java/org/apache/hadoop/hive/common/type/Date.java common/src/java/org/apache/hadoop/hive/common/type/Date.java
index 6ecfcf65c9..c1eb47153e 100644
--- common/src/java/org/apache/hadoop/hive/common/type/Date.java
+++ common/src/java/org/apache/hadoop/hive/common/type/Date.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -17,6 +17,9 @@
*/
package org.apache.hadoop.hive.common.type;
+
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+
import java.time.Instant;
import java.time.LocalDate;
import java.time.LocalDateTime;
@@ -72,6 +75,17 @@ public String toString() {
return localDate.format(PRINT_FORMATTER);
}
+ public String toStringFormatted(HiveDateTimeFormatter formatter) {
+ if (formatter == null) {
+ return toString();
+ }
+ try {
+ return formatter.format(this);
+ } catch (IllegalArgumentException e) {
+ return null;
+ }
+ }
+
public int hashCode() {
return localDate.hashCode();
}
@@ -137,6 +151,13 @@ public static Date valueOf(String s) {
return new Date(localDate);
}
+ public static Date valueOf(String s, HiveDateTimeFormatter formatter) {
+ if (formatter == null) {
+ return valueOf(s);
+ }
+ return formatter.parseDate(s);
+ }
+
public static Date ofEpochDay(int epochDay) {
return new Date(LocalDate.ofEpochDay(epochDay));
}
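The new overload lets callers supply a formatter and falls back to the default parser when it is
null; a sketch:

    HiveDateTimeFormatter f = new HiveSqlDateTimeFormatter("yyyy-mm-dd", true);
    Date d1 = Date.valueOf("2018-01-01", f);    // parsed by the supplied formatter
    Date d2 = Date.valueOf("2018-01-01", null); // same as Date.valueOf("2018-01-01")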
diff --git common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java
index a8b7b6d186..cea1e8c2e1 100644
--- common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java
+++ common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -17,6 +17,8 @@
*/
package org.apache.hadoop.hive.common.type;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
@@ -101,6 +103,17 @@ public String toString() {
return localDateTime.format(PRINT_FORMATTER);
}
+ public String toStringFormatted(HiveDateTimeFormatter formatter) {
+ if (formatter == null) {
+ return toString();
+ }
+ try {
+ return formatter.format(this);
+ } catch (IllegalArgumentException e) {
+ return null;
+ }
+ }
+
public int hashCode() {
return localDateTime.hashCode();
}
@@ -166,6 +179,13 @@ public static Timestamp valueOf(String s) {
return new Timestamp(localDateTime);
}
+ public static Timestamp valueOf(String s, HiveDateTimeFormatter formatter) {
+ if (formatter == null) {
+ return valueOf(s);
+ }
+ return formatter.parseTimestamp(s);
+ }
+
public static Timestamp ofEpochSecond(long epochSecond) {
return ofEpochSecond(epochSecond, 0);
}
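toStringFormatted deliberately converts formatting failures into a null return. For example, a
pattern containing a time zone token passes parse-time verification but is rejected when
formatting a (time-zone-agnostic) timestamp; a sketch:

    HiveDateTimeFormatter f = new HiveSqlDateTimeFormatter("yyyy tzh", true);
    Timestamp ts = Timestamp.valueOf("2019-01-01 00:00:00");
    String s = ts.toStringFormatted(f); // null: TZH is invalid when formatting a timestamp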
diff --git common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java
index f26f8ae01e..525c95a63d 100644
--- common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java
+++ common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.common.type;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import java.math.BigDecimal;
@@ -171,6 +172,18 @@ public static long millisToSeconds(long millis) {
private static final int DATE_LENGTH = "YYYY-MM-DD".length();
+ public static Timestamp stringToTimestamp(String s, HiveDateTimeFormatter formatter) {
+ if (formatter == null) {
+ return stringToTimestamp(s);
+ }
+
+ try {
+ return Timestamp.valueOf(s, formatter);
+ } catch (IllegalArgumentException e) {
+ return null;
+ }
+ }
+
public static Timestamp stringToTimestamp(String s) {
s = s.trim();
// Handle simpler cases directly avoiding exceptions
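The formatter-aware overload likewise converts parse failures into a null return rather than
propagating the IllegalArgumentException; a sketch:

    HiveDateTimeFormatter f = new HiveSqlDateTimeFormatter("yyyy-mm-dd", true);
    TimestampUtils.stringToTimestamp("2018-01-01", f); // 2018-01-01 00:00:00
    TimestampUtils.stringToTimestamp("not a date", f); // null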
diff --git common/src/java/org/apache/hive/common/util/DateParser.java common/src/java/org/apache/hive/common/util/DateParser.java
index 5db14f1906..22bcd98c1d 100644
--- common/src/java/org/apache/hive/common/util/DateParser.java
+++ common/src/java/org/apache/hive/common/util/DateParser.java
@@ -17,6 +17,7 @@
*/
package org.apache.hive.common.util;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.type.Date;
/**
@@ -36,9 +37,13 @@ public Date parseDate(String strValue) {
}
public boolean parseDate(String strValue, Date result) {
+ return parseDate(strValue, result, null);
+ }
+
+ public boolean parseDate(String strValue, Date result, HiveDateTimeFormatter formatter) {
Date parsedVal;
try {
- parsedVal = Date.valueOf(strValue);
+ parsedVal = Date.valueOf(strValue, formatter);
} catch (IllegalArgumentException e) {
parsedVal = null;
}
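The three-argument overload threads the optional formatter through; passing null preserves the
old two-argument behavior (sketch; per the existing contract, the parsed value is copied into
result on success):

    DateParser parser = new DateParser();
    Date result = Date.ofEpochDay(0);
    boolean ok = parser.parseDate("2018-01-01", result, null); // legacy parse path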
diff --git common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveJavaDateTimeFormatter.java common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveJavaDateTimeFormatter.java
new file mode 100644
index 0000000000..82009f08e1
--- /dev/null
+++ common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveJavaDateTimeFormatter.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.time.format.DateTimeFormatter;
+import java.time.format.DateTimeFormatterBuilder;
+import java.time.temporal.ChronoField;
+
+/**
+ * Test class for HiveJavaDateTimeFormatter.
+ */
+public class TestHiveJavaDateTimeFormatter {
+
+ private static final DateTimeFormatter DATE_TIME_FORMATTER;
+ static {
+ DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder();
+ builder.append(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
+ builder.optionalStart().appendFraction(ChronoField.NANO_OF_SECOND, 0, 9, true).optionalEnd();
+ DATE_TIME_FORMATTER = builder.toFormatter();
+ }
+ private HiveDateTimeFormatter formatter = new HiveJavaDateTimeFormatter(DATE_TIME_FORMATTER);
+
+ @Test
+ public void testFormat() {
+ Timestamp ts = Timestamp.valueOf("2019-01-01 00:00:00.99999");
+ Assert.assertEquals("2019-01-01 00:00:00.99999", formatter.format(ts));
+ }
+
+ @Test
+ public void testParse() {
+ String s = "2019-01-01 00:00:00.99999";
+ Assert.assertEquals(Timestamp.valueOf("2019-01-01 00:00:00.99999"),
+ formatter.parseTimestamp(s));
+ }
+}
diff --git common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSimpleDateFormatter.java common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSimpleDateFormatter.java
new file mode 100644
index 0000000000..d189c7b042
--- /dev/null
+++ common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSimpleDateFormatter.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.time.ZoneOffset;
+import java.util.TimeZone;
+
+/**
+ * Tests HiveSimpleDateFormatter.
+ */
+public class TestHiveSimpleDateFormatter {
+
+ private HiveDateTimeFormatter formatter =
+ new HiveSimpleDateFormatter("yyyy-MM-dd HH:mm:ss", TimeZone.getTimeZone(ZoneOffset.UTC));
+
+ @Test
+ public void testFormat() {
+ verifyFormat("2019-01-01 01:01:01");
+ verifyFormat("2019-01-01 00:00:00");
+ verifyFormat("1960-01-01 23:00:00");
+ }
+
+ private void verifyFormat(String s) {
+ Timestamp ts = Timestamp.valueOf(s);
+ Assert.assertEquals(s, formatter.format(ts));
+ }
+
+ @Test
+ public void testParse() {
+ verifyParse("2019-01-01 01:10:10");
+ verifyParse("1960-01-01 23:00:00");
+ }
+
+ private void verifyParse(String s) {
+ Timestamp ts = Timestamp.valueOf(s);
+ Assert.assertEquals(ts, formatter.parseTimestamp(s));
+ }
+}
diff --git common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java
new file mode 100644
index 0000000000..a0fd2f2f6a
--- /dev/null
+++ common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java
@@ -0,0 +1,266 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import junit.framework.TestCase;
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+import java.time.format.DateTimeFormatterBuilder;
+import java.time.format.ResolverStyle;
+import java.time.format.SignStyle;
+import java.time.temporal.ChronoField;
+import java.time.temporal.TemporalField;
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import static java.time.temporal.ChronoField.DAY_OF_MONTH;
+import static java.time.temporal.ChronoField.HOUR_OF_DAY;
+import static java.time.temporal.ChronoField.MINUTE_OF_HOUR;
+import static java.time.temporal.ChronoField.MONTH_OF_YEAR;
+import static java.time.temporal.ChronoField.SECOND_OF_MINUTE;
+import static java.time.temporal.ChronoField.YEAR;
+
+/**
+ * Test class for HiveSqlDateTimeFormatter.
+ */
+public class TestHiveSqlDateTimeFormatter extends TestCase {
+
+ private HiveSqlDateTimeFormatter formatter;
+
+ public void testSetPattern() {
+ verifyPatternParsing(" ---yyyy-\'-:- -,.;/MM-dd--", new ArrayList<>(List.of(
+ null,
+ ChronoField.YEAR,
+ null,
+ ChronoField.MONTH_OF_YEAR,
+ null,
+ ChronoField.DAY_OF_MONTH,
+ null
+ )));
+
+ verifyPatternParsing("ymmdddhh24::mi:ss A.M. pm", 25, "ymmdddhh24::mi:ss A.M. pm",
+        new ArrayList<>(Arrays.asList(
+ ChronoField.YEAR,
+ ChronoField.MONTH_OF_YEAR,
+ ChronoField.DAY_OF_YEAR,
+ ChronoField.HOUR_OF_DAY,
+ null, ChronoField.MINUTE_OF_HOUR,
+ null, ChronoField.SECOND_OF_MINUTE,
+ null, ChronoField.AMPM_OF_DAY,
+ null, ChronoField.AMPM_OF_DAY
+ )));
+ }
+
+ public void testSetPatternWithBadPatterns() {
+ verifyBadPattern("e", true);
+ verifyBadPattern("yyyy-1", true);
+
+ verifyBadPattern("yyyy Y", true);
+ verifyBadPattern("yyyy R", true);
+ verifyBadPattern("yyyy-MM-DDD", true);
+ verifyBadPattern("yyyy-mm-DD DDD", true);
+ verifyBadPattern("yyyy-mm-dd HH24 HH12", true);
+ verifyBadPattern("yyyy-mm-dd HH24 AM", true);
+ verifyBadPattern("yyyy-mm-dd HH24 SSSSS", true);
+ verifyBadPattern("yyyy-mm-dd HH12 SSSSS", true);
+ verifyBadPattern("yyyy-mm-dd SSSSS AM", true);
+ verifyBadPattern("yyyy-mm-dd MI SSSSS", true);
+ verifyBadPattern("yyyy-mm-dd SS SSSSS", true);
+
+ verifyBadPattern("tzm", false);
+ verifyBadPattern("tzh", false);
+ }
+
+ public void testFormatTimestamp() {
+ checkFormatTs("rr rrrr ddd", "2018-01-03 00:00:00", "18 2018 003");
+ checkFormatTs("yyyy-mm-ddtsssss.ff4z", "2018-02-03 00:00:10.777777777", "2018-02-03T00010.7777Z");
+ checkFormatTs("hh24:mi:ss.ff1", "2018-02-03 01:02:03.999999999", "01:02:03.9");
+ checkFormatTs("y yyy hh:mi:ss.ffz", "2018-02-03 01:02:03.0070070", "8 018 01:02:03.007007Z");
+ checkFormatTs("am a.m. pm p.m. AM A.M. PM P.M.", "2018-02-03 01:02:03.0070070", "am a.m. am a.m. AM A.M. AM A.M.");
+ }
+
+ private void checkFormatTs(String pattern, String input, String expectedOutput) {
+ formatter = new HiveSqlDateTimeFormatter(pattern, false);
+ assertEquals(expectedOutput, formatter.format(toTimestamp(input)));
+ }
+
+ public void testFormatDate() {
+ checkFormatDate("rr rrrr ddd", "2018-01-03", "18 2018 003");
+ checkFormatDate("yyyy-mm-ddtsssss.ff4z", "2018-02-03", "2018-02-03T00000.0000Z");
+ checkFormatDate("hh24:mi:ss.ff1", "2018-02-03", "00:00:00.0");
+ checkFormatDate("y yyy T hh:mi:ss.ffz", "2018-02-03", "8 018 T 00:00:00.0Z");
+ checkFormatDate("am a.m. pm p.m. AM A.M. PM P.M.", "2018-02-03", "am a.m. am a.m. AM A.M. AM A.M.");
+ checkFormatDate("DDD", "2019-12-31", "365");
+ checkFormatDate("DDD", "2020-12-31", "366");
+ }
+
+ private void checkFormatDate(String pattern, String input, String expectedOutput) {
+ formatter = new HiveSqlDateTimeFormatter(pattern, false);
+ assertEquals(expectedOutput, formatter.format(toDate(input)));
+ }
+
+ public void testParseTimestamp() {
+ checkParseTimestamp("yyyy-mm-ddThh24:mi:ss.ff8z", "2018-02-03T04:05:06.5665Z", "2018-02-03 04:05:06.5665");
+ checkParseTimestamp("yyyy-mm-dd hh24:mi:ss.ff", "2018-02-03 04:05:06.555555555", "2018-02-03 04:05:06.555555555");
+ checkParseTimestamp("yy-mm-dd hh12:mi:ss", "99-2-03 04:05:06", "2099-02-03 04:05:06");
+ checkParseTimestamp("rr-mm-dd", "00-02-03", "2000-02-03 00:00:00");
+ checkParseTimestamp("rr-mm-dd", "49-02-03", "2049-02-03 00:00:00");
+ checkParseTimestamp("rr-mm-dd", "50-02-03", "1950-02-03 00:00:00");
+ checkParseTimestamp("rrrr-mm-dd", "00-02-03", "2000-02-03 00:00:00");
+ checkParseTimestamp("rrrr-mm-dd", "49-02-03", "2049-02-03 00:00:00");
+ checkParseTimestamp("rrrr-mm-dd", "50-02-03", "1950-02-03 00:00:00");
+ checkParseTimestamp("yyy-mm-dd", "018-01-01", "2018-01-01 00:00:00");
+ checkParseTimestamp("yyyyddd", "2018284", "2018-10-11 00:00:00");
+ checkParseTimestamp("yyyyddd", "20184", "2018-01-04 00:00:00");
+ checkParseTimestamp("yyyy-mm-ddThh24:mi:ss.ffz", "2018-02-03t04:05:06.444Z", "2018-02-03 04:05:06.444");
+ checkParseTimestamp("hh:mi:ss A.M.", "04:05:06 P.M.", "1970-01-01 16:05:06");
+ checkParseTimestamp("YYYY-MM-DD HH24:MI TZH:TZM", "2019-1-1 14:00--1:-30", "2019-01-01 15:30:00");
+ checkParseTimestamp("YYYY-MM-DD HH24:MI TZH:TZM", "2019-1-1 14:00-1:30", "2019-01-01 12:30:00");
+ checkParseTimestamp("TZM:TZH", "1 -3", "1970-01-01 03:01:00");
+ checkParseTimestamp("TZH:TZM", "-0:30", "1970-01-01 00:30:00");
+ checkParseTimestamp("TZM/YYY-MM-TZH/DD", "0/333-01-11/02", "2333-01-01 13:00:00");
+ checkParseTimestamp("YYYY-MM-DD HH12:MI AM", "2019-01-01 11:00 p.m.", "2019-01-01 23:00:00");
+ checkParseTimestamp("YYYY-MM-DD HH12:MI A.M..", "2019-01-01 11:00 pm.", "2019-01-01 23:00:00");
+
+ //Test "day in year" token in a leap year scenario
+ checkParseTimestamp("YYYY DDD", "2000 60", "2000-02-29 00:00:00");
+ checkParseTimestamp("YYYY DDD", "2000 61", "2000-03-01 00:00:00");
+ checkParseTimestamp("YYYY DDD", "2000 366", "2000-12-31 00:00:00");
+ //Test timezone offset parsing without separators
+ checkParseTimestamp("YYYYMMDDHH12MIA.M.TZHTZM", "201812310800AM+0515", "2018-12-31 02:45:00");
+ checkParseTimestamp("YYYYMMDDHH12MIA.M.TZHTZM", "201812310800AM0515", "2018-12-31 02:45:00");
+ checkParseTimestamp("YYYYMMDDHH12MIA.M.TZHTZM", "201812310800AM-0515", "2018-12-31 13:15:00");
+ }
+
+ private void checkParseTimestamp(String pattern, String input, String expectedOutput) {
+ formatter = new HiveSqlDateTimeFormatter(pattern, true);
+ assertEquals(toTimestamp(expectedOutput), formatter.parseTimestamp(input));
+ }
+
+ public void testParseDate() {
+ checkParseDate("yyyy-mm-dd hh mi ss", "2018/01/01 2.2.2", "2018-01-01");
+ checkParseDate("rr-mm-dd", "00-02-03", "2000-02-03");
+ checkParseDate("rr-mm-dd", "49-02-03", "2049-02-03");
+ checkParseDate("rr-mm-dd", "50-02-03", "1950-02-03");
+ }
+
+ private void checkParseDate(String pattern, String input, String expectedOutput) {
+ formatter = new HiveSqlDateTimeFormatter(pattern, true);
+ assertEquals(toDate(expectedOutput), formatter.parseDate(input));
+ }
+
+ public void testParseTimestampError() {
+ verifyBadParseString("yyyy", "2019-02-03");
+ verifyBadParseString("yyyy-mm-dd ", "2019-02-03"); //separator missing
+ verifyBadParseString("yyyy-mm-dd", "2019-02-03..."); //extra separators
+ verifyBadParseString("yyyy-mm-dd hh12:mi:ss", "2019-02-03 14:00:00"); //hh12 out of range
+ verifyBadParseString("yyyy-dddsssss", "2019-912345");
+ verifyBadParseString("yyyy-mm-dd", "2019-13-23"); //mm out of range
+ verifyBadParseString("yyyy-mm-dd tzh:tzm", "2019-01-01 +16:00"); //tzh out of range
+ verifyBadParseString("yyyy-mm-dd tzh:tzm", "2019-01-01 +14:60"); //tzm out of range
+ verifyBadParseString("YYYY DDD", "2000 367"); //ddd out of range
+ }
+
+ private void verifyBadPattern(String string, boolean forParsing) {
+ try {
+ formatter = new HiveSqlDateTimeFormatter(string, forParsing);
+ fail();
+ } catch (Exception e) {
+ assertEquals(IllegalArgumentException.class.getName(), e.getClass().getName());
+ }
+ }
+
+ /**
+ * Verify pattern is parsed correctly.
+ * Check:
+ * -token.temporalField for each token
+ * -sum of token.lengths
+ * -concatenation of token.strings
+ */
+ private void verifyPatternParsing(String pattern, ArrayList<TemporalField> temporalFields) {
+ verifyPatternParsing(pattern, pattern.length(), pattern.toLowerCase(), temporalFields);
+ }
+
+ private void verifyPatternParsing(String pattern, int expectedPatternLength,
+ String expectedPattern, ArrayList<TemporalField> temporalFields) {
+ formatter = new HiveSqlDateTimeFormatter(pattern, false);
+ assertEquals(temporalFields.size(), formatter.tokens.size());
+ StringBuilder sb = new StringBuilder();
+ int actualPatternLength = 0;
+ for (int i = 0; i < temporalFields.size(); i++) {
+ assertEquals("Generated list of tokens not correct", temporalFields.get(i),
+ formatter.tokens.get(i).temporalField);
+ sb.append(formatter.tokens.get(i).string);
+ actualPatternLength += formatter.tokens.get(i).length;
+ }
+ assertEquals("Token strings concatenated don't match original pattern string",
+ expectedPattern, sb.toString());
+ assertEquals(expectedPatternLength, actualPatternLength);
+ }
+
+ private void verifyBadParseString(String pattern, String string) {
+ try {
+ formatter = new HiveSqlDateTimeFormatter(pattern, true);
+ formatter.parseTimestamp(string);
+ fail();
+ } catch (Exception e) {
+ assertEquals(IllegalArgumentException.class.getName(), e.getClass().getName());
+ }
+ }
+
+
+ // Methods that construct datetime objects using java.time.DateTimeFormatter.
+
+ public static Date toDate(String s) {
+ LocalDate localDate = LocalDate.parse(s, DATE_FORMATTER);
+ return Date.ofEpochDay((int) localDate.toEpochDay());
+ }
+
+ /**
+ * This is effectively the old Timestamp.valueOf method.
+ */
+ public static Timestamp toTimestamp(String s) {
+ LocalDateTime localDateTime = LocalDateTime.parse(s.trim(), TIMESTAMP_FORMATTER);
+ return Timestamp.ofEpochSecond(
+ localDateTime.toEpochSecond(ZoneOffset.UTC), localDateTime.getNano());
+ }
+
+ private static final DateTimeFormatter DATE_FORMATTER =
+ DateTimeFormatter.ofPattern("yyyy-MM-dd");
+ private static final DateTimeFormatter TIMESTAMP_FORMATTER;
+ static {
+ DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder();
+ builder.appendValue(YEAR, 1, 10, SignStyle.NORMAL).appendLiteral('-')
+ .appendValue(MONTH_OF_YEAR, 1, 2, SignStyle.NORMAL).appendLiteral('-')
+ .appendValue(DAY_OF_MONTH, 1, 2, SignStyle.NORMAL)
+ .optionalStart().appendLiteral(" ")
+ .appendValue(HOUR_OF_DAY, 1, 2, SignStyle.NORMAL).appendLiteral(':')
+ .appendValue(MINUTE_OF_HOUR, 1, 2, SignStyle.NORMAL).appendLiteral(':')
+ .appendValue(SECOND_OF_MINUTE, 1, 2, SignStyle.NORMAL)
+ .optionalStart().appendFraction(ChronoField.NANO_OF_SECOND, 1, 9, true).optionalEnd()
+ .optionalEnd();
+ TIMESTAMP_FORMATTER = builder.toFormatter().withResolverStyle(ResolverStyle.LENIENT);
+ }
+}
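Reviewer note: the rr/rrrr cases above pin down the century-inference rule for two-digit years. A minimal standalone sketch of the rule those assertions imply (hypothetical helper, not part of the patch; the full SQL:2016 RR semantics may also consult the current year, but the fixed 49/50 boundary is all these tests exercise):

    public class RrCenturySketch {
      // Two-digit years 00-49 resolve to 20xx and 50-99 to 19xx, matching the
      // "00" -> 2000, "49" -> 2049, "50" -> 1950 assertions in testParseTimestamp.
      static int resolveRrYear(int twoDigitYear) {
        return twoDigitYear < 50 ? 2000 + twoDigitYear : 1900 + twoDigitYear;
      }

      public static void main(String[] args) {
        System.out.println(resolveRrYear(0));   // 2000
        System.out.println(resolveRrYear(49));  // 2049
        System.out.println(resolveRrYear(50));  // 1950
      }
    }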
diff --git common/src/test/org/apache/hadoop/hive/common/format/datetime/package-info.java common/src/test/org/apache/hadoop/hive/common/format/datetime/package-info.java
new file mode 100644
index 0000000000..70ee4266f4
--- /dev/null
+++ common/src/test/org/apache/hadoop/hive/common/format/datetime/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests formatting and parsing of datetime objects.
+ */
+package org.apache.hadoop.hive.common.format.datetime;
diff --git common/src/test/org/apache/hive/common/util/TestTimestampParser.java common/src/test/org/apache/hive/common/util/TestTimestampParser.java
index 00a7904ecf..5bf1119cef 100644
--- common/src/test/org/apache/hive/common/util/TestTimestampParser.java
+++ common/src/test/org/apache/hive/common/util/TestTimestampParser.java
@@ -116,8 +116,7 @@ public void testPattern1() {
};
String[] invalidCases = {
- "1945-12-31-23:59:59",
- "12345",
+ "12345"
};
testValidCases(tp, validCases);
@@ -147,8 +146,7 @@ public void testMillisParser() {
};
String[] invalidCases = {
- "1945-12-31-23:59:59",
- "1420509274123-",
+ "1420509274123-"
};
testValidCases(tp, validCases);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index fa9d1e9783..0226bb9d0c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -42,8 +42,11 @@
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastBooleanToVarCharViaLongToVarChar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastCharToBinary;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToChar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToCharWithFormat;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToString;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToStringWithFormat;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToVarChar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToVarCharWithFormat;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToChar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToDecimal;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToString;
@@ -67,10 +70,13 @@
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToBoolean;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToDecimal;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToChar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToCharWithFormat;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToDecimal;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToDouble;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToString;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToStringWithFormat;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToVarChar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToVarCharWithFormat;
import org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.ConvertDecimal64ToDecimal;
import org.apache.hadoop.hive.ql.exec.vector.expressions.Decimal64ColumnInList;
@@ -2163,9 +2169,9 @@ public VectorExpression instantiateExpression(Class<?> vclass, TypeInfo returnTy
DataTypePhysicalVariation returnDataTypePhysicalVariation, Object...args)
throws HiveException {
VectorExpression ve = null;
- Constructor<?> ctor = getConstructor(vclass);
- int numParams = ctor.getParameterTypes().length;
int argsLength = (args == null) ? 0 : args.length;
+ Constructor<?> ctor = getConstructor(vclass, argsLength);
+ int numParams = ctor.getParameterTypes().length;
if (numParams == 0) {
try {
ve = (VectorExpression) ctor.newInstance();
@@ -2173,7 +2179,7 @@ public VectorExpression instantiateExpression(Class<?> vclass, TypeInfo returnTy
throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with 0 arguments, exception: " +
getStackTraceAsSingleLine(ex));
}
} else if (numParams == argsLength) {
try {
ve = (VectorExpression) ctor.newInstance(args);
} catch (Exception ex) {
@@ -3139,9 +3145,17 @@ private VectorExpression getCastToString(List<ExprNodeDesc> childExpr, TypeInfo
} else if (isDecimalFamily(inputType)) {
return createVectorExpression(CastDecimalToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
} else if (isDateFamily(inputType)) {
- return createVectorExpression(CastDateToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ if (childExpr.size() < 2) {
+ return createVectorExpression(CastDateToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ } else { //second argument will be format string
+ return createVectorExpression(CastDateToStringWithFormat.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ }
} else if (isTimestampFamily(inputType)) {
- return createVectorExpression(CastTimestampToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ if (childExpr.size() < 2) {
+ return createVectorExpression(CastTimestampToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ } else { //second argument will be format string
+ return createVectorExpression(CastTimestampToStringWithFormat.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ }
} else if (isStringFamily(inputType)) {
// STRING and VARCHAR types require no conversion, so use a no-op.
@@ -3173,9 +3187,17 @@ private VectorExpression getCastToChar(List<ExprNodeDesc> childExpr, TypeInfo re
} else if (isDecimalFamily(inputType)) {
return createVectorExpression(CastDecimalToChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
} else if (isDateFamily(inputType)) {
- return createVectorExpression(CastDateToChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ if (childExpr.size() < 2) {
+ return createVectorExpression(CastDateToChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ } else {
+ return createVectorExpression(CastDateToCharWithFormat.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ }
} else if (isTimestampFamily(inputType)) {
- return createVectorExpression(CastTimestampToChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ if (childExpr.size() < 2) {
+ return createVectorExpression(CastTimestampToChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ } else {
+ return createVectorExpression(CastTimestampToCharWithFormat.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ }
} else if (isStringFamily(inputType)) {
return createVectorExpression(CastStringGroupToChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
}
@@ -3203,9 +3225,17 @@ private VectorExpression getCastToVarChar(List<ExprNodeDesc> childExpr, TypeInfo
} else if (isDecimalFamily(inputType)) {
return createVectorExpression(CastDecimalToVarChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
} else if (isDateFamily(inputType)) {
- return createVectorExpression(CastDateToVarChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ if (childExpr.size() < 2) {
+ return createVectorExpression(CastDateToVarChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ } else {
+ return createVectorExpression(CastDateToVarCharWithFormat.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ }
} else if (isTimestampFamily(inputType)) {
- return createVectorExpression(CastTimestampToVarChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ if (childExpr.size() < 2) {
+ return createVectorExpression(CastTimestampToVarChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ } else {
+ return createVectorExpression(CastTimestampToVarCharWithFormat.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ }
} else if (isStringFamily(inputType)) {
return createVectorExpression(CastStringGroupToVarChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
}
@@ -4089,7 +4119,7 @@ private Timestamp evaluateCastToTimestamp(ExprNodeDesc expr) throws HiveExceptio
return ((org.apache.hadoop.hive.common.type.Timestamp) java).toSqlTimestamp();
}
- private Constructor<?> getConstructor(Class<?> cl) throws HiveException {
+ private Constructor<?> getConstructor(Class<?> cl, int argsCount) throws HiveException {
try {
Constructor<?> [] ctors = cl.getDeclaredConstructors();
if (ctors.length == 1) {
@@ -4097,11 +4127,12 @@ private Timestamp evaluateCastToTimestamp(ExprNodeDesc expr) throws HiveExceptio
}
Constructor<?> defaultCtor = cl.getConstructor();
 for (Constructor<?> ctor : ctors) {
- if (!ctor.equals(defaultCtor)) {
+ if (!ctor.equals(defaultCtor) && ctor.getParameterCount() - 1 == argsCount) {
return ctor;
}
}
- throw new HiveException("Only default constructor found");
+ throw new HiveException("Only default constructor found, or no constructor found with " +
+ argsCount + " arguments");
} catch (Exception ex) {
throw new HiveException(ex);
}
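Reviewer note: getConstructor now selects by arity instead of taking the first non-default constructor. The getParameterCount() - 1 comparison encodes an assumption that the matching constructor takes exactly one more parameter than the supplied args (presumably the output column number, which instantiateExpression handles separately). A reduced sketch of the selection rule (class and names hypothetical):

    import java.lang.reflect.Constructor;

    class CtorSelectionSketch {
      // Skip the no-arg constructor and pick the first one whose parameter
      // count is argsCount + 1, mirroring the patched getConstructor above.
      static Constructor<?> pick(Class<?> cl, int argsCount) throws Exception {
        Constructor<?> defaultCtor = cl.getConstructor();
        for (Constructor<?> ctor : cl.getDeclaredConstructors()) {
          if (!ctor.equals(defaultCtor) && ctor.getParameterCount() - 1 == argsCount) {
            return ctor;
          }
        }
        throw new NoSuchMethodException("no constructor taking " + (argsCount + 1) + " parameters");
      }
    }

Note the loop still returns the first match, so two non-default constructors of equal arity would remain ambiguous.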
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToCharWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToCharWithFormat.java
new file mode 100644
index 0000000000..e2cdc917a0
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToCharWithFormat.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Vectorized UDF for CAST (<date> TO CHAR(<length>) WITH FORMAT <pattern>).
+ */
+public class CastDateToCharWithFormat extends CastDateToChar {
+
+ private static final long serialVersionUID = 1L;
+ private HiveDateTimeFormatter formatter;
+
+ public CastDateToCharWithFormat() {
+ super();
+ }
+
+ public CastDateToCharWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) {
+ super(inputColumn, outputColumnNum);
+
+ if (patternBytes == null) {
+ throw new IllegalStateException("Tried to cast ( to char with format ),"
+ + " but not found");
+ }
+ formatter =
+ new HiveSqlDateTimeFormatter(new String(patternBytes, StandardCharsets.UTF_8), false);
+ }
+
+ @Override
+ protected void func(BytesColumnVector outV, long[] vector, int i) {
+ super.func(outV, vector, i, formatter);
+ }
+
+ @Override
+ public String vectorExpressionParameters() {
+ return super.vectorExpressionParameters() + ", format pattern: " + formatter.getPattern();
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java
index dfa9f8a00d..978875e312 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java
@@ -18,28 +18,31 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSimpleDateFormatter;
+import org.apache.hadoop.hive.common.type.Timestamp;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.serde2.io.DateWritableV2;
import java.sql.Date;
-import java.text.SimpleDateFormat;
import java.util.TimeZone;
public class CastDateToString extends LongToStringUnaryUDF {
private static final long serialVersionUID = 1L;
protected transient Date dt = new Date(0);
- private transient SimpleDateFormat formatter;
+ private transient HiveDateTimeFormatter formatter;
public CastDateToString() {
super();
- formatter = new SimpleDateFormat("yyyy-MM-dd");
- formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+ initFormatter();
}
public CastDateToString(int inputColumn, int outputColumnNum) {
super(inputColumn, outputColumnNum);
- formatter = new SimpleDateFormat("yyyy-MM-dd");
- formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+ initFormatter();
+ }
+
+ private void initFormatter() {
+ formatter = new HiveSimpleDateFormatter("yyyy-MM-dd", TimeZone.getTimeZone("UTC"));
}
// The assign method will be overridden for CHAR and VARCHAR.
@@ -47,10 +50,23 @@ protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) {
outV.setVal(i, bytes, 0, length);
}
+ private void assignNull(BytesColumnVector outV, int i) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ }
+
@Override
protected void func(BytesColumnVector outV, long[] vector, int i) {
- dt.setTime(DateWritableV2.daysToMillis((int) vector[i]));
- byte[] temp = formatter.format(dt).getBytes();
- assign(outV, i, temp, temp.length);
+ func(outV, vector, i, formatter);
+ }
+
+ protected void func(BytesColumnVector outV, long[] vector, int i, HiveDateTimeFormatter formatter) {
+ try {
+ byte[] temp = formatter.format(Timestamp.ofEpochMilli(
+ org.apache.hadoop.hive.common.type.Date.ofEpochDay((int) vector[i]).toEpochMilli())).getBytes();
+ assign(outV, i, temp, temp.length);
+ } catch (Exception e) {
+ assignNull(outV, i);
+ }
}
}
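Reviewer note: func() now widens the epoch-day value in vector[i] to epoch millis and routes it through the shared HiveDateTimeFormatter, nulling the output on any formatting failure instead of propagating an exception. The underlying day-to-string conversion, sketched with plain JDK types only (not the patch's classes):

    import java.time.LocalDate;
    import java.time.format.DateTimeFormatter;

    class EpochDayFormatSketch {
      // vector[i] holds days since 1970-01-01; render it in the default
      // "yyyy-MM-dd" layout that CastDateToString keeps as its fallback.
      static String formatEpochDay(long epochDay) {
        return LocalDate.ofEpochDay(epochDay).format(DateTimeFormatter.ofPattern("yyyy-MM-dd"));
      }

      public static void main(String[] args) {
        System.out.println(formatEpochDay(0L));     // 1970-01-01
        System.out.println(formatEpochDay(17897L)); // 2019-01-01
      }
    }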
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToStringWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToStringWithFormat.java
new file mode 100644
index 0000000000..a8c3437553
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToStringWithFormat.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Vectorized UDF for CAST (<date> TO STRING WITH FORMAT <pattern>).
+ */
+public class CastDateToStringWithFormat extends CastDateToString {
+ private static final long serialVersionUID = 1L;
+ protected transient Date dt;
+ private HiveDateTimeFormatter formatter;
+
+ public CastDateToStringWithFormat() {
+ super();
+ }
+
+ public CastDateToStringWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) {
+ super(inputColumn, outputColumnNum);
+
+ if (patternBytes == null) {
+ throw new IllegalStateException("Tried to cast (<date> to string with format <pattern>),"
+ + " but <pattern> not found");
+ }
+ formatter =
+ new HiveSqlDateTimeFormatter(new String(patternBytes, StandardCharsets.UTF_8), false);
+ }
+
+ // The assign method will be overridden for CHAR and VARCHAR.
+ protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) {
+ outV.setVal(i, bytes, 0, length);
+ }
+
+ @Override
+ protected void func(BytesColumnVector outV, long[] vector, int i) {
+ super.func(outV, vector, i, formatter);
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+ b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+ .setNumArguments(2)
+ .setArgumentTypes(
+ VectorExpressionDescriptor.ArgumentType.INT_FAMILY,
+ VectorExpressionDescriptor.ArgumentType.STRING)
+ .setInputExpressionTypes(
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.SCALAR);
+ return b.build();
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToVarCharWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToVarCharWithFormat.java
new file mode 100644
index 0000000000..11ed8e2039
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToVarCharWithFormat.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Vectorized UDF for CAST (<date> TO VARCHAR(<length>) WITH FORMAT <pattern>).
+ */
+public class CastDateToVarCharWithFormat extends CastDateToVarChar {
+
+ private static final long serialVersionUID = 1L;
+ private HiveDateTimeFormatter formatter;
+
+ public CastDateToVarCharWithFormat() {
+ super();
+ }
+
+ public CastDateToVarCharWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) {
+ super(inputColumn, outputColumnNum);
+
+ if (patternBytes == null) {
+ throw new IllegalStateException("Tried to cast (<date> to varchar with format <pattern>),"
+ + " but <pattern> not found");
+ }
+ formatter =
+ new HiveSqlDateTimeFormatter(new String(patternBytes, StandardCharsets.UTF_8), false);
+ }
+
+ @Override
+ protected void func(BytesColumnVector outV, long[] vector, int i) {
+ super.func(outV, vector, i, formatter);
+ }
+
+ @Override
+ public String vectorExpressionParameters() {
+ return super.vectorExpressionParameters() + ", format pattern: " + formatter.getPattern();
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
index a6dff12e1a..44a451b3bc 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.type.Date;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
@@ -151,10 +152,21 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException {
}
}
- private void evaluate(LongColumnVector outputColVector, BytesColumnVector inV, int i) {
+ /**
+ * Used by CastStringToDate.
+ */
+ protected void evaluate(LongColumnVector outputColVector, BytesColumnVector inV, int i) {
+ evaluate(outputColVector, inV, i, null);
+ }
+
+ /**
+ * Used by CastStringToDateWithFormat.
+ */
+ protected void evaluate(LongColumnVector outputColVector, BytesColumnVector inV, int i,
+ HiveDateTimeFormatter formatter) {
String dateString = new String(inV.vector[i], inV.start[i], inV.length[i], StandardCharsets.UTF_8);
Date hDate = new Date();
- if (dateParser.parseDate(dateString, hDate)) {
+ if (dateParser.parseDate(dateString, hDate, formatter)) {
outputColVector.vector[i] = DateWritableV2.dateToDays(hDate);
return;
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDateWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDateWithFormat.java
new file mode 100644
index 0000000000..e242ad5ea2
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDateWithFormat.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Vectorized UDF for CAST (<string> TO DATE WITH FORMAT <pattern>).
+ */
+public class CastStringToDateWithFormat extends CastStringToDate {
+
+ private static final long serialVersionUID = 1L;
+ private HiveDateTimeFormatter formatter;
+
+ public CastStringToDateWithFormat() {
+ super();
+ }
+
+ public CastStringToDateWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) {
+ super(inputColumn, outputColumnNum);
+
+ if (patternBytes == null) {
+ throw new IllegalStateException("Tried to cast (<string> to date with format <pattern>),"
+ + " but <pattern> not found");
+ }
+ formatter =
+ new HiveSqlDateTimeFormatter(new String(patternBytes, StandardCharsets.UTF_8), true);
+ }
+
+ @Override
+ protected void evaluate(LongColumnVector outputColVector,
+ BytesColumnVector inputColVector, int i) {
+ super.evaluate(outputColVector, inputColVector, i, formatter);
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+ b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+ .setNumArguments(2)
+ .setArgumentTypes(
+ VectorExpressionDescriptor.ArgumentType.STRING_FAMILY,
+ VectorExpressionDescriptor.ArgumentType.STRING)
+ .setInputExpressionTypes(
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.SCALAR);
+ return b.build();
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestamp.java
index b48b0136eb..f8d81cdb13 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestamp.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestamp.java
@@ -19,8 +19,9 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
import java.util.Arrays;
-import java.sql.Timestamp;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.type.Timestamp;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -143,21 +144,40 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException {
}
}
- private void evaluate(TimestampColumnVector outputColVector, BytesColumnVector inputColVector, int i) {
+ /**
+ * This is used by CastStringToTimestamp.
+ */
+ protected void evaluate(TimestampColumnVector outputColVector, BytesColumnVector inputColVector, int i) {
+ evaluate(outputColVector, inputColVector, i, null);
+ }
+
+ /**
+ * This is used by CastStringToTimestampWithFormat.
+ */
+ protected void evaluate(TimestampColumnVector outputColVector, BytesColumnVector inputColVector,
+ int i, HiveDateTimeFormatter formatter) {
try {
- org.apache.hadoop.hive.common.type.Timestamp timestamp =
- PrimitiveObjectInspectorUtils.getTimestampFromString(
+ Timestamp timestamp = PrimitiveObjectInspectorUtils.getTimestampFromString(
new String(
inputColVector.vector[i], inputColVector.start[i], inputColVector.length[i],
- "UTF-8"));
- outputColVector.set(i, timestamp.toSqlTimestamp());
+ "UTF-8"),
+ formatter);
+ if (timestamp != null) {
+ outputColVector.set(i, timestamp.toSqlTimestamp());
+ } else {
+ setNullValue(outputColVector, i);
+ }
} catch (Exception e) {
- outputColVector.setNullValue(i);
- outputColVector.isNull[i] = true;
- outputColVector.noNulls = false;
+ setNullValue(outputColVector, i);
}
}
+ private void setNullValue(TimestampColumnVector outputColVector, int i) {
+ outputColVector.setNullValue(i);
+ outputColVector.isNull[i] = true;
+ outputColVector.noNulls = false;
+ }
+
@Override
public String vectorExpressionParameters() {
return getColumnParamString(0, inputColumn);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestampWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestampWithFormat.java
new file mode 100644
index 0000000000..eeeaa5d935
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestampWithFormat.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Vectorized UDF for CAST (<string> TO TIMESTAMP WITH FORMAT <pattern>).
+ */
+public class CastStringToTimestampWithFormat extends CastStringToTimestamp {
+
+ private static final long serialVersionUID = 1L;
+ private HiveDateTimeFormatter formatter;
+
+ public CastStringToTimestampWithFormat() {
+ super();
+ }
+
+ public CastStringToTimestampWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) {
+ super(inputColumn, outputColumnNum);
+
+ if (patternBytes == null) {
+ throw new IllegalStateException("Tried to cast (<string> to timestamp with format"
+ + " <pattern>), but <pattern> not found");
+ }
+ formatter =
+ new HiveSqlDateTimeFormatter(new String(patternBytes, StandardCharsets.UTF_8), true);
+ }
+
+ @Override
+ protected void evaluate(TimestampColumnVector outputColVector,
+ BytesColumnVector inputColVector, int i) {
+ super.evaluate(outputColVector, inputColVector, i, formatter);
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+ b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+ .setNumArguments(2)
+ .setArgumentTypes(
+ VectorExpressionDescriptor.ArgumentType.STRING_FAMILY,
+ VectorExpressionDescriptor.ArgumentType.STRING)
+ .setInputExpressionTypes(
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.SCALAR);
+ return b.build();
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToCharWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToCharWithFormat.java
new file mode 100644
index 0000000000..b9140c0f34
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToCharWithFormat.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Vectorized UDF for CAST (<timestamp> TO CHAR(<length>) WITH FORMAT <pattern>).
+ */
+public class CastTimestampToCharWithFormat extends CastTimestampToChar {
+
+ private static final long serialVersionUID = 1L;
+ private HiveDateTimeFormatter formatter;
+
+ public CastTimestampToCharWithFormat() {
+ super();
+ }
+
+ public CastTimestampToCharWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) {
+ super(inputColumn, outputColumnNum);
+
+ if (patternBytes == null) {
+ throw new IllegalStateException("Tried to cast (<timestamp> to char with format <pattern>),"
+ + " but <pattern> not found");
+ }
+ formatter =
+ new HiveSqlDateTimeFormatter(new String(patternBytes, StandardCharsets.UTF_8), false);
+ }
+
+ public CastTimestampToCharWithFormat(
+ int inputColumn, byte[] patternBytes, int maxLength, int outputColumnNum) {
+ this(inputColumn, patternBytes, outputColumnNum);
+ setMaxLength(maxLength);
+ }
+
+ @Override
+ protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i) {
+ super.func(outV, inV, i, formatter);
+ }
+
+ @Override
+ public String vectorExpressionParameters() {
+ return super.vectorExpressionParameters() + ", format pattern: " + formatter.getPattern();
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToString.java
index adc3a9d7b9..61da01fda0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToString.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToString.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveJavaDateTimeFormatter;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
@@ -42,12 +44,20 @@
PRINT_FORMATTER = builder.toFormatter();
}
+ private transient HiveDateTimeFormatter format;
+
public CastTimestampToString() {
super();
+ initFormatter();
}
public CastTimestampToString(int inputColumn, int outputColumnNum) {
super(inputColumn, outputColumnNum);
+ initFormatter();
+ }
+
+ private void initFormatter() {
+ format = new HiveJavaDateTimeFormatter(PRINT_FORMATTER);
}
// The assign method will be overridden for CHAR and VARCHAR.
@@ -55,14 +65,27 @@ protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) {
outV.setVal(i, bytes, 0, length);
}
+ private void assignNull(BytesColumnVector outV, int i) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ }
+
@Override
protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i) {
- byte[] temp = LocalDateTime.ofInstant(Instant.ofEpochMilli(inV.time[i]), ZoneOffset.UTC)
- .withNano(inV.nanos[i])
- .format(PRINT_FORMATTER).getBytes();
- assign(outV, i, temp, temp.length);
+ func(outV, inV, i, format);
}
+ protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i, HiveDateTimeFormatter formatter) {
+ try {
+ String formattedLocalDateTime = formatter.format(
+ org.apache.hadoop.hive.common.type.Timestamp.ofEpochMilli(inV.time[i], inV.nanos[i]));
+
+ byte[] temp = formattedLocalDateTime.getBytes();
+ assign(outV, i, temp, temp.length);
+ } catch (Exception e) {
+ assignNull(outV, i);
+ }
+ }
public static String getTimestampString(Timestamp ts) {
return
LocalDateTime.ofInstant(Instant.ofEpochMilli(ts.getTime()), ZoneOffset.UTC)
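Reviewer note: the fixed PRINT_FORMATTER now sits behind the HiveDateTimeFormatter interface (via HiveJavaDateTimeFormatter), so CastTimestampToStringWithFormat can swap in a SQL:2016 formatter by overriding only the narrow func() entry point. What such a java.time-backed adapter has to do, sketched independently of the patch (wrapper name hypothetical; the body mirrors the inline code this hunk removes):

    import java.time.Instant;
    import java.time.LocalDateTime;
    import java.time.ZoneOffset;
    import java.time.format.DateTimeFormatter;

    class JavaTimeAdapterSketch {
      private final DateTimeFormatter delegate;

      JavaTimeAdapterSketch(DateTimeFormatter delegate) {
        this.delegate = delegate;
      }

      // Same conversion the old func() inlined: epoch millis plus a nanos
      // field, rendered as a UTC LocalDateTime.
      String format(long epochMilli, int nanos) {
        return LocalDateTime.ofInstant(Instant.ofEpochMilli(epochMilli), ZoneOffset.UTC)
            .withNano(nanos)
            .format(delegate);
      }
    }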
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToStringWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToStringWithFormat.java
new file mode 100644
index 0000000000..a8c7906393
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToStringWithFormat.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Vectorized UDF for CAST (<timestamp> TO STRING WITH FORMAT <pattern>).
+ */
+public class CastTimestampToStringWithFormat extends CastTimestampToString {
+ private static final long serialVersionUID = 1L;
+ private HiveDateTimeFormatter formatter;
+
+ public CastTimestampToStringWithFormat() {
+ super();
+ }
+
+ public CastTimestampToStringWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) {
+ super(inputColumn, outputColumnNum);
+
+ if (patternBytes == null) {
+ throw new IllegalStateException("Tried to cast (<timestamp> to string with format"
+ + " <pattern>), but <pattern> not found");
+ }
+ formatter =
+ new HiveSqlDateTimeFormatter(new String(patternBytes, StandardCharsets.UTF_8), false);
+ }
+
+ @Override
+ protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i) {
+ super.func(outV, inV, i, formatter);
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+ b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+ .setNumArguments(2)
+ .setArgumentTypes(
+ VectorExpressionDescriptor.ArgumentType.TIMESTAMP,
+ VectorExpressionDescriptor.ArgumentType.STRING)
+ .setInputExpressionTypes(
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.SCALAR);
+ return b.build();
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToVarCharWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToVarCharWithFormat.java
new file mode 100644
index 0000000000..37b2010638
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToVarCharWithFormat.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Vectorized UDF for CAST (<timestamp> TO VARCHAR(<length>) WITH FORMAT <pattern>).
+ */
+public class CastTimestampToVarCharWithFormat extends CastTimestampToVarChar {
+
+ private static final long serialVersionUID = 1L;
+ private HiveDateTimeFormatter formatter;
+
+ public CastTimestampToVarCharWithFormat() {
+ super();
+ }
+
+ public CastTimestampToVarCharWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) {
+ super(inputColumn, outputColumnNum);
+
+ if (patternBytes == null) {
+ throw new IllegalStateException("Tried to cast (<timestamp> to varchar with format"
+ + " <pattern>), but <pattern> not found");
+ }
+ formatter =
+ new HiveSqlDateTimeFormatter(new String(patternBytes, StandardCharsets.UTF_8), false);
+ }
+
+ public CastTimestampToVarCharWithFormat(
+ int inputColumn, byte[] patternBytes, int maxLength, int outputColumnNum) {
+ this(inputColumn, patternBytes, outputColumnNum);
+ setMaxLength(maxLength);
+ }
+
+ @Override
+ protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i) {
+ super.func(outV, inV, i, formatter);
+ }
+
+ @Override
+ public String vectorExpressionParameters() {
+ return super.vectorExpressionParameters() + ", format pattern: " + formatter.getPattern();
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
index 89fad04d26..1c6eb43320 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
@@ -104,9 +104,12 @@
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import java.math.BigDecimal;
import java.math.BigInteger;
@@ -396,13 +399,13 @@ private RexNode convert(ExprNodeGenericFuncDesc func) throws SemanticException {
childRexNodeLst.add(tmpRN);
}
- // See if this is an explicit cast.
+ // See if this is an explicit cast. Cast with format handled below.
RexNode expr = null;
RelDataType retType = null;
expr = handleExplicitCast(func, childRexNodeLst);
if (expr == null) {
- // This is not a cast; process the function.
+ // This is not an explicit cast; process the function.
retType = TypeConverter.convert(func.getTypeInfo(), cluster.getTypeFactory());
SqlOperator calciteOp = SqlFunctionConverter.getCalciteOperator(func.getFuncText(),
func.getGenericUDF(), argTypeBldr.build(), retType);
@@ -457,6 +460,29 @@ private RexNode convert(ExprNodeGenericFuncDesc func) throws SemanticException {
childRexNodeLst.clear();
childRexNodeLst.add(cluster.getRexBuilder().makeCall(cmpOp, rangeL, op));
childRexNodeLst.add(cluster.getRexBuilder().makeCall(cmpOp, op, rangeH));
+
+ // Handle cast with format (TODO: GenericUDFToTimestampLocalTZ will also need this treatment)
+ // by adding extra typeInfo parameters (e.g. length) as a third argument to the UDF.
+ // Otherwise an optimized TOK_FUNCTION subtree in the AST would look like:
+ // (tok_function char (. (tok_table_or_col <table>) <column>) '<format>')
+ // which is missing the char length info and would throw an NPE.
+ // The resulting TOK_FUNCTION subtree in the AST will instead look like:
+ // (tok_function char (. (tok_table_or_col <table>) <column>) '<format>' <length>)
+ // and the 3rd argument will be handled in GenericUDFToChar and GenericUDFToVarchar.
+ } else if (childRexNodeLst.size() == 2) {
+ GenericUDF udf = func.getGenericUDF();
+ if (udf instanceof GenericUDFToVarchar || udf instanceof GenericUDFToChar) {
+ ExprNodeConstantDesc exprNodeDesc = new ExprNodeConstantDesc();
+ if (udf instanceof GenericUDFToChar) {
+ exprNodeDesc.setValue(
+ ((CharTypeInfo) ((GenericUDFToChar) udf).getTypeInfo()).getLength());
+ } else { //GenericUDFToVarchar
+ exprNodeDesc.setValue(
+ ((VarcharTypeInfo) ((GenericUDFToVarchar) udf).getTypeInfo()).getLength());
+ }
+ exprNodeDesc.setTypeInfo(TypeInfoFactory.getPrimitiveTypeInfo("int"));
+ childRexNodeLst.add(2, convert(exprNodeDesc));
+ }
}
expr = cluster.getRexBuilder().makeCall(retType, calciteOp, childRexNodeLst);
} else {
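Reviewer note: this branch only fires for two-child GenericUDFToChar/GenericUDFToVarchar calls, i.e. casts carrying a FORMAT pattern, and appends the declared length as a constant third operand. For example, CAST(col AS CHAR(10) FORMAT 'yyyy') survives Calcite as char(col, 'yyyy', 10), letting GenericUDFToChar.initialize rebuild CharTypeInfo(10) from argument 2 instead of hitting the NPE described above.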
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index 58fe0cd32e..b71ec5b4eb 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -248,7 +248,8 @@ castExpression
expression
KW_AS
primitiveType
- RPAREN -> ^(TOK_FUNCTION primitiveType expression)
+ (KW_FORMAT expression)?
+ RPAREN -> ^(TOK_FUNCTION primitiveType expression*)
;
caseExpression
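Reviewer note: this one-line grammar change is what exposes the feature: the optional (KW_FORMAT expression)? adds the pattern as a second child of TOK_FUNCTION when present, while the single-child shape of a plain cast is untouched. Queries the extended castExpression rule should now accept (illustrative; the pattern tokens follow the new formatter's tests):

    SELECT CAST('2018-01-03' AS DATE FORMAT 'yyyy-mm-dd');
    SELECT CAST(ts_col AS VARCHAR(30) FORMAT 'hh24:mi:ss.ff') FROM t;
    SELECT CAST('2018-01-03' AS DATE);  -- legacy path, unchanged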
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java
index bcc4114099..6c3c3349bb 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java
@@ -17,8 +17,6 @@
*/
package org.apache.hadoop.hive.ql.udf.generic;
-import java.text.SimpleDateFormat;
-
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateSubColCol;
@@ -46,7 +44,6 @@
+ " '2009-07-29'")
@VectorizedExpressions({VectorUDFDateSubColScalar.class, VectorUDFDateSubScalarCol.class, VectorUDFDateSubColCol.class})
public class GenericUDFDateSub extends GenericUDFDateAdd {
- private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
public GenericUDFDateSub() {
this.signModifier = -1;
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTimestamp.java
index 70f57b7727..1dc67b8bc6 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTimestamp.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTimestamp.java
@@ -17,8 +17,7 @@
*/
package org.apache.hadoop.hive.ql.udf.generic;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
@@ -29,9 +28,9 @@
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDoubleToTimestamp;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastLongToTimestamp;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToTimestamp;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToTimestampWithFormat;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.session.SessionState;
-import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.TimestampConverter;
@@ -48,9 +47,12 @@
*
*/
@Description(name = "timestamp",
-value = "cast(date as timestamp) - Returns timestamp")
+ value = "cast( as timestamp [format ]) - Returns timestamp",
+ extended = "If format is specified with FORMAT argument then SQL:2016 datetime formats will be "
+ + "used.")
@VectorizedExpressions({CastLongToTimestamp.class, CastDateToTimestamp.class,
- CastDoubleToTimestamp.class, CastDecimalToTimestamp.class, CastStringToTimestamp.class})
+ CastDoubleToTimestamp.class, CastDecimalToTimestamp.class, CastStringToTimestamp.class,
+ CastStringToTimestampWithFormat.class})
public class GenericUDFTimestamp extends GenericUDF {
private transient PrimitiveObjectInspector argumentOI;
@@ -88,6 +90,12 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen
PrimitiveObjectInspectorFactory.writableTimestampObjectInspector);
tc.setIntToTimestampInSeconds(intToTimestampInSeconds);
+ // for CAST WITH FORMAT
+ if (arguments.length > 1 && arguments[1] != null) {
+ tc.setDateTimeFormatter(
+ new HiveSqlDateTimeFormatter(getConstantStringValue(arguments, 1), true));
+ }
+
return PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
}
@@ -97,17 +105,21 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException {
if (o0 == null) {
return null;
}
-
return tc.convert(o0);
}
@Override
public String getDisplayString(String[] children) {
- assert (children.length == 1);
+ assert (1 <= children.length && children.length <= 2);
StringBuilder sb = new StringBuilder();
sb.append("CAST( ");
sb.append(children[0]);
- sb.append(" AS TIMESTAMP)");
+ sb.append(" AS TIMESTAMP");
+ if (children.length == 2) {
+ sb.append(" FORMAT ");
+ sb.append(children[1]);
+ }
+ sb.append(")");
return sb.toString();
}
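Reviewer note: with this branch, a two-child call renders as CAST( col AS TIMESTAMP FORMAT 'pattern') instead of the old fixed CAST( col AS TIMESTAMP), so explain plans and error messages now surface the format clause.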
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToChar.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToChar.java
index 899abf76b8..d072cc6f0c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToChar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToChar.java
@@ -19,6 +19,7 @@
import java.io.Serializable;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.ql.exec.Description;
@@ -34,12 +35,14 @@
import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
@Description(name = "char",
-value = "CAST( as CHAR(length)) - Converts the argument to a char value.",
+value = "CAST( as CHAR(length) [FORMAT ]) - Converts the argument to a char"
+ + "value.",
extended = "Values will be truncated if the input value is too long to fit"
-+ " within the char length."
-+ "Example:\n "
-+ " > SELECT CAST(1234 AS char(10)) FROM src LIMIT 1;\n"
-+ " '1234'")
+ + " within the char length. If format is specified with FORMAT argument then SQL:2016 datetime"
+ + " formats will be used.\n"
+ + "Example:\n "
+ + " > SELECT CAST(1234 AS char(10)) FROM src LIMIT 1;\n"
+ + " '1234'")
public class GenericUDFToChar extends GenericUDF implements SettableUDF, Serializable {
private static final Logger LOG = LoggerFactory.getLogger(GenericUDFToChar.class.getName());
@@ -55,7 +58,7 @@ public GenericUDFToChar() {
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
- if (arguments.length != 1) {
+ if (arguments.length < 1) {
throw new UDFArgumentException("CHAR cast requires a value argument");
}
try {
@@ -65,12 +68,24 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen
"The function CHAR takes only primitive types");
}
+ // Third argument could be char length.
+ if (typeInfo == null && arguments.length > 2 && arguments[2] != null) {
+ typeInfo = new CharTypeInfo(getConstantIntValue(arguments, 2));
+ }
+
// Check if this UDF has been provided with type params for the output char type
SettableHiveCharObjectInspector outputOI;
outputOI = (SettableHiveCharObjectInspector)
PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
converter = new HiveCharConverter(argumentOI, outputOI);
+
+ // for CAST WITH FORMAT
+ if (arguments.length > 1 && arguments[1] != null) {
+ converter.setDateTimeFormatter(
+ new HiveSqlDateTimeFormatter(getConstantStringValue(arguments, 1), true));
+ }
+
return outputOI;
}
@@ -86,13 +101,21 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException {
@Override
public String getDisplayString(String[] children) {
- assert (children.length == 1);
+ assert (children.length >= 1 && children.length <= 3);
StringBuilder sb = new StringBuilder();
sb.append("CAST( ");
sb.append(children[0]);
sb.append(" AS CHAR(");
- sb.append("" + typeInfo.getLength());
+ if (typeInfo != null) {
+ sb.append(typeInfo.getLength());
+ } else if (children.length > 2) {
+ sb.append(children[2]);
+ }
sb.append(")");
+ if (children.length > 1) {
+ sb.append(" FORMAT ");
+ sb.append(children[1]);
+ }
sb.append(")");
return sb.toString();
}
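
A usage sketch for the char variant, again taken from the .q test later in this patch; the declared char length still truncates the formatted output:

  from timestamps select cast (t as char(11) format "yyyy hh24...PM ff"); -- will be truncated
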
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java
index c309ffa5e3..251a67dc92 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java
@@ -17,11 +17,13 @@
*/
package org.apache.hadoop.hive.ql.udf.generic;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToDate;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToDateWithFormat;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToDate;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -36,12 +38,15 @@
* GenericUDFToDate
*/
@Description(name = "date",
- value = "CAST( as DATE) - Returns the date represented by the date string.",
- extended = "date_string is a string in the format 'yyyy-MM-dd.'"
+ value = "CAST( as DATE [FORMAT ]) - Returns the date represented by the date string.",
+ extended = "date_string is a string in the format 'yyyy-MM-dd.' "
+ + "If format is specified with FORMAT argument then SQL:2016 datetime formats will be "
+ + "used for parsing."
+ "Example:\n "
+ " > SELECT CAST('2009-01-01' AS DATE) FROM src LIMIT 1;\n"
+ " '2009-01-01'")
-@VectorizedExpressions({CastStringToDate.class, CastTimestampToDate.class})
+@VectorizedExpressions({CastStringToDate.class, CastTimestampToDate.class,
+ CastStringToDateWithFormat.class})
public class GenericUDFToDate extends GenericUDF {
private transient PrimitiveObjectInspector argumentOI;
@@ -75,6 +80,13 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen
dc = new DateConverter(argumentOI,
PrimitiveObjectInspectorFactory.writableDateObjectInspector);
+
+ // for CAST WITH FORMAT
+ if (arguments.length > 1 && arguments[1] != null) {
+ dc.setDateTimeFormatter(
+ new HiveSqlDateTimeFormatter(getConstantStringValue(arguments, 1), true));
+ }
+
return PrimitiveObjectInspectorFactory.writableDateObjectInspector;
}
@@ -90,11 +102,16 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException {
@Override
public String getDisplayString(String[] children) {
- assert (children.length == 1);
+ assert (children.length == 1 || children.length == 2);
StringBuilder sb = new StringBuilder();
sb.append("CAST( ");
sb.append(children[0]);
- sb.append(" AS DATE)");
+ sb.append(" AS DATE");
+ if (children.length == 2) {
+ sb.append(" FORMAT ");
+ sb.append(children[1]);
+ }
+ sb.append(")");
return sb.toString();
}
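
A usage sketch from the .q test later in this patch; the FORMAT clause switches parsing from the legacy 'yyyy-MM-dd' path to the SQL:2016 formatter:

  from strings select cast (s as date format "yyyy.mm.dd");
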
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToString.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToString.java
index d5764419d6..a438392206 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToString.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToString.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hive.ql.udf.generic;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -28,10 +29,12 @@
import org.slf4j.LoggerFactory;
@Description(name = "string",
-value = "CAST( as STRING) - Converts the argument to a string value.",
-extended = "Example:\n "
-+ " > SELECT CAST(1234 AS string) FROM src LIMIT 1;\n"
-+ " '1234'")
+ value = "CAST( as STRING [FORMAT ]) - Converts the argument to a string value.",
+ extended = "If format is specified with FORMAT argument then SQL:2016 datetime formats will "
+ + "be used.\n"
+ + "Example:\n "
+ + " > SELECT CAST(1234 AS string) FROM src LIMIT 1;\n"
+ + " '1234'")
public class GenericUDFToString extends GenericUDF {
private static final Logger LOG = LoggerFactory.getLogger(GenericUDFToString.class.getName());
@@ -43,7 +46,7 @@ public GenericUDFToString() {
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
- if (arguments.length != 1) {
+ if (arguments.length < 1) {
throw new UDFArgumentException("STRING cast requires a value argument");
}
try {
@@ -54,26 +57,38 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen
}
converter = new TextConverter(argumentOI);
+
+ // for CAST WITH FORMAT
+ if (arguments.length > 1 && arguments[1] != null) {
+ converter.setDateTimeFormatter(
+ new HiveSqlDateTimeFormatter(getConstantStringValue(arguments, 1), true));
+ }
+
return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
}
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
- Object o0 = arguments[0].get();
- if (o0 == null) {
- return null;
- }
+ Object o0 = arguments[0].get();
+ if (o0 == null) {
+ return null;
+ }
- return converter.convert(o0);
+ return converter.convert(o0);
}
@Override
public String getDisplayString(String[] children) {
- assert (children.length == 1);
+ assert (children.length == 1 || children.length == 2);
StringBuilder sb = new StringBuilder();
sb.append("CAST( ");
sb.append(children[0]);
- sb.append(" AS STRING)");
+ sb.append(" AS STRING");
+ if (children.length == 2) {
+ sb.append(" FORMAT ");
+ sb.append(children[1]);
+ }
+ sb.append(")");
return sb.toString();
}
}
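
A usage sketch from the .q test later in this patch, formatting a date into a string with SQL:2016 tokens, including the ff9 fractional-seconds token:

  from dates select cast (d as string format "yyyy mm dd , hh24 mi ss ff9");
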
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java
index b9a2bc2b9f..c2b414ac85 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java
@@ -19,6 +19,7 @@
import java.io.Serializable;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.ql.exec.Description;
@@ -34,12 +35,14 @@
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
@Description(name = "varchar",
-value = "CAST( as VARCHAR(length)) - Converts the argument to a varchar value.",
+value = "CAST( as VARCHAR(length) [FORMAT ]) - Converts the argument to a "
+ + "varchar value.",
extended = "Values will be truncated if the input value is too long to fit"
-+ " within the varchar length."
-+ "Example:\n "
-+ " > SELECT CAST(1234 AS varchar(10)) FROM src LIMIT 1;\n"
-+ " '1234'")
+ + " within the varchar length. If format is specified with FORMAT argument then SQL:2016"
+ + "datetime formats will be used.\n"
+ + "Example:\n "
+ + " > SELECT CAST(1234 AS varchar(10)) FROM src LIMIT 1;\n"
+ + " '1234'")
public class GenericUDFToVarchar extends GenericUDF implements SettableUDF, Serializable {
private static final Logger LOG = LoggerFactory.getLogger(GenericUDFToVarchar.class.getName());
@@ -55,7 +58,7 @@ public GenericUDFToVarchar() {
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
- if (arguments.length != 1) {
+ if (arguments.length < 1) {
throw new UDFArgumentException("VARCHAR cast requires a value argument");
}
try {
@@ -65,12 +68,24 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen
"The function VARCHAR takes only primitive types");
}
+ // Third argument could be varchar length.
+ if (typeInfo == null && arguments.length > 2 && arguments[2] != null) {
+ typeInfo = new VarcharTypeInfo(getConstantIntValue(arguments, 2));
+ }
+
// Check if this UDF has been provided with type params for the output varchar type
SettableHiveVarcharObjectInspector outputOI;
outputOI = (SettableHiveVarcharObjectInspector)
PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
converter = new HiveVarcharConverter(argumentOI, outputOI);
+
+ // for CAST WITH FORMAT
+ if (arguments.length > 1 && arguments[1] != null) {
+ converter.setDateTimeFormatter(
+ new HiveSqlDateTimeFormatter(getConstantStringValue(arguments, 1), false));
+ }
+
return outputOI;
}
@@ -86,12 +101,21 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException {
@Override
public String getDisplayString(String[] children) {
- assert (children.length == 1);
+ assert (children.length >= 1 && children.length <= 3);
StringBuilder sb = new StringBuilder();
sb.append("CAST( ");
sb.append(children[0]);
- sb.append(" AS ");
- sb.append(typeInfo.getQualifiedName());
+ sb.append(" AS varchar(");
+ if (typeInfo != null) {
+ sb.append(typeInfo.getLength());
+ } else if (children.length > 2) {
+ sb.append(children[2]);
+ }
+ sb.append(")");
+ if (children.length > 1) {
+ sb.append(" FORMAT ");
+ sb.append(children[1]);
+ }
sb.append(")");
return sb.toString();
}
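
A usage sketch from the .q test later in this patch. When a FORMAT clause is present and no typeInfo was set through SettableUDF, the varchar length arrives as the third constant argument (the typeInfo == null branch above), and truncation still applies:

  from timestamps select cast (t as varchar(11) format "yyyy hh24...PM ff"); -- will be truncated
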
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java
index 663237739e..314e394d67 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java
@@ -22,6 +22,7 @@
import java.util.Arrays;
import java.util.Random;
+import org.apache.hadoop.hive.common.type.Date;
import org.junit.Assert;
import org.apache.hadoop.hive.serde2.RandomTypeUtil;
@@ -115,18 +116,20 @@ public void testRoundToDecimalPlaces() throws HiveException {
Assert.assertEquals(1.2346d, resultV.vector[7], Double.MIN_VALUE);
}
- static int DAYS_LIMIT = 365 * 9999;
+ private static final int DAYS_LIMIT = 365 * 9999;
+ // approximate lower bound, so that some generated values are negative (pre-1970):
+ private static final int SMALLEST_EPOCH_DAY = -365 * 1969;
public static VectorizedRowBatch getVectorizedRowBatchDateInTimestampOut(int[] intValues) {
Random r = new Random(12099);
VectorizedRowBatch batch = new VectorizedRowBatch(2);
LongColumnVector inV;
TimestampColumnVector outV;
- inV = new LongColumnVector();
- outV = new TimestampColumnVector();
+ inV = new LongColumnVector(intValues.length);
+ outV = new TimestampColumnVector(intValues.length);
for (int i = 0; i < intValues.length; i++) {
- intValues[i] = r.nextInt() % DAYS_LIMIT;
+ intValues[i] = SMALLEST_EPOCH_DAY + r.nextInt() % DAYS_LIMIT;
inV.vector[i] = intValues[i];
}
@@ -137,6 +140,36 @@ public static VectorizedRowBatch getVectorizedRowBatchDateInTimestampOut(int[] i
return batch;
}
+ public static VectorizedRowBatch getVectorizedRowBatchDateInStringOut(int[] intValues) {
+ // get date in timestamp out, and change timestamp out to string out
+ VectorizedRowBatch batch = getVectorizedRowBatchDateInTimestampOut(intValues);
+ BytesColumnVector outV = new BytesColumnVector(intValues.length);
+ batch.cols[1] = outV;
+ return batch;
+ }
+
+ // For testing CastDateToStringWithFormat with
+ // TestVectorTypeCastsWithFormat#testCastDateToStringWithFormat
+ public static VectorizedRowBatch getVectorizedRowBatchDateInStringOutFormatted() {
+ VectorizedRowBatch batch = new VectorizedRowBatch(2);
+ LongColumnVector dateColumnV;
+ BytesColumnVector stringColumnV;
+ dateColumnV = new LongColumnVector();
+ stringColumnV = new BytesColumnVector();
+
+ dateColumnV.vector[0] = Date.valueOf("2019-12-31").toEpochDay();
+ dateColumnV.vector[1] = Date.valueOf("1776-07-04").toEpochDay();
+ dateColumnV.vector[2] = Date.valueOf("2012-02-29").toEpochDay();
+ dateColumnV.vector[3] = Date.valueOf("1580-08-08").toEpochDay();
+ dateColumnV.vector[4] = Date.valueOf("0005-01-01").toEpochDay();
+ dateColumnV.vector[5] = Date.valueOf("9999-12-31").toEpochDay();
+
+ batch.cols[0] = dateColumnV;
+ batch.cols[1] = stringColumnV;
+ batch.size = 6;
+ return batch;
+ }
+
public static VectorizedRowBatch getVectorizedRowBatchDoubleInLongOut() {
VectorizedRowBatch batch = new VectorizedRowBatch(2);
LongColumnVector lcv;
@@ -277,6 +310,42 @@ public static VectorizedRowBatch getVectorizedRowBatchStringInLongOut() {
return batch;
}
+ public static VectorizedRowBatch getVectorizedRowBatchStringInTimestampOutFormatted() {
+ VectorizedRowBatch batch = new VectorizedRowBatch(2);
+ BytesColumnVector inV;
+ inV = new BytesColumnVector();
+ inV.initBuffer();
+ inV.setVal(0, StandardCharsets.UTF_8.encode("2019-12-31 00:00:00.999999999").array());
+ inV.setVal(1, StandardCharsets.UTF_8.encode("1776-07-04 17:07:06.177617761").array());
+ inV.setVal(2, StandardCharsets.UTF_8.encode("2012-02-29 23:59:59.999999999").array());
+ inV.setVal(3, StandardCharsets.UTF_8.encode("1580-08-08 00:00:00.0").array());
+ inV.setVal(4, StandardCharsets.UTF_8.encode("0005-01-01 00:00:00.0").array());
+ inV.setVal(5, StandardCharsets.UTF_8.encode("9999-12-31 23:59:59.999999999").array());
+
+ batch.cols[0] = inV;
+
+ batch.size = 6;
+ return batch;
+ }
+
+ public static VectorizedRowBatch getVectorizedRowBatchStringInDateOutFormatted() {
+ VectorizedRowBatch batch = new VectorizedRowBatch(2);
+ BytesColumnVector inV;
+ inV = new BytesColumnVector();
+ inV.initBuffer();
+ inV.setVal(0, StandardCharsets.UTF_8.encode("19/12/31").array());
+ inV.setVal(1, StandardCharsets.UTF_8.encode("1776--07--04").array());
+ inV.setVal(2, StandardCharsets.UTF_8.encode("2012/02/29").array());
+ inV.setVal(3, StandardCharsets.UTF_8.encode("1580/08/08").array());
+ inV.setVal(4, StandardCharsets.UTF_8.encode("0005/01/01").array());
+ inV.setVal(5, StandardCharsets.UTF_8.encode("9999/12/31").array());
+
+ batch.cols[0] = inV;
+
+ batch.size = 6;
+ return batch;
+ }
+
public static VectorizedRowBatch getVectorizedRowBatchTimestampInLongOut(long[] longValues) {
Random r = new Random(345);
VectorizedRowBatch batch = new VectorizedRowBatch(2);
@@ -297,6 +366,58 @@ public static VectorizedRowBatch getVectorizedRowBatchTimestampInLongOut(long[]
return batch;
}
+
+ public static VectorizedRowBatch getVectorizedRowBatchTimestampInStringOut(
+ long[] epochSecondValues, int[] nanoValues) {
+ Random r = new Random(345);
+ VectorizedRowBatch batch = new VectorizedRowBatch(2);
+ batch.size = epochSecondValues.length;
+
+ TimestampColumnVector inV;
+ BytesColumnVector outV;
+ inV = new TimestampColumnVector(batch.size);
+ outV = new BytesColumnVector(batch.size);
+
+ for (int i = 0; i < batch.size; i++) {
+ Timestamp randTimestamp = RandomTypeUtil.getRandTimestamp(r);
+ epochSecondValues[i] = randTimestamp.toEpochSecond();
+ nanoValues[i] = randTimestamp.getNanos();
+ inV.set(i, randTimestamp.toSqlTimestamp());
+ }
+
+ batch.cols[0] = inV;
+ batch.cols[1] = outV;
+
+ return batch;
+ }
+
+ public static VectorizedRowBatch getVectorizedRowBatchTimestampInStringOutFormatted() {
+ VectorizedRowBatch batch = new VectorizedRowBatch(2);
+ TimestampColumnVector timestampColumnV;
+ BytesColumnVector stringColumnV;
+ timestampColumnV = new TimestampColumnVector();
+ stringColumnV = new BytesColumnVector();
+
+ timestampColumnV.set(0, getSqlTimestamp("2019-12-31 19:20:21.999999999"));
+ timestampColumnV.set(1, getSqlTimestamp("1776-07-04 17:07:06.177617761"));
+ timestampColumnV.set(2, getSqlTimestamp("2012-02-29 23:59:59.999999999"));
+ timestampColumnV.set(3, getSqlTimestamp("1580-08-08 00:00:00"));
+ timestampColumnV.set(4, getSqlTimestamp("0005-01-01 00:00:00"));
+ timestampColumnV.set(5, getSqlTimestamp("9999-12-31 23:59:59.999999999"));
+
+ batch.cols[0] = timestampColumnV;
+ batch.cols[1] = stringColumnV;
+ batch.size = 6;
+ return batch;
+ }
+
+ private static java.sql.Timestamp getSqlTimestamp(String s) {
+ java.sql.Timestamp ts = java.sql.Timestamp.valueOf(s);
+ // subtract 8 hours because java.sql.Timestamp.valueOf interprets the string in the
+ // test VM's US/Pacific time zone
+ ts.setHours(ts.getHours() - 8);
+ return ts;
+ }
+
static long SECONDS_LIMIT = 60L * 24L * 365L * 9999L;
public static VectorizedRowBatch getVectorizedRowBatchLongInTimestampOut(long[] longValues) {
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java
index 58fd7b030e..a449ea143d 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java
@@ -23,7 +23,9 @@
import static org.junit.Assert.assertTrue;
import java.math.BigDecimal;
+import java.nio.charset.StandardCharsets;
import java.sql.Timestamp;
+import java.util.Arrays;
import java.util.Random;
import java.util.concurrent.TimeUnit;
@@ -72,6 +74,30 @@ public void testVectorCastDoubleToLong() throws HiveException {
Assert.assertEquals(1, resultV.vector[6]);
}
+ // +8 hours from PST to GMT, needed because java.sql.Date subtracts 8 hours from the final
+ // value, since the test VM runs in the PST time zone.
+ private static final long TIME_DIFFERENCE = 28800000L;
+ @Test
+ public void testCastDateToString() throws HiveException {
+ int[] intValues = new int[100];
+ VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchDateInStringOut(intValues);
+ BytesColumnVector resultV = (BytesColumnVector) b.cols[1];
+ b.cols[0].noNulls = true;
+ VectorExpression expr = new CastDateToString(0, 1);
+ expr.evaluate(b);
+
+ String expected, result;
+ for (int i = 0; i < intValues.length; i++) {
+ expected =
+ new java.sql.Date(DateWritableV2.daysToMillis(intValues[i]) + TIME_DIFFERENCE).toString();
+ byte[] subbyte = Arrays.copyOfRange(resultV.vector[i], resultV.start[i],
+ resultV.start[i] + resultV.length[i]);
+ result = new String(subbyte, StandardCharsets.UTF_8);
+
+ Assert.assertEquals("Index: " + i + " Epoch day value: " + intValues[i], expected, result);
+ }
+ }
+
@Test
public void testCastDateToTimestamp() throws HiveException {
int[] intValues = new int[500];
@@ -192,6 +218,31 @@ public void testCastTimestampToDouble() throws HiveException {
}
}
+ @Test
+ public void testCastTimestampToString() throws HiveException {
+ int numberToTest = 100;
+ long[] epochSecondValues = new long[numberToTest];
+ int[] nanoValues = new int[numberToTest];
+ VectorizedRowBatch b =
+ TestVectorMathFunctions.getVectorizedRowBatchTimestampInStringOut(epochSecondValues, nanoValues);
+ BytesColumnVector resultV = (BytesColumnVector) b.cols[1];
+ b.cols[0].noNulls = true;
+ VectorExpression expr = new CastTimestampToString(0, 1);
+ expr.evaluate(b);
+
+ String expected, result;
+ for (int i = 0; i < numberToTest; i++) {
+ expected = org.apache.hadoop.hive.common.type.Timestamp
+ .ofEpochSecond(epochSecondValues[i], nanoValues[i]).toString();
+ byte[] subbyte = Arrays.copyOfRange(resultV.vector[i], resultV.start[i],
+ resultV.start[i] + resultV.length[i]);
+ result = new String(subbyte, StandardCharsets.UTF_8);
+ Assert.assertEquals("Index: " + i + " Seconds since epoch: " + epochSecondValues[i] +
+ " nanoseconds: " + nanoValues[i],
+ expected, result);
+ }
+ }
+
public byte[] toBytes(String s) {
byte[] b = null;
try {
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCastsWithFormat.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCastsWithFormat.java
new file mode 100644
index 0000000000..6aa2843bbe
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCastsWithFormat.java
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+
+/**
+ * Tests vectorized type cast udfs CastDateToStringWithFormat, CastTimestampToStringWithFormat,
+ * CastStringToDateWithFormat, CastStringToTimestampWithFormat.
+ */
+public class TestVectorTypeCastsWithFormat {
+
+ @Test
+ public void testCastDateToStringWithFormat() throws HiveException {
+ VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchDateInStringOutFormatted();
+ BytesColumnVector resultV = (BytesColumnVector) b.cols[1];
+ VectorExpression expr = new CastDateToStringWithFormat(0, "yyyy".getBytes(), 1);
+ expr.evaluate(b);
+ verifyString(0, "2019", resultV);
+ verifyString(1, "1776", resultV);
+ verifyString(2, "2012", resultV);
+ verifyString(3, "1580", resultV);
+ verifyString(4, "0005", resultV);
+ verifyString(5, "9999", resultV);
+
+ expr = new CastDateToStringWithFormat(0, "MM".getBytes(), 1);
+ resultV = new BytesColumnVector();
+ b.cols[1] = resultV;
+ expr.evaluate(b);
+ verifyString(0, "12", resultV);
+ verifyString(1, "07", resultV);
+ verifyString(2, "02", resultV);
+ verifyString(3, "08", resultV);
+ verifyString(4, "01", resultV);
+ verifyString(5, "12", resultV);
+ }
+
+ @Test
+ public void testCastTimestampToStringWithFormat() throws HiveException {
+ VectorizedRowBatch b =
+ TestVectorMathFunctions.getVectorizedRowBatchTimestampInStringOutFormatted();
+ BytesColumnVector resultV = (BytesColumnVector) b.cols[1];
+ VectorExpression expr = new CastTimestampToStringWithFormat(0, "yyyy".getBytes(), 1);
+ expr.evaluate(b);
+
+ Assert.assertEquals("2019", getStringFromBytesColumnVector(resultV, 0));
+ Assert.assertEquals("1776", getStringFromBytesColumnVector(resultV, 1));
+ Assert.assertEquals("2012", getStringFromBytesColumnVector(resultV, 2));
+ Assert.assertEquals("1580", getStringFromBytesColumnVector(resultV, 3));
+ Assert.assertEquals("0004", getStringFromBytesColumnVector(resultV, 4));
+ Assert.assertEquals("9999", getStringFromBytesColumnVector(resultV, 5));
+
+ resultV = new BytesColumnVector();
+ b.cols[1] = resultV;
+ expr = new CastTimestampToStringWithFormat(0, "HH24".getBytes(), 1);
+ expr.evaluate(b);
+
+ Assert.assertEquals("19", getStringFromBytesColumnVector(resultV, 0));
+ Assert.assertEquals("17", getStringFromBytesColumnVector(resultV, 1));
+ Assert.assertEquals("23", getStringFromBytesColumnVector(resultV, 2));
+ Assert.assertEquals("00", getStringFromBytesColumnVector(resultV, 3));
+ Assert.assertEquals("00", getStringFromBytesColumnVector(resultV, 4));
+ Assert.assertEquals("23", getStringFromBytesColumnVector(resultV, 5));
+ }
+
+ @Test
+ public void testCastStringToTimestampWithFormat() throws HiveException {
+ VectorizedRowBatch b =
+ TestVectorMathFunctions.getVectorizedRowBatchStringInTimestampOutFormatted();
+ TimestampColumnVector resultV;
+ resultV = new TimestampColumnVector();
+ b.cols[1] = resultV;
+ VectorExpression expr = new CastStringToTimestampWithFormat(0, "yyyy.mm.dd HH24.mi.ss.ff".getBytes(), 1);
+ expr.evaluate(b);
+
+ verifyTimestamp("2019-12-31 00:00:00.999999999", resultV, 0);
+ verifyTimestamp("1776-07-04 17:07:06.177617761", resultV, 1);
+ verifyTimestamp("2012-02-29 23:59:59.999999999", resultV, 2);
+ verifyTimestamp("1580-08-08 00:00:00", resultV, 3);
+ verifyTimestamp("0005-01-01 00:00:00", resultV, 4);
+ verifyTimestamp("9999-12-31 23:59:59.999999999", resultV, 5);
+ }
+
+ private void verifyTimestamp(String tsString, TimestampColumnVector resultV, int index) {
+ Assert.assertEquals(Timestamp.valueOf(tsString).toEpochMilli(), resultV.time[index]);
+ Assert.assertEquals(Timestamp.valueOf(tsString).getNanos(), resultV.nanos[index]);
+ }
+
+ @Test
+ public void testCastStringToDateWithFormat() throws HiveException {
+ VectorizedRowBatch b =
+ TestVectorMathFunctions.getVectorizedRowBatchStringInDateOutFormatted();
+ LongColumnVector resultV;
+ resultV = new LongColumnVector();
+ b.cols[1] = resultV;
+ VectorExpression expr = new CastStringToDateWithFormat(0, "yyyy.mm.dd".getBytes(), 1);
+ expr.evaluate(b);
+
+ Assert.assertEquals(Date.valueOf("2019-12-31").toEpochDay(), resultV.vector[0]);
+ Assert.assertEquals(Date.valueOf("1776-07-04").toEpochDay(), resultV.vector[1]);
+ Assert.assertEquals(Date.valueOf("2012-02-29").toEpochDay(), resultV.vector[2]);
+ Assert.assertEquals(Date.valueOf("1580-08-08").toEpochDay(), resultV.vector[3]);
+ Assert.assertEquals(Date.valueOf("0005-01-01").toEpochDay(), resultV.vector[4]);
+ Assert.assertEquals(Date.valueOf("9999-12-31").toEpochDay(), resultV.vector[5]);
+ }
+
+ private void verifyString(int resultIndex, String expected, BytesColumnVector resultV) {
+ String result = getStringFromBytesColumnVector(resultV, resultIndex);
+ Assert.assertEquals(expected, result);
+ }
+
+ private String getStringFromBytesColumnVector(BytesColumnVector resultV, int i) {
+ String result;
+ byte[] resultBytes = Arrays.copyOfRange(resultV.vector[i], resultV.start[i],
+ resultV.start[i] + resultV.length[i]);
+ result = new String(resultBytes, StandardCharsets.UTF_8);
+ return result;
+ }
+}
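
A minimal standalone sketch (not part of the patch; the class name is hypothetical) showing how the vectorized cast-with-format expressions above fit together end to end, using only constructors and APIs that already appear in these tests:

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.common.type.Date;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToDateWithFormat;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.metadata.HiveException;

public class CastWithFormatSketch {
  public static void main(String[] args) throws HiveException {
    // one string column in, one long (epoch day) column out
    VectorizedRowBatch batch = new VectorizedRowBatch(2);
    BytesColumnVector in = new BytesColumnVector();
    in.initBuffer();
    in.setVal(0, StandardCharsets.UTF_8.encode("2019/12/31").array());
    batch.cols[0] = in;
    batch.cols[1] = new LongColumnVector();
    batch.size = 1;

    // input column 0, SQL:2016 pattern bytes, output column 1 -- same shape as in the tests
    VectorExpression cast = new CastStringToDateWithFormat(0, "yyyy.mm.dd".getBytes(), 1);
    cast.evaluate(batch);

    long epochDay = ((LongColumnVector) batch.cols[1]).vector[0];
    System.out.println(epochDay == Date.valueOf("2019-12-31").toEpochDay()); // expect true
  }
}
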
diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFFromUnixTime.java ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFFromUnixTime.java
new file mode 100644
index 0000000000..97c7650f31
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFFromUnixTime.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf;
+
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Tests UDFFromUnixTime.
+ */
+public class TestUDFFromUnixTime {
+
+ @Test
+ public void testFromUnixTime() {
+ UDFFromUnixTime udf = new UDFFromUnixTime();
+
+ //int, no format
+ verifyInt(0, "1970-01-01 00:00:00", null, udf);
+ verifyInt(1296705906, "2011-02-03 04:05:06", null, udf);
+ verifyInt(1514818800, "2018-01-01 15:00:00", null, udf);
+
+ //long, no format
+ verifyLong(0L, "1970-01-01 00:00:00", null, udf);
+ verifyLong(1296705906L, "2011-02-03 04:05:06", null, udf);
+ verifyLong(1514818800L, "2018-01-01 15:00:00", null, udf);
+ // proleptic Gregorian input: -30767590800L
+ verifyLong(-30767158800L, "0995-01-05 15:00:00", null, udf);
+ // proleptic Gregorian input: -62009366400
+ verifyLong(-62009539200L, "0005-01-01 00:00:00", null, udf);
+ verifyLong(253402300799L, "9999-12-31 23:59:59", null, udf);
+
+ //int with format
+ String format = "HH:mm:ss";
+ verifyInt(0, "00:00:00", format, udf);
+ verifyInt(1296705906, "04:05:06", format, udf);
+ verifyInt(1514818800, "15:00:00", format, udf);
+
+ //long with format
+ verifyLong(0L, "00:00:00", format, udf);
+ verifyLong(1296705906L, "04:05:06", format, udf);
+ verifyLong(1514818800L, "15:00:00", format, udf);
+ // proleptic Gregorian input: -30767590800L
+ verifyLong(-30767158800L, "15:00:00", format, udf);
+ // proleptic Gregorian input: -62009366400
+ verifyLong(-62009539200L, "00:00:00", format, udf);
+ verifyLong(253402300799L, "23:59:59", format, udf);
+ }
+
+ private void verifyInt(int value, String expected, String format, UDFFromUnixTime udf) {
+ IntWritable input = new IntWritable(value);
+ Text res;
+ if (format == null) {
+ res = udf.evaluate(input);
+ } else {
+ res = udf.evaluate(input, new Text(format));
+ }
+ Assert.assertEquals(expected, res.toString());
+ }
+
+ private void verifyLong(long value, String expected, String format, UDFFromUnixTime udf) {
+ LongWritable input = new LongWritable(value);
+ Text res;
+ if (format == null) {
+ res = udf.evaluate(input);
+ } else {
+ res = udf.evaluate(input, new Text(format));
+ }
+ Assert.assertEquals(expected, res.toString());
+ }
+}
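
A side note on the "proleptic Gregorian input" comments above, with a minimal sketch (not part of the patch; the class name is hypothetical): java.time uses the proleptic Gregorian calendar, while the legacy java.util/java.sql classes switch to the Julian calendar before 1582, so the same wall-clock date maps to different epoch seconds -- a two-day difference around year 5:

import java.time.LocalDateTime;
import java.time.ZoneOffset;

public class ProlepticGregorianDemo {
  public static void main(String[] args) {
    // epoch seconds of 0005-01-01T00:00:00 UTC in the proleptic Gregorian calendar
    long proleptic = LocalDateTime.parse("0005-01-01T00:00:00").toEpochSecond(ZoneOffset.UTC);
    System.out.println(proleptic); // -62009366400, the value named in the comment above
  }
}
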
diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java
index 7c2ee15646..e9c188b883 100644
--- ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java
+++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java
@@ -35,9 +35,9 @@
public class TestGenericUDFAddMonths extends TestCase {
- private final Text fmtTextWithTime = new Text("YYYY-MM-dd HH:mm:ss");
- private final Text fmtTextWithTimeAndms = new Text("YYYY-MM-dd HH:mm:ss.SSS");
- private final Text fmtTextWithoutTime = new Text("YYYY-MM-dd");
+ private final Text fmtTextWithTime = new Text("yyyy-MM-dd HH:mm:ss");
+ private final Text fmtTextWithTimeAndms = new Text("yyyy-MM-dd HH:mm:ss.SSS");
+ private final Text fmtTextWithoutTime = new Text("yyyy-MM-dd");
private final Text fmtTextInvalid = new Text("YYYY-abcdz");
public void testAddMonthsInt() throws HiveException {
@@ -215,7 +215,6 @@ public void testAddMonthsLong() throws HiveException {
}
-
private void runAndVerify(String str, int months, String expResult, GenericUDF udf)
throws HiveException {
DeferredObject valueObj0 = new DeferredJavaObject(new Text(str));
diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCastWithFormat.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCastWithFormat.java
new file mode 100644
index 0000000000..e6d531f2b5
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCastWithFormat.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.io.DateWritableV2;
+import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.io.Text;
+import org.junit.Test;
+
+import static junit.framework.TestCase.assertEquals;
+import static junit.framework.TestCase.assertNull;
+
+/**
+ * Tests cast udfs GenericUDFToString, GenericUDFToDate, GenericUDFTimestamp with a second format
+ * argument. E.g. CAST (<expr> AS STRING FORMAT <pattern>)
+ */
+public class TestGenericUDFCastWithFormat {
+
+ @Test
+ public void testDateToStringWithFormat() throws HiveException {
+ GenericUDF udf = new GenericUDFToString();
+ ObjectInspector inputOI = PrimitiveObjectInspectorFactory.writableDateObjectInspector;
+ testCast(udf, inputOI, date("2009-07-30"),"yyyy-MM-dd", "2009-07-30");
+ testCast(udf, inputOI, date("2009-07-30"), "yyyy", "2009");
+ testCast(udf, inputOI, date("1969-07-30"), "dd", "30");
+ }
+
+ @Test
+ public void testStringToDateWithFormat() throws HiveException {
+ GenericUDF udf = new GenericUDFToDate();
+ ObjectInspector inputOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
+ testCast(udf, inputOI, "2009-07-30", "yyyy-MM-dd", "2009-07-30");
+ testCast(udf, inputOI, "2009", "yyyy", "2009-01-01");
+ testCast(udf, inputOI, "30", "dd", "1970-01-30");
+ }
+
+ @Test
+ public void testStringToTimestampWithFormat() throws HiveException {
+ GenericUDF udf = new GenericUDFTimestamp();
+ ObjectInspector inputOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
+ testCast(udf, inputOI, "2009-07-30 01:02:03", "yyyy-MM-dd HH24:mi:ss", "2009-07-30 01:02:03");
+ testCast(udf, inputOI, "2009", "yyyy", "2009-01-01 00:00:00");
+ testCast(udf, inputOI, "07/30/2009 11:0200", "MM/dd/yyyy hh24:miss", "2009-07-30 11:02:00");
+ testCast(udf, inputOI, "69.07.30.", "yy.MM.dd.", "1969-07-30 00:00:00");
+ }
+
+ @Test
+ public void testTimestampToStringWithFormat() throws HiveException {
+ GenericUDF udf = new GenericUDFToString();
+ ObjectInspector inputOI = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
+ testCast(udf, inputOI, timestamp("2009-07-30 00:00:08"),
+ "yyyy-MM-dd HH24:mi:ss", "2009-07-30 00:00:08");
+ testCast(udf, inputOI, timestamp("2009-07-30 11:02:00"),
+ "MM/dd/yyyy hh24miss", "07/30/2009 110200");
+ testCast(udf, inputOI, timestamp("2009-07-30 01:02:03"), "MM", "07");
+ testCast(udf, inputOI, timestamp("1969-07-30 00:00:00"), "yy", "69");
+ }
+
+ private TimestampWritableV2 timestamp(String s) {
+ return new TimestampWritableV2(Timestamp.valueOf(s));
+ }
+
+ private DateWritableV2 date(String s) {
+ return new DateWritableV2(Date.valueOf(s));
+ }
+
+ private void testCast(
+ GenericUDF udf, ObjectInspector inputOI, Object input, String format, String output)
+ throws HiveException {
+
+ ConstantObjectInspector formatOI =
+ PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
+ TypeInfoFactory.getPrimitiveTypeInfo("string"), new Text(format));
+ ObjectInspector[] arguments = {inputOI, formatOI};
+ udf.initialize(arguments);
+
+ GenericUDF.DeferredObject valueObj = new GenericUDF.DeferredJavaObject(input);
+ GenericUDF.DeferredObject formatObj = new GenericUDF.DeferredJavaObject(new Text(format));
+ GenericUDF.DeferredObject[] args = {valueObj, formatObj};
+
+ assertEquals("cast " + inputOI.getTypeName() + " to " + udf.getFuncName() + " failed ",
+ output, udf.evaluate(args).toString());
+
+ // Try with null args
+ GenericUDF.DeferredObject[] nullArgs = {new GenericUDF.DeferredJavaObject(null)};
+ assertNull(udf.getFuncName() + " with NULL arguments failed", udf.evaluate(nullArgs));
+ }
+}
diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormat.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormat.java
index 6a3cdda48a..8c7df4d966 100644
--- ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormat.java
+++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormat.java
@@ -44,24 +44,32 @@ public void testDateFormatStr() throws HiveException {
udf.initialize(arguments);
// date str
- runAndVerifyStr("2015-04-05", fmtText, "Sunday", udf);
- runAndVerifyStr("2015-04-06", fmtText, "Monday", udf);
- runAndVerifyStr("2015-04-07", fmtText, "Tuesday", udf);
- runAndVerifyStr("2015-04-08", fmtText, "Wednesday", udf);
- runAndVerifyStr("2015-04-09", fmtText, "Thursday", udf);
- runAndVerifyStr("2015-04-10", fmtText, "Friday", udf);
- runAndVerifyStr("2015-04-11", fmtText, "Saturday", udf);
- runAndVerifyStr("2015-04-12", fmtText, "Sunday", udf);
+ runAndVerifyStr("2015-04-05", "Sunday", udf);
+ runAndVerifyStr("2015-04-06", "Monday", udf);
+ runAndVerifyStr("2015-04-07", "Tuesday", udf);
+ runAndVerifyStr("2015-04-08", "Wednesday", udf);
+ runAndVerifyStr("2015-04-09", "Thursday", udf);
+ runAndVerifyStr("2015-04-10", "Friday", udf);
+ runAndVerifyStr("2015-04-11", "Saturday", udf);
+ runAndVerifyStr("2015-04-12", "Sunday", udf);
// ts str
- runAndVerifyStr("2015-04-05 10:30:45", fmtText, "Sunday", udf);
- runAndVerifyStr("2015-04-06 10:30:45", fmtText, "Monday", udf);
- runAndVerifyStr("2015-04-07 10:30:45", fmtText, "Tuesday", udf);
- runAndVerifyStr("2015-04-08 10:30:45", fmtText, "Wednesday", udf);
- runAndVerifyStr("2015-04-09 10:30", fmtText, "Thursday", udf);
- runAndVerifyStr("2015-04-10 10:30:45.123", fmtText, "Friday", udf);
- runAndVerifyStr("2015-04-11T10:30:45", fmtText, "Saturday", udf);
- runAndVerifyStr("2015-04-12 10", fmtText, "Sunday", udf);
+ runAndVerifyStr("2015-04-05 10:30:45", "Sunday", udf);
+ runAndVerifyStr("2015-04-06 10:30:45", "Monday", udf);
+ runAndVerifyStr("2015-04-07 10:30:45", "Tuesday", udf);
+ runAndVerifyStr("2015-04-08 10:30:45", "Wednesday", udf);
+ runAndVerifyStr("2015-04-09 10:30", "Thursday", udf);
+ runAndVerifyStr("2015-04-10 10:30:45.123", "Friday", udf);
+ runAndVerifyStr("2015-04-11T10:30:45", "Saturday", udf);
+ runAndVerifyStr("2015-04-12 10", "Sunday", udf);
+
+ // make sure hour is ok
+ fmtText = new Text("hh");
+ valueOI1 = PrimitiveObjectInspectorFactory
+ .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtText);
+ arguments[1] = valueOI1;
+ udf.initialize(arguments);
+ runAndVerifyStr("2015-04-10 10:30:45.123", "10", udf);
}
public void testWrongDateStr() throws HiveException {
@@ -73,10 +81,10 @@ public void testWrongDateStr() throws HiveException {
ObjectInspector[] arguments = {valueOI0, valueOI1};
udf.initialize(arguments);
- runAndVerifyStr("2016-02-30 10:30:45", fmtText, "Tuesday", udf);
- runAndVerifyStr("2014-01-32", fmtText, "Saturday", udf);
- runAndVerifyStr("01/14/2014", fmtText, null, udf);
- runAndVerifyStr(null, fmtText, null, udf);
+ runAndVerifyStr("2016-02-30 10:30:45", "Tuesday", udf);
+ runAndVerifyStr("2014-01-32", "Saturday", udf);
+ runAndVerifyStr("01/14/2014", null, udf);
+ runAndVerifyStr(null, null, udf);
}
public void testDateFormatDate() throws HiveException {
@@ -89,14 +97,22 @@ public void testDateFormatDate() throws HiveException {
udf.initialize(arguments);
- runAndVerifyDate("2015-04-05", fmtText, "Sunday", udf);
- runAndVerifyDate("2015-04-06", fmtText, "Monday", udf);
- runAndVerifyDate("2015-04-07", fmtText, "Tuesday", udf);
- runAndVerifyDate("2015-04-08", fmtText, "Wednesday", udf);
- runAndVerifyDate("2015-04-09", fmtText, "Thursday", udf);
- runAndVerifyDate("2015-04-10", fmtText, "Friday", udf);
- runAndVerifyDate("2015-04-11", fmtText, "Saturday", udf);
- runAndVerifyDate("2015-04-12", fmtText, "Sunday", udf);
+ runAndVerifyDate("2015-04-05", "Sunday", udf);
+ runAndVerifyDate("2015-04-06", "Monday", udf);
+ runAndVerifyDate("2015-04-07", "Tuesday", udf);
+ runAndVerifyDate("2015-04-08", "Wednesday", udf);
+ runAndVerifyDate("2015-04-09", "Thursday", udf);
+ runAndVerifyDate("2015-04-10", "Friday", udf);
+ runAndVerifyDate("2015-04-11", "Saturday", udf);
+ runAndVerifyDate("2015-04-12", "Sunday", udf);
+
+ // make sure year is ok
+ fmtText = new Text("yyyy");
+ valueOI1 = PrimitiveObjectInspectorFactory
+ .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtText);
+ arguments[1] = valueOI1;
+ udf.initialize(arguments);
+ runAndVerifyDate("2015-04-08", "2015", udf);
}
public void testDateFormatTs() throws HiveException {
@@ -109,15 +125,24 @@ public void testDateFormatTs() throws HiveException {
udf.initialize(arguments);
- runAndVerifyTs("2015-04-08 10:30:45", fmtText, "Wednesday", udf);
- runAndVerifyTs("2015-04-05 10:30:45", fmtText, "Sunday", udf);
- runAndVerifyTs("2015-04-06 10:30:45", fmtText, "Monday", udf);
- runAndVerifyTs("2015-04-07 10:30:45", fmtText, "Tuesday", udf);
- runAndVerifyTs("2015-04-08 10:30:45", fmtText, "Wednesday", udf);
- runAndVerifyTs("2015-04-09 10:30:45", fmtText, "Thursday", udf);
- runAndVerifyTs("2015-04-10 10:30:45.123", fmtText, "Friday", udf);
- runAndVerifyTs("2015-04-11 10:30:45.123456789", fmtText, "Saturday", udf);
- runAndVerifyTs("2015-04-12 10:30:45", fmtText, "Sunday", udf);
+ runAndVerifyTs("2015-04-08 10:30:45", "Wednesday", udf);
+ runAndVerifyTs("2015-04-05 10:30:45", "Sunday", udf);
+ runAndVerifyTs("2015-04-06 10:30:45", "Monday", udf);
+ runAndVerifyTs("2015-04-07 10:30:45", "Tuesday", udf);
+ runAndVerifyTs("2015-04-08 10:30:45", "Wednesday", udf);
+ runAndVerifyTs("2015-04-09 10:30:45", "Thursday", udf);
+ runAndVerifyTs("2015-04-10 10:30:45.123", "Friday", udf);
+ runAndVerifyTs("2015-04-11 10:30:45.123456789", "Saturday", udf);
+ runAndVerifyTs("2015-04-12 10:30:45", "Sunday", udf);
+
+ // make sure hour of day is ok
+ fmtText = new Text("HH");
+ valueOI1 = PrimitiveObjectInspectorFactory
+ .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtText);
+ arguments[1] = valueOI1;
+ udf.initialize(arguments);
+ runAndVerifyTs("2015-04-08 00:30:45", "00", udf);
 }
public void testNullFmt() throws HiveException {
@@ -126,11 +151,11 @@ public void testNullFmt() throws HiveException {
Text fmtText = null;
ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory
.getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtText);
- ObjectInspector[] arguments = { valueOI0, valueOI1 };
+ ObjectInspector[] arguments = {valueOI0, valueOI1};
udf.initialize(arguments);
- runAndVerifyStr("2015-04-05", fmtText, null, udf);
+ runAndVerifyStr("2015-04-05", null, udf);
}
public void testWrongFmt() throws HiveException {
@@ -139,38 +164,35 @@ public void testWrongFmt() throws HiveException {
Text fmtText = new Text("Q");
ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory
.getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtText);
- ObjectInspector[] arguments = { valueOI0, valueOI1 };
+ ObjectInspector[] arguments = {valueOI0, valueOI1};
udf.initialize(arguments);
- runAndVerifyStr("2015-04-05", fmtText, null, udf);
+ runAndVerifyStr("2015-04-05", null, udf);
}
- private void runAndVerifyStr(String str, Text fmtText, String expResult, GenericUDF udf)
+ private void runAndVerifyStr(String str, String expResult, GenericUDF udf)
throws HiveException {
DeferredObject valueObj0 = new DeferredJavaObject(str != null ? new Text(str) : null);
- DeferredObject valueObj1 = new DeferredJavaObject(fmtText);
- DeferredObject[] args = { valueObj0, valueObj1 };
+ DeferredObject[] args = {valueObj0};
Text output = (Text) udf.evaluate(args);
assertEquals("date_format() test ", expResult, output != null ? output.toString() : null);
}
- private void runAndVerifyDate(String str, Text fmtText, String expResult, GenericUDF udf)
+ private void runAndVerifyDate(String str, String expResult, GenericUDF udf)
throws HiveException {
DeferredObject valueObj0 = new DeferredJavaObject(str != null ? new DateWritableV2(
Date.valueOf(str)) : null);
- DeferredObject valueObj1 = new DeferredJavaObject(fmtText);
- DeferredObject[] args = { valueObj0, valueObj1 };
+ DeferredObject[] args = {valueObj0};
Text output = (Text) udf.evaluate(args);
assertEquals("date_format() test ", expResult, output != null ? output.toString() : null);
}
- private void runAndVerifyTs(String str, Text fmtText, String expResult, GenericUDF udf)
+ private void runAndVerifyTs(String str, String expResult, GenericUDF udf)
throws HiveException {
DeferredObject valueObj0 = new DeferredJavaObject(str != null ? new TimestampWritableV2(
Timestamp.valueOf(str)) : null);
- DeferredObject valueObj1 = new DeferredJavaObject(fmtText);
- DeferredObject[] args = { valueObj0, valueObj1 };
+ DeferredObject[] args = {valueObj0};
Text output = (Text) udf.evaluate(args);
assertEquals("date_format() test ", expResult, output != null ? output.toString() : null);
}
diff --git ql/src/test/queries/clientpositive/cast_datetime_with_sql_2016_format.q ql/src/test/queries/clientpositive/cast_datetime_with_sql_2016_format.q
new file mode 100644
index 0000000000..daeb61c975
--- /dev/null
+++ ql/src/test/queries/clientpositive/cast_datetime_with_sql_2016_format.q
@@ -0,0 +1,63 @@
+--non-vectorized
+set hive.vectorized.execution.enabled=false;
+set hive.fetch.task.conversion=more;
+
+drop table if exists timestamps;
+drop table if exists dates;
+drop table if exists strings;
+drop table if exists chars;
+drop table if exists varchars;
+
+create table timestamps (t timestamp) stored as parquet;
+insert into timestamps values
+("2020-02-03"),
+("1969-12-31 23:59:59.999999999")
+;
+from timestamps select cast (t as string format "yyyy hh24...PM ff");
+from timestamps select cast (t as char(11) format "yyyy hh24...PM ff"); -- will be truncated
+from timestamps select cast (t as varchar(11) format "yyyy hh24...PM ff"); -- will be truncated
+
+create table dates (d date) stored as parquet;
+insert into dates values
+("2020-02-03"),
+("1969-12-31")
+;
+from dates select cast (d as string format "yyyy mm dd , hh24 mi ss ff9");
+from dates select cast (d as char(10) format "yyyy mm dd , hh24 mi ss ff9"); -- will be truncated
+from dates select cast (d as varchar(10) format "yyyy mm dd , hh24 mi ss ff9"); -- will be truncated
+
+create table strings (s string) stored as parquet;
+create table varchars (s varchar(11)) stored as parquet;
+create table chars (s char(11)) stored as parquet;
+insert into strings values
+("20 / 2 / 3"),
+("1969 12 31")
+;
+insert into varchars select * from strings;
+insert into chars select * from strings;
+
+from strings select cast (s as timestamp format "yyyy.mm.dd");
+from strings select cast (s as date format "yyyy.mm.dd");
+from varchars select cast (s as timestamp format "yyyy.mm.dd");
+from varchars select cast (s as date format "yyyy.mm.dd");
+from chars select cast (s as timestamp format "yyyy.mm.dd");
+from chars select cast (s as date format "yyyy.mm.dd");
+
+
+--correct descriptions
+explain from strings select cast (s as timestamp format "yyy.mm.dd");
+explain from strings select cast (s as date format "yyy.mm.dd");
+explain from timestamps select cast (t as string format "yyyy");
+explain from timestamps select cast (t as varchar(12) format "yyyy");
+
+
+--vectorized
+set hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
+from timestamps select cast (t as string format "yyyy");
+from dates select cast (d as string format "yyyy");
+from timestamps select cast (t as varchar(11) format "yyyy");
+from dates select cast (d as varchar(11) format "yyyy");
+from timestamps select cast (t as char(11) format "yyyy");
+from dates select cast (d as char(11) format "yyyy");
+from strings select cast (s as timestamp format "yyyy.mm.dd");
+from varchars select cast (s as timestamp format "yyyy.mm.dd");
+from chars select cast (s as timestamp format "yyyy.mm.dd");
+from strings select cast (s as date format "yyyy.mm.dd");
+from varchars select cast (s as date format "yyyy.mm.dd");
+from chars select cast (s as date format "yyyy.mm.dd");
diff --git ql/src/test/results/clientpositive/cast_datetime_with_sql_2016_format.q.out ql/src/test/results/clientpositive/cast_datetime_with_sql_2016_format.q.out
new file mode 100644
index 0000000000..e5f0d4bf35
--- /dev/null
+++ ql/src/test/results/clientpositive/cast_datetime_with_sql_2016_format.q.out
@@ -0,0 +1,469 @@
+PREHOOK: query: drop table if exists timestamps
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists timestamps
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists dates
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists dates
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists strings
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists strings
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists chars
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists chars
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists varchars
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists varchars
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table timestamps (t timestamp) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@timestamps
+POSTHOOK: query: create table timestamps (t timestamp) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@timestamps
+PREHOOK: query: insert into timestamps values
+("2020-02-03"),
+("1969-12-31 23:59:59.999999999")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@timestamps
+POSTHOOK: query: insert into timestamps values
+("2020-02-03"),
+("1969-12-31 23:59:59.999999999")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@timestamps
+POSTHOOK: Lineage: timestamps.t SCRIPT []
+PREHOOK: query: from timestamps select cast (t as string format "yyyy hh24...PM ff")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+POSTHOOK: query: from timestamps select cast (t as string format "yyyy hh24...PM ff")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+2020 00...AM 0
+1969 23...PM 999999999
+PREHOOK: query: from timestamps select cast (t as char(11) format "yyyy hh24...PM ff")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+POSTHOOK: query: from timestamps select cast (t as char(11) format "yyyy hh24...PM ff")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+2020 00...A
+1969 23...P
+PREHOOK: query: -- will be truncated
+from timestamps select cast (t as varchar(11) format "yyyy hh24...PM ff")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+POSTHOOK: query: -- will be truncated
+from timestamps select cast (t as varchar(11) format "yyyy hh24...PM ff")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+2020 00...A
+1969 23...P
+PREHOOK: query: -- will be truncated
+
+create table dates (d date) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dates
+POSTHOOK: query: -- will be truncated
+
+create table dates (d date) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dates
+PREHOOK: query: insert into dates values
+("2020-02-03"),
+("1969-12-31")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@dates
+POSTHOOK: query: insert into dates values
+("2020-02-03"),
+("1969-12-31")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@dates
+POSTHOOK: Lineage: dates.d SCRIPT []
+PREHOOK: query: from dates select cast (d as string format "yyyy mm dd , hh24 mi ss ff9")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dates
+#### A masked pattern was here ####
+POSTHOOK: query: from dates select cast (d as string format "yyyy mm dd , hh24 mi ss ff9")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dates
+#### A masked pattern was here ####
+2020 02 03 , 00 00 00 000000000
+1969 12 31 , 00 00 00 000000000
+PREHOOK: query: from dates select cast (d as char(10) format "yyyy mm dd , hh24 mi ss ff9")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dates
+#### A masked pattern was here ####
+POSTHOOK: query: from dates select cast (d as char(10) format "yyyy mm dd , hh24 mi ss ff9")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dates
+#### A masked pattern was here ####
+2020 02 03
+1969 12 31
+PREHOOK: query: -- will be truncated
+from dates select cast (d as varchar(10) format "yyyy mm dd , hh24 mi ss ff9")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dates
+#### A masked pattern was here ####
+POSTHOOK: query: -- will be truncated
+from dates select cast (d as varchar(10) format "yyyy mm dd , hh24 mi ss ff9")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dates
+#### A masked pattern was here ####
+2020 02 03
+1969 12 31
+PREHOOK: query: -- will be truncated
+
+create table strings (s string) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@strings
+POSTHOOK: query: -- will be truncated
+
+create table strings (s string) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@strings
+PREHOOK: query: create table varchars (s varchar(11)) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@varchars
+POSTHOOK: query: create table varchars (s varchar(11)) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@varchars
+PREHOOK: query: create table chars (s char(11)) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@chars
+POSTHOOK: query: create table chars (s char(11)) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@chars
+PREHOOK: query: insert into strings values
+("20 / 2 / 3"),
+("1969 12 31")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@strings
+POSTHOOK: query: insert into strings values
+("20 / 2 / 3"),
+("1969 12 31")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@strings
+POSTHOOK: Lineage: strings.s SCRIPT []
+PREHOOK: query: insert into varchars select * from strings
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+PREHOOK: Output: default@varchars
+POSTHOOK: query: insert into varchars select * from strings
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+POSTHOOK: Output: default@varchars
+POSTHOOK: Lineage: varchars.s EXPRESSION [(strings)strings.FieldSchema(name:s, type:string, comment:null), ]
+PREHOOK: query: insert into chars select * from strings
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+PREHOOK: Output: default@chars
+POSTHOOK: query: insert into chars select * from strings
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+POSTHOOK: Output: default@chars
+POSTHOOK: Lineage: chars.s EXPRESSION [(strings)strings.FieldSchema(name:s, type:string, comment:null), ]
+PREHOOK: query: from strings select cast (s as timestamp format "yyyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+#### A masked pattern was here ####
+POSTHOOK: query: from strings select cast (s as timestamp format "yyyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+#### A masked pattern was here ####
+2020-02-03 00:00:00
+1969-12-31 00:00:00
+PREHOOK: query: from strings select cast (s as date format "yyyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+#### A masked pattern was here ####
+POSTHOOK: query: from strings select cast (s as date format "yyyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+#### A masked pattern was here ####
+2020-02-03
+1969-12-31
+PREHOOK: query: from varchars select cast (s as timestamp format "yyyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchars
+#### A masked pattern was here ####
+POSTHOOK: query: from varchars select cast (s as timestamp format "yyyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchars
+#### A masked pattern was here ####
+2020-02-03 00:00:00
+1969-12-31 00:00:00
+PREHOOK: query: from varchars select cast (s as date format "yyyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchars
+#### A masked pattern was here ####
+POSTHOOK: query: from varchars select cast (s as date format "yyyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchars
+#### A masked pattern was here ####
+2020-02-03
+1969-12-31
+PREHOOK: query: from chars select cast (s as timestamp format "yyyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@chars
+#### A masked pattern was here ####
+POSTHOOK: query: from chars select cast (s as timestamp format "yyyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@chars
+#### A masked pattern was here ####
+2020-02-03 00:00:00
+1969-12-31 00:00:00
+PREHOOK: query: from chars select cast (s as date format "yyyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@chars
+#### A masked pattern was here ####
+POSTHOOK: query: from chars select cast (s as date format "yyyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@chars
+#### A masked pattern was here ####
+2020-02-03
+1969-12-31
+PREHOOK: query: explain from strings select cast (s as timestamp format "yyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+#### A masked pattern was here ####
+POSTHOOK: query: explain from strings select cast (s as timestamp format "yyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: strings
+ Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: CAST( s AS TIMESTAMP FORMAT 'yyy.mm.dd') (type: timestamp)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+ ListSink
+
+PREHOOK: query: explain from strings select cast (s as date format "yyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+#### A masked pattern was here ####
+POSTHOOK: query: explain from strings select cast (s as date format "yyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: strings
+ Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: CAST( s AS DATE FORMAT 'yyy.mm.dd') (type: date)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
+ ListSink
+
+PREHOOK: query: explain from timestamps select cast (t as string format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+POSTHOOK: query: explain from timestamps select cast (t as string format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: timestamps
+ Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: CAST( t AS STRING FORMAT 'yyyy') (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE
+ ListSink
+
+PREHOOK: query: explain from timestamps select cast (t as varchar(12) format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+POSTHOOK: query: explain from timestamps select cast (t as varchar(12) format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: timestamps
+ Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: CAST( t AS VARCHAR(12) FORMAT 'yyyy') (type: varchar(12))
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
+ ListSink
+
+PREHOOK: query: from timestamps select cast (t as string format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+POSTHOOK: query: from timestamps select cast (t as string format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+2020
+1969
+PREHOOK: query: from dates select cast (d as string format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dates
+#### A masked pattern was here ####
+POSTHOOK: query: from dates select cast (d as string format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dates
+#### A masked pattern was here ####
+2020
+1969
+PREHOOK: query: from timestamps select cast (t as varchar(11) format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+POSTHOOK: query: from timestamps select cast (t as varchar(11) format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+2020
+1969
+PREHOOK: query: from dates select cast (d as varchar(11) format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dates
+#### A masked pattern was here ####
+POSTHOOK: query: from dates select cast (d as varchar(11) format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dates
+#### A masked pattern was here ####
+2020
+1969
+PREHOOK: query: from timestamps select cast (t as char(11) format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+POSTHOOK: query: from timestamps select cast (t as char(11) format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+2020
+1969
+PREHOOK: query: from dates select cast (d as char(11) format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dates
+#### A masked pattern was here ####
+POSTHOOK: query: from dates select cast (d as char(11) format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dates
+#### A masked pattern was here ####
+2020
+1969
+PREHOOK: query: from strings select cast (s as timestamp format "yyyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+#### A masked pattern was here ####
+POSTHOOK: query: from strings select cast (s as timestamp format "yyyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+#### A masked pattern was here ####
+2020-02-03 00:00:00
+1969-12-31 00:00:00
+PREHOOK: query: from varchars select cast (s as timestamp format "yyyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchars
+#### A masked pattern was here ####
+POSTHOOK: query: from varchars select cast (s as timestamp format "yyyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchars
+#### A masked pattern was here ####
+2020-02-03 00:00:00
+1969-12-31 00:00:00
+PREHOOK: query: from chars select cast (s as timestamp format "yyyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@chars
+#### A masked pattern was here ####
+POSTHOOK: query: from chars select cast (s as timestamp format "yyyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@chars
+#### A masked pattern was here ####
+2020-02-03 00:00:00
+1969-12-31 00:00:00
+PREHOOK: query: from strings select cast (s as date format "yyyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+#### A masked pattern was here ####
+POSTHOOK: query: from strings select cast (s as date format "yyyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+#### A masked pattern was here ####
+2020-02-03
+1969-12-31
+PREHOOK: query: from varchars select cast (s as date format "yyyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchars
+#### A masked pattern was here ####
+POSTHOOK: query: from varchars select cast (s as date format "yyyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchars
+#### A masked pattern was here ####
+2020-02-03
+1969-12-31
+PREHOOK: query: from chars select cast (s as date format "yyyy.mm.dd")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@chars
+#### A masked pattern was here ####
+POSTHOOK: query: from chars select cast (s as date format "yyyy.mm.dd")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@chars
+#### A masked pattern was here ####
+2020-02-03
+1969-12-31
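
The golden output above pins down two behaviours of CAST ... FORMAT: output longer than the declared char(10)/varchar(10) length is truncated to that length, and parsing tolerates arbitrary separators and short fields, so "20 / 2 / 3" resolves to 2020-02-03 under "yyyy.mm.dd". Below is a minimal, self-contained sketch of that parse behaviour; it is an illustration of the rule, not Hive's formatter, and the current-year completion of a short year field is an assumption inferred from the "20" -> 2020 result above.

    import java.time.LocalDate;

    public class SeparatorTolerantParseSketch {
      static LocalDate parse(String s) {
        // Any run of non-digit characters acts as a field separator.
        String[] parts = s.trim().split("\\D+");
        int year = Integer.parseInt(parts[0]);
        if (parts[0].length() < 4) {
          // Complete a short year with the leading digits of the current year,
          // which reproduces "20" -> 2020 in the results above.
          int scale = (int) Math.pow(10, parts[0].length());
          year = (LocalDate.now().getYear() / scale) * scale + year;
        }
        return LocalDate.of(year, Integer.parseInt(parts[1]), Integer.parseInt(parts[2]));
      }

      public static void main(String[] args) {
        System.out.println(parse("20 / 2 / 3"));  // 2020-02-03
        System.out.println(parse("1969 12 31"));  // 1969-12-31
      }
    }
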
diff --git ql/src/test/results/clientpositive/udf_string.q.out ql/src/test/results/clientpositive/udf_string.q.out
index aa764a9db6..6da63be8f8 100644
--- ql/src/test/results/clientpositive/udf_string.q.out
+++ ql/src/test/results/clientpositive/udf_string.q.out
@@ -2,12 +2,13 @@ PREHOOK: query: DESCRIBE FUNCTION string
PREHOOK: type: DESCFUNCTION
POSTHOOK: query: DESCRIBE FUNCTION string
POSTHOOK: type: DESCFUNCTION
-CAST(<expr> as STRING) - Converts the argument to a string value.
+CAST(<expr> as STRING [FORMAT <pattern>]) - Converts the argument to a string value.
PREHOOK: query: DESCRIBE FUNCTION EXTENDED string
PREHOOK: type: DESCFUNCTION
POSTHOOK: query: DESCRIBE FUNCTION EXTENDED string
POSTHOOK: type: DESCFUNCTION
-CAST(<expr> as STRING) - Converts the argument to a string value.
+CAST(<expr> as STRING [FORMAT <pattern>]) - Converts the argument to a string value.
+If a format is specified with the FORMAT argument, then SQL:2016 datetime formats will be used.
Example:
> SELECT CAST(1234 AS string) FROM src LIMIT 1;
'1234'
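
The updated DESCRIBE output draws the line explicitly: SQL:2016 pattern semantics apply only when FORMAT is given. The distinction matters because the SQL:2016 pattern language and the java.text.SimpleDateFormat conventions Hive users know from date_format() disagree about case. A self-contained illustration of the legacy pitfall (plain JDK code, not Hive's):

    import java.text.SimpleDateFormat;
    import java.util.Date;

    public class PatternCasePitfall {
      public static void main(String[] args) throws Exception {
        // In SimpleDateFormat, "mm" means minutes, so the "02" below sets the
        // minute field and the month silently stays January. The SQL:2016
        // patterns in the q.out files above read "mm" as the month.
        SimpleDateFormat legacy = new SimpleDateFormat("yyyy.mm.dd");
        Date d = legacy.parse("2020.02.03");
        System.out.println(new SimpleDateFormat("yyyy-MM-dd HH:mm").format(d));
        // prints 2020-01-03 00:02
      }
    }
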
diff --git serde/src/java/org/apache/hadoop/hive/serde2/io/DateWritableV2.java serde/src/java/org/apache/hadoop/hive/serde2/io/DateWritableV2.java
index 4b6a3d6c10..4ff4732324 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/io/DateWritableV2.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/io/DateWritableV2.java
@@ -21,6 +21,7 @@
import java.io.DataOutput;
import java.io.IOException;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.type.Date;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableUtils;
@@ -147,6 +148,10 @@ public String toString() {
return date.toString();
}
+ public String toStringFormatted(HiveDateTimeFormatter formatter) {
+ return date.toStringFormatted(formatter);
+ }
+
@Override
public int hashCode() {
return date.toEpochDay();
diff --git serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritableV2.java serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritableV2.java
index 9aa7f19ab2..5972bd92b5 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritableV2.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritableV2.java
@@ -22,6 +22,7 @@
import java.io.IOException;
import java.time.format.DateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.Timestamp;
import org.apache.hadoop.hive.common.type.TimestampUtils;
@@ -387,6 +388,16 @@ public String toString() {
return timestamp.format(DATE_TIME_FORMAT);
}
+ public String toStringFormatted(HiveDateTimeFormatter formatter) {
+ if (formatter == null) {
+ return toString();
+ }
+ if (timestampEmpty) {
+ populateTimestamp();
+ }
+ return timestamp.toStringFormatted(formatter);
+ }
+
@Override
public int hashCode() {
long seconds = getSeconds();
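
Both writables now expose toStringFormatted(HiveDateTimeFormatter). TimestampWritableV2 treats a null formatter as "use the default toString()", while DateWritableV2 delegates straight to Date.toStringFormatted, which is presumably expected to handle the null case itself. A stand-alone sketch of that fall-back contract, using JDK stand-ins rather than the Hive types:

    import java.time.LocalDateTime;
    import java.util.function.Function;

    public class FormattedBox {
      private final LocalDateTime value;

      FormattedBox(LocalDateTime value) { this.value = value; }

      @Override
      public String toString() { return value.toString(); }

      // Mirrors the contract above: a null formatter selects the legacy rendering.
      String toStringFormatted(Function<LocalDateTime, String> formatter) {
        return formatter == null ? toString() : formatter.apply(value);
      }

      public static void main(String[] args) {
        FormattedBox b = new FormattedBox(LocalDateTime.of(2020, 2, 3, 0, 0));
        System.out.println(b.toStringFormatted(null));                             // 2020-02-03T00:00
        System.out.println(b.toStringFormatted(t -> String.valueOf(t.getYear()))); // 2020
      }
    }
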
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java
index 84c027d51c..3da8a18c4e 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java
@@ -20,6 +20,7 @@
import java.time.ZoneId;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.type.Date;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
@@ -250,6 +251,7 @@ public Object convert(Object input) {
PrimitiveObjectInspector inputOI;
SettableDateObjectInspector outputOI;
Object r;
+ private HiveDateTimeFormatter formatter = null;
public DateConverter(PrimitiveObjectInspector inputOI,
SettableDateObjectInspector outputOI) {
@@ -263,7 +265,11 @@ public Object convert(Object input) {
return null;
}
return outputOI.set(r, PrimitiveObjectInspectorUtils.getDate(input,
- inputOI));
+ inputOI, formatter));
+ }
+
+ public void setDateTimeFormatter(HiveDateTimeFormatter formatter) {
+ this.formatter = formatter;
}
}
@@ -272,6 +278,7 @@ public Object convert(Object input) {
SettableTimestampObjectInspector outputOI;
boolean intToTimestampInSeconds = false;
Object r;
+ private HiveDateTimeFormatter formatter = null;
public TimestampConverter(PrimitiveObjectInspector inputOI,
SettableTimestampObjectInspector outputOI) {
@@ -289,7 +296,11 @@ public Object convert(Object input) {
return null;
}
return outputOI.set(r, PrimitiveObjectInspectorUtils.getTimestamp(input,
- inputOI, intToTimestampInSeconds));
+ inputOI, intToTimestampInSeconds, formatter));
+ }
+
+ public void setDateTimeFormatter(HiveDateTimeFormatter formatter) {
+ this.formatter = formatter;
}
}
@@ -416,6 +427,7 @@ public Object convert(Object input) {
private static byte[] trueBytes = {'T', 'R', 'U', 'E'};
private static byte[] falseBytes = {'F', 'A', 'L', 'S', 'E'};
+ private HiveDateTimeFormatter formatter = null;
public TextConverter(PrimitiveObjectInspector inputOI) {
// The output ObjectInspector is writableStringObjectInspector.
@@ -486,11 +498,12 @@ public Text convert(Object input) {
}
return t;
case DATE:
- t.set(((DateObjectInspector) inputOI).getPrimitiveWritableObject(input).toString());
+ t.set(((DateObjectInspector) inputOI)
+ .getPrimitiveWritableObject(input).toStringFormatted(formatter));
return t;
case TIMESTAMP:
t.set(((TimestampObjectInspector) inputOI)
- .getPrimitiveWritableObject(input).toString());
+ .getPrimitiveWritableObject(input).toStringFormatted(formatter));
return t;
case TIMESTAMPLOCALTZ:
t.set(((TimestampLocalTZObjectInspector) inputOI).getPrimitiveWritableObject(input).toString());
@@ -520,6 +533,10 @@ public Text convert(Object input) {
throw new RuntimeException("Hive 2 Internal error: type = " + inputOI.getTypeName());
}
}
+
+ public void setDateTimeFormatter(HiveDateTimeFormatter formatter) {
+ this.formatter = formatter;
+ }
}
/**
@@ -545,6 +562,7 @@ public Object convert(Object input) {
PrimitiveObjectInspector inputOI;
SettableHiveVarcharObjectInspector outputOI;
Object hc;
+ private HiveDateTimeFormatter formatter;
public HiveVarcharConverter(PrimitiveObjectInspector inputOI,
SettableHiveVarcharObjectInspector outputOI) {
@@ -567,21 +585,26 @@ public Object convert(Object input) {
return null;
}
switch (inputOI.getPrimitiveCategory()) {
- case BOOLEAN:
- return outputOI.set(hc,
- ((BooleanObjectInspector) inputOI).get(input) ?
- new HiveVarchar("TRUE", -1) : new HiveVarchar("FALSE", -1));
- default:
- return outputOI.set(hc, PrimitiveObjectInspectorUtils.getHiveVarchar(input, inputOI));
+ case BOOLEAN:
+ return outputOI.set(hc,
+ ((BooleanObjectInspector) inputOI).get(input) ? new HiveVarchar("TRUE",
+ -1) : new HiveVarchar("FALSE", -1));
+ default:
+ return outputOI
+ .set(hc, PrimitiveObjectInspectorUtils.getHiveVarchar(input, inputOI, formatter));
}
}
+ public void setDateTimeFormatter(HiveDateTimeFormatter formatter) {
+ this.formatter = formatter;
+ }
}
public static class HiveCharConverter implements Converter {
PrimitiveObjectInspector inputOI;
SettableHiveCharObjectInspector outputOI;
Object hc;
+ private HiveDateTimeFormatter formatter;
public HiveCharConverter(PrimitiveObjectInspector inputOI,
SettableHiveCharObjectInspector outputOI) {
@@ -601,8 +624,13 @@ public Object convert(Object input) {
((BooleanObjectInspector) inputOI).get(input) ?
new HiveChar("TRUE", -1) : new HiveChar("FALSE", -1));
default:
- return outputOI.set(hc, PrimitiveObjectInspectorUtils.getHiveChar(input, inputOI));
+ return outputOI.set(hc,
+ PrimitiveObjectInspectorUtils.getHiveChar(input, inputOI, formatter));
}
}
+
+ public void setDateTimeFormatter(HiveDateTimeFormatter formatter) {
+ this.formatter = formatter;
+ }
}
}
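
Every converter touched above acquires the formatter the same way: an optional field, null by default, installed through setDateTimeFormatter(...) after construction and threaded into each convert() call, so existing call sites keep their legacy behaviour untouched. A stand-alone sketch of that setter-injection shape (JDK stand-ins, not the Hive converter API):

    import java.time.LocalDate;
    import java.util.function.Function;

    public class StringToDateConverter {
      private Function<String, LocalDate> formatter; // null => legacy parsing

      public void setDateTimeFormatter(Function<String, LocalDate> formatter) {
        this.formatter = formatter;
      }

      public LocalDate convert(String input) {
        if (input == null) {
          return null;
        }
        // Formatter path when one was installed, legacy ISO parse otherwise.
        return formatter != null ? formatter.apply(input) : LocalDate.parse(input.trim());
      }

      public static void main(String[] args) {
        StringToDateConverter c = new StringToDateConverter();
        System.out.println(c.convert("2020-02-03"));   // legacy path
        c.setDateTimeFormatter(s -> LocalDate.parse(s.trim().replace('.', '-')));
        System.out.println(c.convert("2020.02.03"));   // formatter path
      }
    }
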
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
index 3886b202c7..6cf231e7ae 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
@@ -29,6 +29,7 @@
import org.apache.hadoop.hive.common.classification.InterfaceAudience;
import org.apache.hadoop.hive.common.classification.InterfaceStability;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.type.Date;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
@@ -891,12 +892,18 @@ public static float getFloat(Object o, PrimitiveObjectInspector oi) {
return (float) getDouble(o, oi);
}
+ public static String getString(Object o, PrimitiveObjectInspector oi) {
+ return getString(o, oi, null);
+ }
+
/**
* Get the String value out of a primitive object. Note that
* NullPointerException will be thrown if o is null. Note that
* RuntimeException will be thrown if o is not a valid string.
+ * The HiveDateTimeFormatter argument is optional and may be null.
*/
- public static String getString(Object o, PrimitiveObjectInspector oi) {
+ public static String getString(Object o, PrimitiveObjectInspector oi,
+ HiveDateTimeFormatter formatter) {
if (o == null) {
return null;
@@ -951,10 +958,12 @@ public static String getString(Object o, PrimitiveObjectInspector oi) {
result = hcoi.getPrimitiveJavaObject(o).toString();
break;
case DATE:
- result = ((DateObjectInspector) oi).getPrimitiveWritableObject(o).toString();
+ result = ((DateObjectInspector) oi).getPrimitiveWritableObject(o)
+ .toStringFormatted(formatter);
break;
case TIMESTAMP:
- result = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o).toString();
+ result = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o)
+ .toStringFormatted(formatter);
break;
case TIMESTAMPLOCALTZ:
result = ((TimestampLocalTZObjectInspector) oi).getPrimitiveWritableObject(o).toString();
@@ -978,25 +987,35 @@ public static String getString(Object o, PrimitiveObjectInspector oi) {
}
public static HiveChar getHiveChar(Object o, PrimitiveObjectInspector oi) {
+ return getHiveChar(o, oi, null);
+ }
+
+ public static HiveChar getHiveChar(Object o, PrimitiveObjectInspector oi,
+ HiveDateTimeFormatter formatter) {
if (o == null) {
return null;
}
HiveChar result = null;
switch (oi.getPrimitiveCategory()) {
- case CHAR:
- result = ((HiveCharObjectInspector) oi).getPrimitiveJavaObject(o);
- break;
- default:
- // No char length available, copy whole string value here.
- result = new HiveChar();
- result.setValue(getString(o, oi));
- break;
+ case CHAR:
+ result = ((HiveCharObjectInspector) oi).getPrimitiveJavaObject(o);
+ break;
+ default:
+ // No char length available, copy whole string value here.
+ result = new HiveChar();
+ result.setValue(getString(o, oi, formatter));
+ break;
}
return result;
}
public static HiveVarchar getHiveVarchar(Object o, PrimitiveObjectInspector oi) {
+ return getHiveVarchar(o, oi, null);
+ }
+
+ public static HiveVarchar getHiveVarchar(Object o, PrimitiveObjectInspector oi,
+ HiveDateTimeFormatter formatter) {
if (o == null) {
return null;
@@ -1004,16 +1023,16 @@ public static HiveVarchar getHiveVarchar(Object o, PrimitiveObjectInspector oi)
HiveVarchar result = null;
switch (oi.getPrimitiveCategory()) {
- case VARCHAR:
- result = ((HiveVarcharObjectInspector)oi).getPrimitiveJavaObject(o);
- break;
- default:
- // Is there a way to provide char length here?
- // It might actually be ok as long as there is an object inspector (with char length)
- // receiving this value.
- result = new HiveVarchar();
- result.setValue(getString(o, oi));
- break;
+ case VARCHAR:
+ result = ((HiveVarcharObjectInspector) oi).getPrimitiveJavaObject(o);
+ break;
+ default:
+ // Is there a way to provide char length here?
+ // It might actually be ok as long as there is an object inspector (with char length)
+ // receiving this value.
+ result = new HiveVarchar();
+ result.setValue(getString(o, oi, formatter));
+ break;
}
return result;
}
@@ -1113,6 +1132,11 @@ public static HiveDecimal getHiveDecimal(Object o, PrimitiveObjectInspector oi)
}
public static Date getDate(Object o, PrimitiveObjectInspector oi) {
+ return getDate(o, oi, null);
+ }
+
+ public static Date getDate(
+ Object o, PrimitiveObjectInspector oi, HiveDateTimeFormatter formatter) {
if (o == null) {
return null;
}
@@ -1125,13 +1149,9 @@ public static Date getDate(Object o, PrimitiveObjectInspector oi) {
StringObjectInspector soi = (StringObjectInspector) oi;
String s = soi.getPrimitiveJavaObject(o).trim();
try {
- if (s.length() == DATE_LENGTH) {
- result = Date.valueOf(s);
- } else {
- Timestamp ts = getTimestampFromString(s);
- if (ts != null) {
- result = Date.ofEpochMilli(ts.toEpochMilli());
- }
+ Date date = getDateFromString(s, formatter);
+ if (date != null) {
+ result = date;
}
} catch (IllegalArgumentException e) {
// Do nothing
@@ -1141,13 +1161,9 @@ public static Date getDate(Object o, PrimitiveObjectInspector oi) {
case VARCHAR: {
String val = getString(o, oi).trim();
try {
- if (val.length() == DATE_LENGTH) {
- result = Date.valueOf(val);
- } else {
- Timestamp ts = getTimestampFromString(val);
- if (ts != null) {
- result = Date.ofEpochMilli(ts.toEpochMilli());
- }
+ Date date = getDateFromString(val, formatter);
+ if (date != null) {
+ result = date;
}
} catch (IllegalArgumentException e) {
// Do nothing
@@ -1177,11 +1193,46 @@ public static Date getDate(Object o, PrimitiveObjectInspector oi) {
return result;
}
+ private static final int DATE_LENGTH = "YYYY-MM-DD".length();
+
+ private static Date getDateFromString(String s, HiveDateTimeFormatter formatter) {
+ // with SQL formats
+ if (formatter != null) {
+ try {
+ return Date.valueOf(s, formatter);
+ } catch (IllegalArgumentException e) {
+ return null;
+ }
+ }
+
+ // without SQL formats
+ if (s.length() == DATE_LENGTH) {
+ return Date.valueOf(s);
+ } else {
+ Timestamp ts = getTimestampFromString(s);
+ if (ts != null) {
+ return Date.ofEpochMilli(ts.toEpochMilli());
+ }
+ }
+ return null;
+ }
+
public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector oi) {
return getTimestamp(o, oi, false);
}
+ public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector oi, HiveDateTimeFormatter formatter) {
+ return getTimestamp(o, oi, false, formatter);
+ }
+
public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector inputOI, boolean intToTimestampInSeconds) {
+ return getTimestamp(o, inputOI, intToTimestampInSeconds, null);
+ }
+
+ public static Timestamp getTimestamp(Object o,
+ PrimitiveObjectInspector inputOI,
+ boolean intToTimestampInSeconds,
+ HiveDateTimeFormatter formatter) {
if (o == null) {
return null;
}
@@ -1225,11 +1276,11 @@ public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector inputOI,
case STRING:
StringObjectInspector soi = (StringObjectInspector) inputOI;
String s = soi.getPrimitiveJavaObject(o);
- result = getTimestampFromString(s);
+ result = getTimestampFromString(s, formatter);
break;
case CHAR:
case VARCHAR:
- result = getTimestampFromString(getString(o, inputOI));
+ result = getTimestampFromString(getString(o, inputOI), formatter);
break;
case DATE:
result = Timestamp.ofEpochMilli(
@@ -1254,15 +1305,17 @@ public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector inputOI,
return result;
}
- private final static int TS_LENGTH = "yyyy-mm-dd hh:mm:ss".length();
- private final static int DATE_LENGTH = "YYYY-MM-DD".length();
-
public static Timestamp getTimestampFromString(String s) {
+ return getTimestampFromString(s, null);
+ }
+
+ public static Timestamp getTimestampFromString(String s, HiveDateTimeFormatter formatter) {
s = s.trim();
s = trimNanoTimestamp(s);
try {
- return TimestampUtils.stringToTimestamp(s);
+ return TimestampUtils.stringToTimestamp(s, formatter);
} catch (IllegalArgumentException e) {
return null;
}
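
All of the new parse overloads funnel into the same two-path shape: a non-null formatter takes over parsing entirely and failures collapse to null, while a null formatter keeps the legacy length-based heuristics (an exact date literal, otherwise try a timestamp and keep its date part). A sketch of that shape with java.time stand-ins for Hive's Date and Timestamp:

    import java.time.LocalDate;
    import java.time.LocalDateTime;
    import java.time.format.DateTimeFormatter;
    import java.time.format.DateTimeParseException;

    public class TwoPathParseSketch {
      static LocalDate dateFromString(String s, DateTimeFormatter formatter) {
        try {
          if (formatter != null) {
            // SQL:2016 path: the formatter decides everything.
            return LocalDate.parse(s, formatter);
          }
          if (s.length() == "YYYY-MM-DD".length()) {
            // Legacy path: an exact-length date literal.
            return LocalDate.parse(s);
          }
          // Otherwise read it as a timestamp and keep the date part.
          return LocalDateTime.parse(s.replace(' ', 'T')).toLocalDate();
        } catch (DateTimeParseException e) {
          return null; // parse failures become null, as in the code above
        }
      }

      public static void main(String[] args) {
        System.out.println(dateFromString("2020-02-03", null));           // 2020-02-03
        System.out.println(dateFromString("2020-02-03 00:00:00", null));  // 2020-02-03
        System.out.println(dateFromString("03/02/2020",
            DateTimeFormatter.ofPattern("dd/MM/yyyy")));                  // 2020-02-03
      }
    }
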
@@ -1284,19 +1337,6 @@ private static String trimNanoTimestamp(String s) {
return s;
}
- private static boolean isValidTimeStamp(final String s) {
- if (s.length() == TS_LENGTH ||
- (s.contains(".") &&
- s.substring(0, s.indexOf('.')).length() == TS_LENGTH)) {
- // Possible timestamp
- if (s.charAt(DATE_LENGTH) == '-') {
- return false;
- }
- return true;
- }
- return false;
- }
-
public static TimestampTZ getTimestampLocalTZ(Object o, PrimitiveObjectInspector oi,
ZoneId timeZone) {
if (o == null) {