diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/DefaultHiveSqlDateTimeFormatter.java common/src/java/org/apache/hadoop/hive/common/format/datetime/DefaultHiveSqlDateTimeFormatter.java
new file mode 100644
index 0000000000..c6651ff02a
--- /dev/null
+++ common/src/java/org/apache/hadoop/hive/common/format/datetime/DefaultHiveSqlDateTimeFormatter.java
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import com.google.common.collect.ImmutableMap;
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+
+import java.util.Map;
+
+/**
+ * Parse/format datetime objects according to a small set of default SQL:2016 formats.
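+ *
+ * Illustrative usage (expected values inferred from the patterns registered below):
+ *   format(ts) for 2019-01-01 02:03:04       ->  "2019-01-01 02:03:04"
+ *   parseTimestamp("2019-01-01T02:03:04Z")   ->  the same timestamp, via the ISO variant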
+ */
+public class DefaultHiveSqlDateTimeFormatter {
+
+ private static HiveSqlDateTimeFormatter formatterDate = new HiveSqlDateTimeFormatter();
+ private static HiveSqlDateTimeFormatter formatterNoNanos = new HiveSqlDateTimeFormatter();
+ private static HiveSqlDateTimeFormatter formatterWithNanos = new HiveSqlDateTimeFormatter();
+
+ private static HiveSqlDateTimeFormatter formatterIsoNoNanos = new HiveSqlDateTimeFormatter();
+ private static HiveSqlDateTimeFormatter formatterIsoWithNanos = new HiveSqlDateTimeFormatter();
+
+ private static HiveSqlDateTimeFormatter formatterIsoNoNanosNoZ = new HiveSqlDateTimeFormatter();
+ private static HiveSqlDateTimeFormatter formatterIsoWithNanosNoZ = new HiveSqlDateTimeFormatter();
+
+ static {
+    // forParsing is false: these built-in patterns are known to be valid, so the stricter
+    // parse-time verification is unnecessary
+ formatterDate.setPattern("yyyy-mm-dd", false);
+ formatterNoNanos.setPattern("yyyy-mm-dd hh24:mi:ss", false);
+ formatterWithNanos.setPattern("yyyy-mm-dd hh24:mi:ss.ff", false);
+
+ formatterIsoNoNanos.setPattern("yyyy-mm-ddThh24:mi:ssZ", false);
+ formatterIsoWithNanos.setPattern("yyyy-mm-ddThh24:mi:ss.ffZ", false);
+
+ formatterIsoNoNanosNoZ.setPattern("yyyy-mm-ddThh24:mi:ss", false);
+ formatterIsoWithNanosNoZ.setPattern("yyyy-mm-ddThh24:mi:ss.ff", false);
+ }
+
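+  // Each map goes from the number of token groups in the input (see getNumberOfTokenGroups)
+  // to the formatter to use, e.g. "2019-01-01 02:03:04" has 6 groups -> formatterNoNanos.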
+  private static final Map<Integer, HiveSqlDateTimeFormatter> TOKEN_COUNT_FORMATTER_MAP =
+      ImmutableMap.<Integer, HiveSqlDateTimeFormatter>builder()
+          .put(3, formatterDate).put(6, formatterNoNanos).put(7, formatterWithNanos).build();
+
+  private static final Map<Integer, HiveSqlDateTimeFormatter> TOKEN_COUNT_ISO_FORMATTER_MAP =
+      ImmutableMap.<Integer, HiveSqlDateTimeFormatter>builder()
+          .put(8, formatterIsoNoNanos).put(9, formatterIsoWithNanos).build();
+
+  private static final Map<Integer, HiveSqlDateTimeFormatter> TOKEN_COUNT_ISO_FORMATTER_MAP_NO_Z =
+      ImmutableMap.<Integer, HiveSqlDateTimeFormatter>builder()
+          .put(7, formatterIsoNoNanosNoZ).put(8, formatterIsoWithNanosNoZ).build();
+
+ public static String format(Timestamp ts) {
+ return (ts.getNanos() == 0) ? formatterNoNanos.format(ts) : formatterWithNanos.format(ts);
+ }
+
+ public static String format(Date date) {
+ return formatterDate.format(date);
+ }
+
+ public static Timestamp parseTimestamp(String input) {
+ input = input.trim();
+ HiveSqlDateTimeFormatter formatter = getFormatter(input);
+ return formatter.parseTimestamp(input);
+ }
+
+  public static Date parseDate(String input) {
+    input = input.trim();
+    HiveSqlDateTimeFormatter formatter = getFormatter(input);
+    return formatter.parseDate(input);
+ }
+
+ private static HiveSqlDateTimeFormatter getFormatter(String input) {
+    Map<Integer, HiveSqlDateTimeFormatter> map;
+    String lowerCaseInput = input.toLowerCase();
+    if (lowerCaseInput.contains("t")) {
+      if (lowerCaseInput.contains("z")) {
+ map = TOKEN_COUNT_ISO_FORMATTER_MAP;
+ } else {
+ map = TOKEN_COUNT_ISO_FORMATTER_MAP_NO_Z;
+ }
+ } else {
+ map = TOKEN_COUNT_FORMATTER_MAP;
+ }
+
+ int numberOfTokenGroups = getNumberOfTokenGroups(input);
+ if (!map.containsKey(numberOfTokenGroups)) {
+ throw new IllegalArgumentException("No available default parser for input: " + input);
+ }
+ return map.get(numberOfTokenGroups);
+ }
+
+ // count number of non-separator tokens
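+  // e.g. "2019-01-01" has 3 groups and "2019-01-01T02:03:04Z" has 8, since the ISO delimiters
+  // T and Z each count as a group of their own (see TestDefaultHiveSqlDateTimeFormatter)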
+ static int getNumberOfTokenGroups(String input) {
+ int count = 0;
+ boolean lastCharWasSep = true, isIsoDelimiter;
+
+ for (char c : input.toCharArray()) {
+ String s = String.valueOf(c);
+ isIsoDelimiter = HiveSqlDateTimeFormatter.VALID_ISO_8601_DELIMITERS.contains(s.toLowerCase());
+ if (!HiveSqlDateTimeFormatter.VALID_SEPARATORS.contains(s)) {
+ if (!isIsoDelimiter && !Character.isDigit(c)) { // it's probably part of a time zone. Halt.
+ break;
+ }
+        if (lastCharWasSep || isIsoDelimiter) {
+          count++;
+        }
+        // ISO delimiters also act as delimiters, so the next character starts a new group
+        lastCharWasSep = isIsoDelimiter;
+ } else {
+ lastCharWasSep = true;
+ }
+ }
+ return count;
+ }
+}
diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveDateTimeFormatter.java common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveDateTimeFormatter.java
new file mode 100644
index 0000000000..44367e41df
--- /dev/null
+++ common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveDateTimeFormatter.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+
+/**
+ * Interface used for formatting and parsing timestamps. Initially created so that the user can
+ * optionally format datetime objects into strings and parse strings into datetime objects with
+ * SQL:2016 semantics.
+ */
+public interface HiveDateTimeFormatter {
+ /**
+ * Format the given timestamp into a string.
+ *
+ * @throws IllegalArgumentException if timestamp cannot be formatted.
+ */
+ String format(Timestamp ts);
+
+ /**
+ * Format the given date into a string.
+ *
+ * @throws IllegalArgumentException if date cannot be formatted.
+ */
+ String format(Date date);
+
+ /**
+ * Parse the given string into a timestamp.
+ *
+ * @throws IllegalArgumentException if string cannot be parsed.
+ */
+ Timestamp parseTimestamp(String string);
+
+ /**
+   * Parse the given string into a date.
+ *
+ * @throws IllegalArgumentException if string cannot be parsed.
+ */
+ Date parseDate(String string);
+
+ /**
+ * Set the format pattern to be used for formatting timestamps or parsing strings.
+ * This method parses the pattern into tokens, so it comes with some performance overhead.
+ *
+ * @param pattern string representing a pattern
+ * @param forParsing true if the pattern will be used to parse a string; false if for formatting
+ * a datetime object
+ *
+   * @throws IllegalArgumentException if the pattern contains invalid tokens: either generally
+   *         invalid, or specifically not allowed for parsing or formatting
+ */
+ void setPattern(String pattern, boolean forParsing);
+
+ /**
+ * Get the format pattern to be used for formatting datetime objects or parsing strings.
+ */
+ String getPattern();
+}
diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java
new file mode 100644
index 0000000000..edd0a66099
--- /dev/null
+++ common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java
@@ -0,0 +1,647 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+
+import java.time.DateTimeException;
+import java.time.Duration;
+import java.time.Instant;
+import java.time.LocalDateTime;
+import java.time.ZoneOffset;
+import java.time.temporal.ChronoField;
+import java.time.temporal.ChronoUnit;
+import java.time.temporal.TemporalField;
+import java.time.temporal.TemporalUnit;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.TimeZone;
+
+/**
+ * Formatter using SQL:2016 datetime patterns.
+ */
+public class HiveSqlDateTimeFormatter implements HiveDateTimeFormatter {
+
+ private static final int LONGEST_TOKEN_LENGTH = 5;
+ private static final int LONGEST_ACCEPTED_PATTERN = 100; // for sanity's sake
+ private static final long MINUTES_PER_HOUR = 60;
+  private static final int _50 = 50; // pivot for resolving 2-digit round years (rr/rrrr)
+ private static final int NANOS_MAX_LENGTH = 9;
+ public static final int AM = 0;
+ public static final int PM = 1;
+ private String pattern;
+  protected List<Token> tokens = new ArrayList<>();
+
+  private static final Map<String, TemporalField> VALID_TEMPORAL_TOKENS =
+      ImmutableMap.<String, TemporalField>builder()
+ .put("yyyy", ChronoField.YEAR).put("yyy", ChronoField.YEAR)
+ .put("yy", ChronoField.YEAR).put("y", ChronoField.YEAR)
+ .put("rrrr", ChronoField.YEAR).put("rr", ChronoField.YEAR)
+ .put("mm", ChronoField.MONTH_OF_YEAR)
+ .put("dd", ChronoField.DAY_OF_MONTH)
+ .put("ddd", ChronoField.DAY_OF_YEAR)
+ .put("hh", ChronoField.HOUR_OF_AMPM)
+ .put("hh12", ChronoField.HOUR_OF_AMPM)
+ .put("hh24", ChronoField.HOUR_OF_DAY)
+ .put("mi", ChronoField.MINUTE_OF_HOUR)
+ .put("ss", ChronoField.SECOND_OF_MINUTE)
+ .put("sssss", ChronoField.SECOND_OF_DAY)
+ .put("ff1", ChronoField.NANO_OF_SECOND).put("ff2", ChronoField.NANO_OF_SECOND)
+ .put("ff3", ChronoField.NANO_OF_SECOND).put("ff4", ChronoField.NANO_OF_SECOND)
+ .put("ff5", ChronoField.NANO_OF_SECOND).put("ff6", ChronoField.NANO_OF_SECOND)
+ .put("ff7", ChronoField.NANO_OF_SECOND).put("ff8", ChronoField.NANO_OF_SECOND)
+ .put("ff9", ChronoField.NANO_OF_SECOND).put("ff", ChronoField.NANO_OF_SECOND)
+ .put("a.m.", ChronoField.AMPM_OF_DAY).put("am", ChronoField.AMPM_OF_DAY)
+ .put("p.m.", ChronoField.AMPM_OF_DAY).put("pm", ChronoField.AMPM_OF_DAY)
+ .build();
+
+  private static final Map<String, TemporalUnit> VALID_TIME_ZONE_TOKENS =
+      ImmutableMap.<String, TemporalUnit>builder()
+          .put("tzh", ChronoUnit.HOURS).put("tzm", ChronoUnit.MINUTES).build();
+
+  static final List<String> VALID_ISO_8601_DELIMITERS =
+      ImmutableList.of("t", "z");
+
+  static final List<String> VALID_SEPARATORS =
+      ImmutableList.of("-", ":", " ", ".", "/", ";", "\'", ",");
+
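+  // Token lengths that differ from the token string's own length, e.g. "hh24" consumes or
+  // produces 2 digits and "ff" up to 9.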
+  private static final Map<String, Integer> SPECIAL_LENGTHS =
+      ImmutableMap.<String, Integer>builder()
+ .put("hh12", 2).put("hh24", 2).put("tzm", 2).put("am", 4).put("pm", 4)
+ .put("ff1", 1).put("ff2", 2).put("ff3", 3).put("ff4", 4).put("ff5", 5)
+ .put("ff6", 6).put("ff7", 7).put("ff8", 8).put("ff9", 9).put("ff", 9)
+ .build();
+
+ public enum TokenType {
+ TEMPORAL,
+ SEPARATOR,
+ TIMEZONE,
+ ISO_8601_DELIMITER
+ }
+
+ public static class Token {
+ TokenType type;
+ TemporalField temporalField; // for type TEMPORAL e.g. ChronoField.YEAR
+ TemporalUnit temporalUnit; // for type TIMEZONE e.g. ChronoUnit.HOURS
+ String string; // pattern string, e.g. "yyy"
+ int length; // length (e.g. YYY: 3, FF8: 8)
+
+ public Token(TemporalField temporalField, String string, int length) {
+ this(TokenType.TEMPORAL, temporalField, null, string, length);
+ }
+
+ public Token(TemporalUnit temporalUnit, String string, int length) {
+ this(TokenType.TIMEZONE, null, temporalUnit, string, length);
+ }
+
+ public Token(TokenType tokenType, String string) {
+ this(tokenType, null, null, string, string.length());
+ }
+
+ public Token(TokenType tokenType, TemporalField temporalField, TemporalUnit temporalUnit,
+ String string, int length) {
+ this.type = tokenType;
+ this.temporalField = temporalField;
+ this.temporalUnit = temporalUnit;
+ this.string = string;
+ this.length = length;
+ }
+
+ @Override public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append(string);
+ sb.append(" type: ");
+ sb.append(type);
+ if (temporalField != null) {
+ sb.append(" temporalField: ");
+ sb.append(temporalField);
+ } else if (temporalUnit != null) {
+ sb.append(" temporalUnit: ");
+ sb.append(temporalUnit);
+ }
+ return sb.toString();
+ }
+ }
+
+ /**
+ * Parse and perhaps verify the pattern.
+ */
+ @Override public void setPattern(String pattern, boolean forParsing) {
+    if (pattern.length() >= LONGEST_ACCEPTED_PATTERN) {
+      throw new IllegalArgumentException("The input format is too long");
+    }
+
+ this.pattern = parsePatternToTokens(pattern);
+
+ // throw Exception if list of tokens doesn't make sense for parsing. Formatting is less picky.
+ if (forParsing) {
+ verifyForParse();
+ } else {
+ verifyForFormat();
+ }
+ }
+
+ /**
+ * Parse pattern to list of tokens.
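+   * e.g. "yyyy-mm-dd" becomes [yyyy (TEMPORAL), "-" (SEPARATOR), mm (TEMPORAL),
+   * "-" (SEPARATOR), dd (TEMPORAL)].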
+ */
+ private String parsePatternToTokens(String pattern) {
+ tokens.clear();
+ String originalPattern = pattern;
+ pattern = pattern.toLowerCase();
+
+ // indexes of the substring we will check (includes begin, does not include end)
+  int begin = 0, end = 0;
+ String candidate;
+ Token lastAddedToken = null;
+
+ while (begin < pattern.length()) {
+
+ // if begin hasn't progressed, then something is unparseable
+ if (begin != end) {
+ tokens.clear();
+ throw new IllegalArgumentException("Bad date/time conversion format: " + pattern);
+ }
+
+ //process next token: start with substring
+ for (int i = LONGEST_TOKEN_LENGTH; i > 0; i--) {
+ end = begin + i;
+ if (end > pattern.length()) { // don't go past the end of the pattern string
+ continue;
+ }
+ candidate = pattern.substring(begin, end);
+ // if it's a separator, then clump it with immediately preceding separators (e.g. "---"
+ // counts as one separator).
+ if (candidate.length() == 1 && VALID_SEPARATORS.contains(candidate)) {
+ if (lastAddedToken != null && lastAddedToken.type == TokenType.SEPARATOR) {
+ lastAddedToken.string += candidate;
+ lastAddedToken.length += 1;
+ } else {
+ lastAddedToken = new Token(TokenType.SEPARATOR, candidate);
+ tokens.add(lastAddedToken);
+ }
+ begin = end;
+ break;
+ } else if (candidate.length() == 1 && VALID_ISO_8601_DELIMITERS.contains(candidate)) {
+ lastAddedToken = new Token(TokenType.ISO_8601_DELIMITER, candidate.toUpperCase());
+ tokens.add(lastAddedToken);
+ begin = end;
+ break;
+ //temporal token
+ } else if (VALID_TEMPORAL_TOKENS.keySet().contains(candidate)) {
+ // for AM/PM, keep original case
+ if (VALID_TEMPORAL_TOKENS.get(candidate) == ChronoField.AMPM_OF_DAY) {
+ int subStringEnd = begin + candidate.length();
+ candidate = originalPattern.substring(begin, subStringEnd);
+ //token string may be capitalized, update pattern
+ pattern = pattern.substring(0, begin) + candidate + pattern.substring(subStringEnd);
+ }
+ lastAddedToken = new Token(VALID_TEMPORAL_TOKENS.get(candidate.toLowerCase()), candidate,
+ getTokenStringLength(candidate.toLowerCase()));
+ tokens.add(lastAddedToken);
+ begin = end;
+ break;
+ //time zone
+ } else if (VALID_TIME_ZONE_TOKENS.keySet().contains(candidate)) {
+ lastAddedToken = new Token(VALID_TIME_ZONE_TOKENS.get(candidate), candidate,
+ getTokenStringLength(candidate));
+ tokens.add(lastAddedToken);
+ begin = end;
+ break;
+ }
+ }
+ }
+ return pattern;
+ }
+
+ private int getTokenStringLength(String candidate) {
+ if (SPECIAL_LENGTHS.containsKey(candidate)) {
+ return SPECIAL_LENGTHS.get(candidate);
+ }
+ return candidate.length();
+ }
+
+ /**
+ * Make sure the generated list of tokens is valid for parsing strings to datetime objects.
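+   * e.g. a pattern containing both year and round year tokens ("yyyy rr"), or both hh24 and a
+   * median indicator ("yyyy-mm-dd HH24 AM"), is rejected here.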
+ */
+ private void verifyForParse() {
+
+ // create a list of tokens' temporal fields
+    ArrayList<TemporalField> temporalFields = new ArrayList<>();
+    ArrayList<TemporalUnit> timeZoneTemporalUnits = new ArrayList<>();
+    int roundYearCount = 0, yearCount = 0;
+ for (Token token : tokens) {
+ if (token.temporalField != null) {
+ temporalFields.add(token.temporalField);
+ if (token.temporalField == ChronoField.YEAR) {
+ if (token.string.startsWith("r")) {
+ roundYearCount += 1;
+ } else {
+ yearCount += 1;
+ }
+ }
+ } else if (token.temporalUnit != null) {
+ timeZoneTemporalUnits.add(token.temporalUnit);
+ }
+ }
+
+ if (roundYearCount > 0 && yearCount > 0) {
+ throw new IllegalArgumentException("Invalid duplication of format element: Both year and"
+ + "round year are provided");
+ }
+ for (TemporalField tokenType : temporalFields) {
+ if (Collections.frequency(temporalFields, tokenType) > 1) {
+ throw new IllegalArgumentException(
+ "Invalid duplication of format element: multiple " + tokenType.toString()
+ + " tokens provided.");
+ }
+ }
+ if (temporalFields.contains(ChronoField.AMPM_OF_DAY) &&
+ !(temporalFields.contains(ChronoField.HOUR_OF_DAY) ||
+ temporalFields.contains(ChronoField.HOUR_OF_AMPM))) {
+ throw new IllegalArgumentException("Missing hour token.");
+ }
+ if (temporalFields.contains(ChronoField.AMPM_OF_DAY) &&
+ temporalFields.contains(ChronoField.HOUR_OF_DAY)) {
+ throw new IllegalArgumentException("Conflict between median indicator and hour token.");
+ }
+ if (temporalFields.contains(ChronoField.HOUR_OF_AMPM) &&
+ temporalFields.contains(ChronoField.HOUR_OF_DAY)) {
+ throw new IllegalArgumentException("Conflict between hour of day and hour of am/pm token.");
+ }
+ if (temporalFields.contains(ChronoField.DAY_OF_YEAR) &&
+ (temporalFields.contains(ChronoField.DAY_OF_MONTH) ||
+ temporalFields.contains(ChronoField.MONTH_OF_YEAR))) {
+ throw new IllegalArgumentException("Day of year provided with day or month token.");
+ }
+ if (temporalFields.contains(ChronoField.SECOND_OF_DAY) &&
+ (temporalFields.contains(ChronoField.HOUR_OF_DAY) ||
+ temporalFields.contains(ChronoField.HOUR_OF_AMPM) ||
+ temporalFields.contains(ChronoField.MINUTE_OF_HOUR) ||
+ temporalFields.contains(ChronoField.SECOND_OF_MINUTE))) {
+ throw new IllegalArgumentException(
+ "Second of day token conflicts with other token(s).");
+ }
+ if (timeZoneTemporalUnits.contains(ChronoUnit.MINUTES) &&
+ !timeZoneTemporalUnits.contains(ChronoUnit.HOURS)) {
+ throw new IllegalArgumentException("Time zone minute token provided without time zone hour token.");
+ }
+ }
+
+ /**
+ * Make sure the generated list of tokens is valid for formatting datetime objects to strings.
+ */
+ private void verifyForFormat() {
+ for (Token token : tokens) {
+ if (token.type == TokenType.TIMEZONE) {
+ throw new IllegalArgumentException(token.string.toUpperCase() + " not a valid format for "
+ + "timestamp or date.");
+ }
+ }
+ }
+
+ @Override public String format(Timestamp ts) {
+ StringBuilder fullOutputSb = new StringBuilder();
+ String outputString = null;
+ int value;
+ LocalDateTime localDateTime =
+ LocalDateTime.ofEpochSecond(ts.toEpochSecond(), ts.getNanos(), ZoneOffset.UTC);
+ for (Token token : tokens) {
+ switch (token.type) {
+ case TEMPORAL:
+ try {
+ value = localDateTime.get(token.temporalField);
+ outputString = formatTemporal(value, token);
+ } catch (DateTimeException e) {
+ throw new IllegalArgumentException(token.temporalField + " couldn't be obtained from "
+ + "LocalDateTime " + localDateTime, e);
+ }
+ break;
+        case TIMEZONE: // unreachable: verifyForFormat rejects time zone tokens for timestamp/date
+ break;
+ case SEPARATOR:
+ outputString = token.string;
+ break;
+ case ISO_8601_DELIMITER:
+ outputString = token.string.toUpperCase();
+ break;
+ }
+ fullOutputSb.append(outputString);
+ }
+ return fullOutputSb.toString();
+ }
+
+ @Override public String format(Date date) {
+ return format(Timestamp.ofEpochSecond(date.toEpochSecond()));
+ }
+
+ private String formatTemporal(int value, Token token) {
+ String output;
+ if (token.temporalField == ChronoField.AMPM_OF_DAY) {
+ output = value == 0 ? "a" : "p";
+ output += token.string.length() == 2 ? "m" : ".m.";
+ if (token.string.startsWith("A") || token.string.startsWith("P")) {
+ output = output.toUpperCase();
+ }
+ } else {
+ // it's a numeric value
+ try {
+ output = String.valueOf(value);
+ output = padOrTruncateNumericTemporal(token, output);
+ } catch (Exception e) {
+ throw new IllegalArgumentException("Value: " + value + " couldn't be cast to string.", e);
+ }
+ }
+ return output;
+ }
+
+ /**
+ * To match token.length, pad left with zeroes or truncate.
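+   * e.g. minute 7 with token "mi" pads to "07"; nanos "123456789" with token "ff4" is
+   * truncated (on the right) to "1234"; plain "ff" then drops trailing zeroes.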
+ */
+ private String padOrTruncateNumericTemporal(Token token, String output) {
+ if (output.length() < token.length) {
+ output = StringUtils.leftPad(output, token.length, '0'); // pad left
+ } else if (output.length() > token.length) {
+ if (token.temporalField == ChronoField.NANO_OF_SECOND) {
+ output = output.substring(0, token.length); // truncate right
+ } else {
+ output = output.substring(output.length() - token.length); // truncate left
+ }
+ }
+ if (token.temporalField == ChronoField.NANO_OF_SECOND
+ && token.string.equalsIgnoreCase("ff")) {
+ output = output.replaceAll("0*$", ""); //truncate trailing 0's
+ if (output.isEmpty()) {
+ output = "0";
+ }
+ }
+ return output;
+ }
+
+ /**
+ * Left here for timestamp with local time zone.
+ */
+ private String formatTimeZone(TimeZone timeZone, LocalDateTime localDateTime, Token token) {
+ ZoneOffset offset = timeZone.toZoneId().getRules().getOffset(localDateTime);
+ Duration seconds = Duration.of(offset.get(ChronoField.OFFSET_SECONDS), ChronoUnit.SECONDS);
+ if (token.string.equals("tzh")) {
+ long hours = seconds.toHours();
+ String s = (hours >= 0) ? "+" : "-";
+ s += (Math.abs(hours) < 10) ? "0" : "";
+ s += String.valueOf(Math.abs(hours));
+ return s;
+ } else {
+ long minutes = Math.abs(seconds.toMinutes() % MINUTES_PER_HOUR);
+ String s = String.valueOf(minutes);
+ if (s.length() == 1) {
+ s = "0" + s;
+ }
+ return s;
+ }
+ }
+
+  @Override public Timestamp parseTimestamp(String fullInput) {
+ LocalDateTime ldt = LocalDateTime.ofInstant(Instant.EPOCH, ZoneOffset.UTC);
+ String substring;
+ int index = 0;
+ int value;
+ int timeZoneSign = 0, timeZoneHours = 0, timeZoneMinutes = 0;
+
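+    // Walk the tokens in pattern order, each consuming a substring of the input, e.g. pattern
+    // "yyyy-mm-dd" against "2018-02-03" consumes "2018", "-", "02", "-", "03" in turn.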
+ for (Token token : tokens) {
+ switch (token.type) {
+ case TEMPORAL:
+ substring = getNextSubstring(fullInput, index, token); // e.g. yy-m -> yy
+ value = parseTemporal(substring, token); // e.g. 18->2018, July->07
+ try {
+ ldt = ldt.with(token.temporalField, value);
+        } catch (DateTimeException e) {
+          throw new IllegalArgumentException(
+              "Value " + value + " not valid for token " + token.toString(), e);
+ }
+ index += substring.length();
+ break;
+ case TIMEZONE:
+ if (token.temporalUnit == ChronoUnit.HOURS) {
+ String nextCharacter = fullInput.substring(index, index + 1);
+ timeZoneSign = "-".equals(nextCharacter) ? -1 : 1;
+ if ("-".equals(nextCharacter) || "+".equals(nextCharacter)) {
+ index++;
+ }
+ // parse next two digits
+ substring = getNextSubstring(fullInput, index, index + 2, token);
+ try {
+ timeZoneHours = Integer.parseInt(substring);
+ } catch (NumberFormatException e) {
+ throw new IllegalArgumentException("Couldn't parse substring \"" + substring +
+ "\" with token " + token + " to int. Pattern is " + pattern, e);
+ }
+ if (timeZoneHours < -15 || timeZoneHours > 15) {
+ throw new IllegalArgumentException("Couldn't parse substring \"" + substring +
+ "\" to TZH because TZH range is -15 to +15. Pattern is " + pattern);
+ }
+ } else { // time zone minutes
+ substring = getNextSubstring(fullInput, index, token);
+ try {
+ timeZoneMinutes = Integer.parseInt(substring);
+ } catch (NumberFormatException e) {
+ throw new IllegalArgumentException("Couldn't parse substring \"" + substring +
+ "\" with token " + token + " to int. Pattern is " + pattern, e);
+ }
+ if (timeZoneMinutes < 0 || timeZoneMinutes > 59) {
+ throw new IllegalArgumentException("Couldn't parse substring \"" + substring +
+ "\" to TZM because TZM range is 0 to 59. Pattern is " + pattern);
+ }
+ }
+ index += substring.length();
+ break;
+ case SEPARATOR:
+ index = parseSeparator(fullInput, index, token);
+ break;
+ case ISO_8601_DELIMITER:
+ index = parseIso8601Delimiter(fullInput, index, token);
+ }
+ }
+    // time zone hours -- applied here because hh/hh24 may be parsed after tzh
+    ldt = ldt.minus(timeZoneSign * timeZoneHours, ChronoUnit.HOURS);
+    // time zone minutes -- applied here because their sign depends on tzh's sign
+    ldt = ldt.minus(timeZoneSign * timeZoneMinutes, ChronoUnit.MINUTES);
+
+    // anything left unparsed at the end of the string is an error
+    if (index < fullInput.length()) {
+ throw new IllegalArgumentException("Leftover input after parsing: " +
+ fullInput.substring(index) + " in string " + fullInput);
+ }
+
+ return Timestamp.ofEpochSecond(ldt.toEpochSecond(ZoneOffset.UTC), ldt.getNano());
+ }
+
+  @Override public Date parseDate(String input) {
+    return Date.ofEpochMilli(parseTimestamp(input).toEpochMilli());
+  }
+
+ /**
+ * Return the next substring to parse. Length is either specified or token.length, but a
+ * separator or an ISO-8601 delimiter can cut the substring short. (e.g. if the token pattern is
+ * "YYYY" we expect the next 4 characters to be 4 numbers. However, if it is "976/" then we
+ * return "976" because a separator cuts it short.)
+ */
+ private String getNextSubstring(String s, int begin, Token token) {
+ return getNextSubstring(s, begin, begin + token.length, token);
+ }
+
+ private String getNextSubstring(String s, int begin, int end, Token token) {
+ if (end > s.length()) {
+ end = s.length();
+ }
+ s = s.substring(begin, end);
+ if (token.temporalField == ChronoField.AMPM_OF_DAY) {
+ if (s.charAt(1) == 'm' || s.charAt(1) == 'M') { // length 2
+ return s.substring(0, 2);
+ } else {
+ return s;
+ }
+ }
+ for (String sep : VALID_SEPARATORS) {
+ if (s.contains(sep)) {
+ s = s.substring(0, s.indexOf(sep));
+ }
+ }
+    // Note: cutting at ISO delimiters will cause problems with a DAY-name token (for example,
+    // Thursday starts with T), should one ever be supported.
+    for (String delimiter : VALID_ISO_8601_DELIMITERS) {
+ if (s.toLowerCase().contains(delimiter)) {
+ s = s.substring(0, s.toLowerCase().indexOf(delimiter));
+ }
+ }
+
+ return s;
+ }
+
+ /**
+ * Get the integer value of a temporal substring.
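+   * e.g. "p.m." maps to PM, a 2-digit round year resolves against the current year, and a
+   * fractional-second substring like "777" with token ff3 becomes 777000000 (nanos).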
+ */
+  private int parseTemporal(String substring, Token token) {
+ // exceptions to the rule
+ if (token.temporalField == ChronoField.AMPM_OF_DAY) {
+ return substring.toLowerCase().startsWith("a") ? AM : PM;
+
+ } else if (token.temporalField == ChronoField.YEAR) {
+ String currentYearString = String.valueOf(LocalDateTime.now().getYear());
+ //deal with round years
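+      // e.g. parsed in 2019: round-year value "49" resolves to 2049 and "50" to 1950
+      // (the pivot between centuries is _50)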
+ if (token.string.startsWith("r") && substring.length() == 2) {
+ int currFirst2Digits = Integer.parseInt(currentYearString.substring(0, 2));
+ int currLast2Digits = Integer.parseInt(currentYearString.substring(2));
+ int valLast2Digits = Integer.parseInt(substring);
+ if (valLast2Digits < _50 && currLast2Digits >= _50) {
+ currFirst2Digits += 1;
+ } else if (valLast2Digits >= _50 && currLast2Digits < _50) {
+ currFirst2Digits -= 1;
+ }
+ substring = String.valueOf(currFirst2Digits) + substring;
+ } else { // fill in prefix digits with current date
+ substring = currentYearString.substring(0, 4 - substring.length()) + substring;
+ }
+
+ } else if (token.temporalField == ChronoField.NANO_OF_SECOND) {
+ int i = Integer.min(token.length, substring.length());
+ substring += StringUtils.repeat("0", NANOS_MAX_LENGTH - i);
+ }
+
+ // the rule
+ try {
+ return Integer.parseInt(substring);
+ } catch (NumberFormatException e) {
+ throw new IllegalArgumentException("Couldn't parse substring \"" + substring +
+ "\" with token " + token + " to integer. Pattern is " + pattern, e);
+ }
+ }
+
+ /**
+ * Parse the next separator(s). At least one separator character is expected. Separator
+ * characters are interchangeable.
+ *
+   * Caveat: if the last separator character in the separator substring is "-" and the next
+   * token is time zone hour (tzh), it is treated as tzh's negative sign rather than as a
+   * separator -- unless it is the only separator character in the substring, in which case it
+   * still counts as the separator and not as a negative sign.
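+   * e.g. with pattern "YYYY-MM-DD HH24:MI TZH:TZM" and input "2019-1-1 14:00--1:-30" (from the
+   * tests), the first "-" after "14:00" is the separator and the second is tzh's sign.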
+ *
+ * @throws IllegalArgumentException if separator is missing
+ */
+  private int parseSeparator(String fullInput, int index, Token token) {
+ int separatorsFound = 0;
+ int begin = index;
+
+ while (index < fullInput.length() &&
+ VALID_SEPARATORS.contains(fullInput.substring(index, index + 1))) {
+ if (!isLastCharacterOfSeparator(index, fullInput) || !(nextTokenIs("tzh", token))
+ || separatorsFound == 0) {
+ separatorsFound++;
+ }
+ index++;
+ }
+
+ if (separatorsFound == 0) {
+ throw new IllegalArgumentException("Missing separator at index " + index);
+ }
+ return begin + separatorsFound;
+ }
+
+ private int parseIso8601Delimiter(String fullInput, int index, Token token) {
+ String substring;
+ substring = fullInput.substring(index, index + 1);
+ if (token.string.equalsIgnoreCase(substring)) {
+ index++;
+ } else {
+ throw new IllegalArgumentException(
+ "Missing ISO 8601 delimiter " + token.string.toUpperCase());
+ }
+ return index;
+ }
+
+ /**
+ * Is the next character something other than a separator?
+ */
+ private boolean isLastCharacterOfSeparator(int index, String string) {
+    if (index == string.length() - 1) { // if we're at the end of the string, yes
+ return true;
+ }
+ return !VALID_SEPARATORS.contains(string.substring(index + 1, index + 2));
+ }
+
+ /**
+ * Does the temporalUnit/temporalField of the next token match the pattern's?
+ */
+ private boolean nextTokenIs(String pattern, Token currentToken) {
+ // make sure currentToken isn't the last one
+ if (tokens.indexOf(currentToken) == tokens.size() - 1) {
+ return false;
+ }
+ Token nextToken = tokens.get(tokens.indexOf(currentToken) + 1);
+ pattern = pattern.toLowerCase();
+ return (VALID_TIME_ZONE_TOKENS.containsKey(pattern)
+ && VALID_TIME_ZONE_TOKENS.get(pattern) == nextToken.temporalUnit
+ || VALID_TEMPORAL_TOKENS.containsKey(pattern)
+ && VALID_TEMPORAL_TOKENS.get(pattern) == nextToken.temporalField);
+ }
+
+ @Override public String getPattern() {
+ return pattern;
+ }
+}
diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/package-info.java common/src/java/org/apache/hadoop/hive/common/format/datetime/package-info.java
new file mode 100644
index 0000000000..08fc136a56
--- /dev/null
+++ common/src/java/org/apache/hadoop/hive/common/format/datetime/package-info.java
@@ -0,0 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
diff --git common/src/java/org/apache/hadoop/hive/common/type/Date.java common/src/java/org/apache/hadoop/hive/common/type/Date.java
index 6ecfcf65c9..cb8815d227 100644
--- common/src/java/org/apache/hadoop/hive/common/type/Date.java
+++ common/src/java/org/apache/hadoop/hive/common/type/Date.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -17,19 +17,14 @@
*/
package org.apache.hadoop.hive.common.type;
+import org.apache.hadoop.hive.common.format.datetime.DefaultHiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+
import java.time.Instant;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
-import java.time.format.DateTimeFormatter;
-import java.time.format.DateTimeFormatterBuilder;
-import java.time.format.DateTimeParseException;
-import java.time.format.ResolverStyle;
-import java.time.format.SignStyle;
-
-import static java.time.temporal.ChronoField.DAY_OF_MONTH;
-import static java.time.temporal.ChronoField.MONTH_OF_YEAR;
-import static java.time.temporal.ChronoField.YEAR;
/**
* This is the internal type for Date.
@@ -38,20 +33,6 @@
public class Date implements Comparable<Date> {
private static final LocalDate EPOCH = LocalDate.of(1970, 1, 1);
- private static final DateTimeFormatter PARSE_FORMATTER;
- private static final DateTimeFormatter PRINT_FORMATTER;
- static {
- DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder();
- builder.appendValue(YEAR, 1, 10, SignStyle.NORMAL)
- .appendLiteral('-')
- .appendValue(MONTH_OF_YEAR, 1, 2, SignStyle.NORMAL)
- .appendLiteral('-')
- .appendValue(DAY_OF_MONTH, 1, 2, SignStyle.NORMAL);
- PARSE_FORMATTER = builder.toFormatter().withResolverStyle(ResolverStyle.LENIENT);
- builder = new DateTimeFormatterBuilder();
- builder.append(DateTimeFormatter.ofPattern("yyyy-MM-dd"));
- PRINT_FORMATTER = builder.toFormatter();
- }
private LocalDate localDate;
@@ -69,7 +50,18 @@ public Date(Date d) {
@Override
public String toString() {
- return localDate.format(PRINT_FORMATTER);
+ return DefaultHiveSqlDateTimeFormatter.format(this);
+ }
+
+ public String toStringFormatted(HiveDateTimeFormatter formatter) {
+ if (formatter == null) {
+ return toString();
+ }
+ try {
+ return formatter.format(this);
+ } catch (IllegalArgumentException e) {
+ return null;
+ }
}
public int hashCode() {
@@ -123,18 +115,14 @@ public void setTimeInMillis(long epochMilli) {
}
public static Date valueOf(String s) {
- s = s.trim();
- int idx = s.indexOf(" ");
- if (idx != -1) {
- s = s.substring(0, idx);
- }
- LocalDate localDate;
- try {
- localDate = LocalDate.parse(s, PARSE_FORMATTER);
- } catch (DateTimeParseException e) {
- throw new IllegalArgumentException("Cannot create date, parsing error");
+ return DefaultHiveSqlDateTimeFormatter.parseDate(s.trim());
+ }
+
+ public static Date valueOf(String s, HiveDateTimeFormatter formatter) {
+ if (formatter == null) {
+ return valueOf(s);
}
- return new Date(localDate);
+ return formatter.parseDate(s);
}
public static Date ofEpochDay(int epochDay) {
diff --git common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java
index a8b7b6d186..66cd153e20 100644
--- common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java
+++ common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -17,22 +17,13 @@
*/
package org.apache.hadoop.hive.common.type;
+import org.apache.hadoop.hive.common.format.datetime.DefaultHiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
-import java.time.format.DateTimeFormatterBuilder;
-import java.time.format.DateTimeParseException;
-import java.time.format.ResolverStyle;
-import java.time.format.SignStyle;
-import java.time.temporal.ChronoField;
-
-import static java.time.temporal.ChronoField.DAY_OF_MONTH;
-import static java.time.temporal.ChronoField.HOUR_OF_DAY;
-import static java.time.temporal.ChronoField.MINUTE_OF_HOUR;
-import static java.time.temporal.ChronoField.MONTH_OF_YEAR;
-import static java.time.temporal.ChronoField.SECOND_OF_MINUTE;
-import static java.time.temporal.ChronoField.YEAR;
/**
* This is the internal type for Timestamp.
@@ -43,35 +34,6 @@
public class Timestamp implements Comparable<Timestamp> {
private static final LocalDateTime EPOCH = LocalDateTime.of(1970, 1, 1, 0, 0, 0);
- private static final DateTimeFormatter PARSE_FORMATTER;
- private static final DateTimeFormatter PRINT_FORMATTER;
-
- static {
- DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder();
- // Date part
- builder.appendValue(YEAR, 1, 10, SignStyle.NORMAL)
- .appendLiteral('-')
- .appendValue(MONTH_OF_YEAR, 1, 2, SignStyle.NORMAL)
- .appendLiteral('-')
- .appendValue(DAY_OF_MONTH, 1, 2, SignStyle.NORMAL);
- // Time part
- builder
- .optionalStart().appendLiteral(" ")
- .appendValue(HOUR_OF_DAY, 1, 2, SignStyle.NORMAL)
- .appendLiteral(':')
- .appendValue(MINUTE_OF_HOUR, 1, 2, SignStyle.NORMAL)
- .appendLiteral(':')
- .appendValue(SECOND_OF_MINUTE, 1, 2, SignStyle.NORMAL)
- .optionalStart().appendFraction(ChronoField.NANO_OF_SECOND, 1, 9, true).optionalEnd()
- .optionalEnd();
- PARSE_FORMATTER = builder.toFormatter().withResolverStyle(ResolverStyle.LENIENT);
- builder = new DateTimeFormatterBuilder();
- // Date and time parts
- builder.append(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
- // Fractional part
- builder.optionalStart().appendFraction(ChronoField.NANO_OF_SECOND, 0, 9, true).optionalEnd();
- PRINT_FORMATTER = builder.toFormatter();
- }
private LocalDateTime localDateTime;
@@ -98,7 +60,18 @@ public String format(DateTimeFormatter formatter) {
@Override
public String toString() {
- return localDateTime.format(PRINT_FORMATTER);
+ return DefaultHiveSqlDateTimeFormatter.format(this);
+ }
+
+ public String toStringFormatted(HiveDateTimeFormatter formatter) {
+ if (formatter == null) {
+ return toString();
+ }
+ try {
+ return formatter.format(this);
+ } catch (IllegalArgumentException e) {
+ return null;
+ }
}
public int hashCode() {
@@ -150,20 +123,16 @@ public int getNanos() {
return localDateTime.getNano();
}
+  // throws IllegalArgumentException if the string cannot be parsed
public static Timestamp valueOf(String s) {
- s = s.trim();
- LocalDateTime localDateTime;
- try {
- localDateTime = LocalDateTime.parse(s, PARSE_FORMATTER);
- } catch (DateTimeParseException e) {
- // Try ISO-8601 format
- try {
- localDateTime = LocalDateTime.parse(s);
- } catch (DateTimeParseException e2) {
- throw new IllegalArgumentException("Cannot create timestamp, parsing error");
- }
+ return DefaultHiveSqlDateTimeFormatter.parseTimestamp(s.trim());
+ }
+
+ public static Timestamp valueOf(String s, HiveDateTimeFormatter formatter) {
+ if (formatter == null) {
+ return valueOf(s);
}
- return new Timestamp(localDateTime);
+ return formatter.parseTimestamp(s);
}
public static Timestamp ofEpochSecond(long epochSecond) {
diff --git common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java
index f26f8ae01e..fe03ad55f0 100644
--- common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java
+++ common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java
@@ -22,7 +22,6 @@
import java.math.BigDecimal;
import java.time.DateTimeException;
-import java.time.format.DateTimeParseException;
/**
* Utilities for Timestamps and the relevant conversions.
@@ -168,27 +167,4 @@ public static long millisToSeconds(long millis) {
return (millis - 999) / 1000;
}
}
-
- private static final int DATE_LENGTH = "YYYY-MM-DD".length();
-
- public static Timestamp stringToTimestamp(String s) {
- s = s.trim();
- // Handle simpler cases directly avoiding exceptions
- if (s.length() == DATE_LENGTH) {
- // Its a date!
- return Timestamp.ofEpochMilli(Date.valueOf(s).toEpochMilli());
- }
- try {
- return Timestamp.valueOf(s);
- } catch (IllegalArgumentException eT) {
- // Try zoned timestamp
- try {
- return Timestamp.valueOf(
- TimestampTZUtil.parse(s).getZonedDateTime().toLocalDateTime().toString());
- } catch (IllegalArgumentException | DateTimeParseException eTZ) {
- // Last attempt
- return Timestamp.ofEpochMilli(Date.valueOf(s).toEpochMilli());
- }
- }
- }
}
diff --git common/src/java/org/apache/hive/common/util/DateParser.java common/src/java/org/apache/hive/common/util/DateParser.java
index 5db14f1906..22bcd98c1d 100644
--- common/src/java/org/apache/hive/common/util/DateParser.java
+++ common/src/java/org/apache/hive/common/util/DateParser.java
@@ -17,6 +17,7 @@
*/
package org.apache.hive.common.util;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.type.Date;
/**
@@ -36,9 +37,13 @@ public Date parseDate(String strValue) {
}
public boolean parseDate(String strValue, Date result) {
+ return parseDate(strValue, result, null);
+ }
+
+ public boolean parseDate(String strValue, Date result, HiveDateTimeFormatter formatter) {
Date parsedVal;
try {
- parsedVal = Date.valueOf(strValue);
+ parsedVal = Date.valueOf(strValue, formatter);
} catch (IllegalArgumentException e) {
parsedVal = null;
}
diff --git common/src/test/org/apache/hadoop/hive/common/format/datetime/TestDefaultHiveSqlDateTimeFormatter.java common/src/test/org/apache/hadoop/hive/common/format/datetime/TestDefaultHiveSqlDateTimeFormatter.java
new file mode 100644
index 0000000000..db2c8b613a
--- /dev/null
+++ common/src/test/org/apache/hadoop/hive/common/format/datetime/TestDefaultHiveSqlDateTimeFormatter.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import junit.framework.TestCase;
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+
+import static org.apache.hadoop.hive.common.format.datetime.DefaultHiveSqlDateTimeFormatter.getNumberOfTokenGroups;
+import static org.apache.hadoop.hive.common.format.datetime.DefaultHiveSqlDateTimeFormatter.parseDate;
+import static org.apache.hadoop.hive.common.format.datetime.DefaultHiveSqlDateTimeFormatter.parseTimestamp;
+
+public class TestDefaultHiveSqlDateTimeFormatter extends TestCase {
+ private static final Timestamp _2019_01_01__02_03_04 = Timestamp.ofEpochMilli(1546308184000L);
+ private static final Date _2019_01_01 = Date.ofEpochMilli(1546300800000L);
+
+ private Timestamp timestamp(Timestamp input, int nanos) {
+ Timestamp output = (Timestamp) input.clone();
+ output.setNanos(nanos);
+ return output;
+ }
+
+ public void testFormatTimestamp() {
+ String s1 = "2019-01-01 02:03:04";
+ String s2 = "2019-01-01 02:03:04.44444";
+ String s3 = "2019-01-01 02:03:04.444444444";
+ assertEquals(s1, DefaultHiveSqlDateTimeFormatter.format(_2019_01_01__02_03_04));
+ assertEquals(s2, DefaultHiveSqlDateTimeFormatter.format(timestamp(_2019_01_01__02_03_04, 444440000)));
+ assertEquals(s3, DefaultHiveSqlDateTimeFormatter.format(timestamp(_2019_01_01__02_03_04, 444444444)));
+ }
+
+ public void testFormatDate() {
+ String s1 = "2019-01-01";
+ assertEquals(s1, DefaultHiveSqlDateTimeFormatter.format(_2019_01_01));
+ }
+
+ public void testParseTimestamp() {
+ String s1 = "2019-01-01 02:03:04";
+ String s2 = "2019-01-01 02:03:04.000";
+ String s3 = "2019-01-01T02:03:04Z";
+ String s4 = "2019-01-01 02:03:04.44444";
+ String s5 = "2019-01-01T02:03:04.44444Z";
+ String s6 = "2019.01.01T02....03:04..44444Z";
+
+ assertEquals(_2019_01_01__02_03_04, parseTimestamp(s1));
+ assertEquals(_2019_01_01__02_03_04, parseTimestamp(s2));
+ assertEquals(_2019_01_01__02_03_04, parseTimestamp(s3));
+ assertEquals(timestamp(_2019_01_01__02_03_04, 444440000), parseTimestamp(s4));
+ assertEquals(timestamp(_2019_01_01__02_03_04, 444440000), parseTimestamp(s5));
+ assertEquals(timestamp(_2019_01_01__02_03_04, 444440000), parseTimestamp(s6));
+ }
+
+ public void testParseDate() {
+ String s1 = "2019/01///01";
+ String s2 = "19/01///01";
+ String s3 = "2019-01-01T02:03:04Z";
+ String s4 = "2019-01-01 02:03:04.44444";
+ String s5 = "2019-01-01T02:03:04.44444Z";
+ String s6 = "2019.01.01T02....03:04..44444";
+    assertEquals(_2019_01_01, parseDate(s1));
+    assertEquals(_2019_01_01, parseDate(s2));
+    assertEquals(_2019_01_01, parseDate(s3));
+    assertEquals(_2019_01_01, parseDate(s4));
+    assertEquals(_2019_01_01, parseDate(s5));
+ assertEquals(_2019_01_01, parseDate(s6));
+ }
+
+ public void testGetNumberOfTokenGroups() {
+ assertEquals(4, getNumberOfTokenGroups("2018..39..7T urkey"));
+ assertEquals(8, getNumberOfTokenGroups("2019-01-01T02:03:04Z"));
+ assertEquals(3, getNumberOfTokenGroups("2019-01-01GMT"));
+ assertEquals(4, getNumberOfTokenGroups("2019-01-01Turkey"));
+ }
+}
diff --git common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java
new file mode 100644
index 0000000000..63cea8b98d
--- /dev/null
+++ common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java
@@ -0,0 +1,265 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import junit.framework.TestCase;
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+import java.time.format.DateTimeFormatterBuilder;
+import java.time.format.ResolverStyle;
+import java.time.format.SignStyle;
+import java.time.temporal.ChronoField;
+import java.time.temporal.TemporalField;
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import static java.time.temporal.ChronoField.DAY_OF_MONTH;
+import static java.time.temporal.ChronoField.HOUR_OF_DAY;
+import static java.time.temporal.ChronoField.MINUTE_OF_HOUR;
+import static java.time.temporal.ChronoField.MONTH_OF_YEAR;
+import static java.time.temporal.ChronoField.SECOND_OF_MINUTE;
+import static java.time.temporal.ChronoField.YEAR;
+
+/**
+ * Test class for HiveSqlDateTimeFormatter.
+ */
+public class TestHiveSqlDateTimeFormatter extends TestCase {
+
+ private HiveSqlDateTimeFormatter formatter = new HiveSqlDateTimeFormatter();
+
+ public void testSetPattern() {
+ verifyPatternParsing(" ---yyyy-\'-:- -,.;/MM-dd--", new ArrayList<>(List.of(
+ null,
+ ChronoField.YEAR,
+ null,
+ ChronoField.MONTH_OF_YEAR,
+ null,
+ ChronoField.DAY_OF_MONTH,
+ null
+ )));
+
+ verifyPatternParsing("ymmdddhh24::mi:ss A.M. pm", 25, "ymmdddhh24::mi:ss A.M. pm",
+        new ArrayList<>(Arrays.asList(
+ ChronoField.YEAR,
+ ChronoField.MONTH_OF_YEAR,
+ ChronoField.DAY_OF_YEAR,
+ ChronoField.HOUR_OF_DAY,
+ null, ChronoField.MINUTE_OF_HOUR,
+ null, ChronoField.SECOND_OF_MINUTE,
+ null, ChronoField.AMPM_OF_DAY,
+ null, ChronoField.AMPM_OF_DAY
+ )));
+ }
+
+ public void testSetPatternWithBadPatterns() {
+ verifyBadPattern("e", true);
+ verifyBadPattern("yyyy-1", true);
+
+ verifyBadPattern("yyyy Y", true);
+ verifyBadPattern("yyyy R", true);
+ verifyBadPattern("yyyy-MM-DDD", true);
+ verifyBadPattern("yyyy-mm-DD DDD", true);
+ verifyBadPattern("yyyy-mm-dd HH24 HH12", true);
+ verifyBadPattern("yyyy-mm-dd HH24 AM", true);
+ verifyBadPattern("yyyy-mm-dd HH24 SSSSS", true);
+ verifyBadPattern("yyyy-mm-dd HH12 SSSSS", true);
+ verifyBadPattern("yyyy-mm-dd SSSSS AM", true);
+ verifyBadPattern("yyyy-mm-dd MI SSSSS", true);
+ verifyBadPattern("yyyy-mm-dd SS SSSSS", true);
+
+ verifyBadPattern("tzm", false);
+ verifyBadPattern("tzh", false);
+ }
+
+ public void testFormatTimestamp() {
+ checkFormatTs("rr rrrr ddd", "2018-01-03 00:00:00", "18 2018 003");
+ checkFormatTs("yyyy-mm-ddtsssss.ff4z", "2018-02-03 00:00:10.777777777", "2018-02-03T00010.7777Z");
+ checkFormatTs("hh24:mi:ss.ff1", "2018-02-03 01:02:03.999999999", "01:02:03.9");
+ checkFormatTs("y yyy hh:mi:ss.ffz", "2018-02-03 01:02:03.0070070", "8 018 01:02:03.007007Z");
+ checkFormatTs("am a.m. pm p.m. AM A.M. PM P.M.", "2018-02-03 01:02:03.0070070", "am a.m. am a.m. AM A.M. AM A.M.");
+ }
+
+ private void checkFormatTs(String pattern, String input, String expectedOutput) {
+ formatter.setPattern(pattern, false);
+ assertEquals(expectedOutput, formatter.format(toTimestamp(input)));
+ }
+
+ public void testFormatDate() {
+ checkFormatDate("rr rrrr ddd", "2018-01-03", "18 2018 003");
+ checkFormatDate("yyyy-mm-ddtsssss.ff4z", "2018-02-03", "2018-02-03T00000.0000Z");
+ checkFormatDate("hh24:mi:ss.ff1", "2018-02-03", "00:00:00.0");
+ checkFormatDate("y yyy T hh:mi:ss.ffz", "2018-02-03", "8 018 T 00:00:00.0Z");
+ checkFormatDate("am a.m. pm p.m. AM A.M. PM P.M.", "2018-02-03", "am a.m. am a.m. AM A.M. AM A.M.");
+ checkFormatDate("DDD", "2019-12-31", "365");
+ checkFormatDate("DDD", "2020-12-31", "366");
+ }
+
+ private void checkFormatDate(String pattern, String input, String expectedOutput) {
+ formatter.setPattern(pattern, false);
+ assertEquals(expectedOutput, formatter.format(toDate(input)));
+ }
+
+ public void testParseTimestamp() {
+ checkParseTimestamp("yyyy-mm-ddThh24:mi:ss.ff8z", "2018-02-03T04:05:06.5665Z", "2018-02-03 04:05:06.5665");
+ checkParseTimestamp("yyyy-mm-dd hh24:mi:ss.ff", "2018-02-03 04:05:06.555555555", "2018-02-03 04:05:06.555555555");
+ checkParseTimestamp("yy-mm-dd hh12:mi:ss", "99-2-03 04:05:06", "2099-02-03 04:05:06");
+ checkParseTimestamp("rr-mm-dd", "00-02-03", "2000-02-03 00:00:00");
+ checkParseTimestamp("rr-mm-dd", "49-02-03", "2049-02-03 00:00:00");
+ checkParseTimestamp("rr-mm-dd", "50-02-03", "1950-02-03 00:00:00");
+ checkParseTimestamp("rrrr-mm-dd", "00-02-03", "2000-02-03 00:00:00");
+ checkParseTimestamp("rrrr-mm-dd", "49-02-03", "2049-02-03 00:00:00");
+ checkParseTimestamp("rrrr-mm-dd", "50-02-03", "1950-02-03 00:00:00");
+ checkParseTimestamp("yyy-mm-dd","018-01-01","2018-01-01 00:00:00");
+ checkParseTimestamp("yyyyddd", "2018284","2018-10-11 00:00:00");
+ checkParseTimestamp("yyyyddd", "20184","2018-01-04 00:00:00");
+ checkParseTimestamp("yyyy-mm-ddThh24:mi:ss.ffz", "2018-02-03t04:05:06.444Z","2018-02-03 04:05:06.444");
+ checkParseTimestamp("hh:mi:ss A.M.", "04:05:06 P.M.","1970-01-01 16:05:06");
+ checkParseTimestamp("YYYY-MM-DD HH24:MI TZH:TZM", "2019-1-1 14:00--1:-30","2019-01-01 15:30:00");
+ checkParseTimestamp("YYYY-MM-DD HH24:MI TZH:TZM", "2019-1-1 14:00-1:30","2019-01-01 12:30:00");
+ checkParseTimestamp("TZM:TZH", "1 -3","1970-01-01 03:01:00");
+ checkParseTimestamp("TZH:TZM", "-0:30","1970-01-01 00:30:00");
+ checkParseTimestamp("TZM/YYY-MM-TZH/DD", "0/333-01-11/02","2333-01-01 13:00:00");
+ checkParseTimestamp("YYYY-MM-DD HH12:MI AM", "2019-01-01 11:00 p.m.","2019-01-01 23:00:00");
+ checkParseTimestamp("YYYY-MM-DD HH12:MI A.M..", "2019-01-01 11:00 pm.","2019-01-01 23:00:00");
+
+ //Test "day in year" token in a leap year scenario
+ checkParseTimestamp("YYYY DDD", "2000 60", "2000-02-29 00:00:00");
+ checkParseTimestamp("YYYY DDD", "2000 61", "2000-03-01 00:00:00");
+ checkParseTimestamp("YYYY DDD", "2000 366", "2000-12-31 00:00:00");
+ //Test timezone offset parsing without separators
+ checkParseTimestamp("YYYYMMDDHH12MIA.M.TZHTZM", "201812310800AM+0515", "2018-12-31 02:45:00");
+ checkParseTimestamp("YYYYMMDDHH12MIA.M.TZHTZM", "201812310800AM0515", "2018-12-31 02:45:00");
+ checkParseTimestamp("YYYYMMDDHH12MIA.M.TZHTZM", "201812310800AM-0515", "2018-12-31 13:15:00");
+ }
+
+ private void checkParseTimestamp(String pattern, String input, String expectedOutput) {
+ formatter.setPattern(pattern, true);
+ assertEquals(toTimestamp(expectedOutput), formatter.parseTimestamp(input));
+ }
+
+ public void testParseDate() {
+ checkParseDate("yyyy-mm-dd hh mi ss", "2018/01/01 2.2.2", "2018-01-01");
+ checkParseDate("rr-mm-dd", "00-02-03", "2000-02-03");
+ checkParseDate("rr-mm-dd", "49-02-03", "2049-02-03");
+ checkParseDate("rr-mm-dd", "50-02-03", "1950-02-03");
+ }
+
+ private void checkParseDate(String pattern, String input, String expectedOutput) {
+ formatter.setPattern(pattern, true);
+ assertEquals(toDate(expectedOutput), formatter.parseDate(input));
+ }
+
+ public void testParseTimestampError() {
+ verifyBadParseString("yyyy", "2019-02-03");
+ verifyBadParseString("yyyy-mm-dd ", "2019-02-03"); //separator missing
+ verifyBadParseString("yyyy-mm-dd", "2019-02-03..."); //extra separators
+ verifyBadParseString("yyyy-mm-dd hh12:mi:ss", "2019-02-03 14:00:00"); //hh12 out of range
+ verifyBadParseString("yyyy-dddsssss", "2019-912345");
+ verifyBadParseString("yyyy-mm-dd", "2019-13-23"); //mm out of range
+ verifyBadParseString("yyyy-mm-dd tzh:tzm", "2019-01-01 +16:00"); //tzh out of range
+ verifyBadParseString("yyyy-mm-dd tzh:tzm", "2019-01-01 +14:60"); //tzm out of range
+ verifyBadParseString("YYYY DDD", "2000 367"); //ddd out of range
+ }
+
+ private void verifyBadPattern(String string, boolean forParsing) {
+ try {
+ formatter.setPattern(string, forParsing);
+ fail();
+ } catch (Exception e) {
+      assertEquals(IllegalArgumentException.class.getName(), e.getClass().getName());
+ }
+ }
+
+ /**
+ * Checks:
+ * -token.temporalField for each token
+ * -sum of token.lengths
+ * -concatenation of token.strings
+ */
+  private void verifyPatternParsing(String pattern, ArrayList<TemporalField> temporalFields) {
+ verifyPatternParsing(pattern, pattern.length(), pattern.toLowerCase(), temporalFields);
+ }
+
+ private void verifyPatternParsing(String pattern, int expectedPatternLength,
+      String expectedPattern, ArrayList<TemporalField> temporalFields) {
+ formatter.setPattern(pattern, false);
+ assertEquals(temporalFields.size(), formatter.tokens.size());
+ StringBuilder sb = new StringBuilder();
+ int actualPatternLength = 0;
+ for (int i = 0; i < temporalFields.size(); i++) {
+ assertEquals("Generated list of tokens not correct", temporalFields.get(i),
+ formatter.tokens.get(i).temporalField);
+ sb.append(formatter.tokens.get(i).string);
+ actualPatternLength += formatter.tokens.get(i).length;
+ }
+ assertEquals("Token strings concatenated don't match original pattern string",
+ expectedPattern, sb.toString());
+ assertEquals(expectedPatternLength, actualPatternLength);
+ }
+
+ private void verifyBadParseString(String pattern, String string) {
+ try {
+ formatter.setPattern(pattern, true);
+ formatter.parseTimestamp(string);
+ fail();
+ } catch (Exception e) {
+ assertEquals(IllegalArgumentException.class.getName(), e.getClass().getName());
+ }
+ }
+
+ // Methods that construct datetime objects using java.time.DateTimeFormatter.
+
+ public static Date toDate(String s) {
+ LocalDate localDate = LocalDate.parse(s, DATE_FORMATTER);
+ return Date.ofEpochDay((int) localDate.toEpochDay());
+ }
+
+ /**
+ * This is effectively the old Timestamp.valueOf method.
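+ * Accepts e.g. "2018-01-02 03:04:05.123" as well as single-digit fields such as "2018-1-2 3:4:5".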
+ */
+ public static Timestamp toTimestamp(String s) {
+ LocalDateTime localDateTime = LocalDateTime.parse(s.trim(), TIMESTAMP_FORMATTER);
+ return Timestamp.ofEpochSecond(
+ localDateTime.toEpochSecond(ZoneOffset.UTC), localDateTime.getNano());
+ }
+
+ private static final DateTimeFormatter DATE_FORMATTER =
+ DateTimeFormatter.ofPattern("yyyy-MM-dd");
+ private static final DateTimeFormatter TIMESTAMP_FORMATTER;
+ static {
+ DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder();
+ builder.appendValue(YEAR, 1, 10, SignStyle.NORMAL).appendLiteral('-')
+ .appendValue(MONTH_OF_YEAR, 1, 2, SignStyle.NORMAL).appendLiteral('-')
+ .appendValue(DAY_OF_MONTH, 1, 2, SignStyle.NORMAL)
+ .optionalStart().appendLiteral(" ")
+ .appendValue(HOUR_OF_DAY, 1, 2, SignStyle.NORMAL).appendLiteral(':')
+ .appendValue(MINUTE_OF_HOUR, 1, 2, SignStyle.NORMAL).appendLiteral(':')
+ .appendValue(SECOND_OF_MINUTE, 1, 2, SignStyle.NORMAL)
+ .optionalStart().appendFraction(ChronoField.NANO_OF_SECOND, 1, 9, true).optionalEnd()
+ .optionalEnd();
+ TIMESTAMP_FORMATTER = builder.toFormatter().withResolverStyle(ResolverStyle.LENIENT);
+ }
+}
diff --git common/src/test/org/apache/hadoop/hive/common/format/datetime/package-info.java common/src/test/org/apache/hadoop/hive/common/format/datetime/package-info.java
new file mode 100644
index 0000000000..7a8d9f0394
--- /dev/null
+++ common/src/test/org/apache/hadoop/hive/common/format/datetime/package-info.java
@@ -0,0 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
\ No newline at end of file
diff --git common/src/test/org/apache/hive/common/util/TestTimestampParser.java common/src/test/org/apache/hive/common/util/TestTimestampParser.java
index 00a7904ecf..5bf1119cef 100644
--- common/src/test/org/apache/hive/common/util/TestTimestampParser.java
+++ common/src/test/org/apache/hive/common/util/TestTimestampParser.java
@@ -116,8 +116,7 @@ public void testPattern1() {
};
String[] invalidCases = {
- "1945-12-31-23:59:59",
- "12345",
+ "12345"
};
testValidCases(tp, validCases);
@@ -147,8 +146,7 @@ public void testMillisParser() {
};
String[] invalidCases = {
- "1945-12-31-23:59:59",
- "1420509274123-",
+ "1420509274123-"
};
testValidCases(tp, validCases);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index fa9d1e9783..e3339d7539 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -42,8 +42,11 @@
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastBooleanToVarCharViaLongToVarChar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastCharToBinary;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToChar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToCharWithFormat;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToString;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToStringWithFormat;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToVarChar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToVarCharWithFormat;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToChar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToDecimal;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToString;
@@ -67,10 +70,13 @@
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToBoolean;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToDecimal;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToChar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToCharWithFormat;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToDecimal;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToDouble;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToString;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToStringWithFormat;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToVarChar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToVarCharWithFormat;
import org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.ConvertDecimal64ToDecimal;
import org.apache.hadoop.hive.ql.exec.vector.expressions.Decimal64ColumnInList;
@@ -3139,9 +3145,17 @@ private VectorExpression getCastToString(List<ExprNodeDesc> childExpr, TypeInfo returnType)
} else if (isDecimalFamily(inputType)) {
return createVectorExpression(CastDecimalToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
} else if (isDateFamily(inputType)) {
- return createVectorExpression(CastDateToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ if (childExpr.size() < 2) {
+ return createVectorExpression(CastDateToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ } else { //second argument will be format string
+ return createVectorExpression(CastDateToStringWithFormat.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ }
} else if (isTimestampFamily(inputType)) {
- return createVectorExpression(CastTimestampToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ if (childExpr.size() < 2) {
+ return createVectorExpression(CastTimestampToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ } else { //second argument will be format string
+ return createVectorExpression(CastTimestampToStringWithFormat.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ }
} else if (isStringFamily(inputType)) {
// STRING and VARCHAR types require no conversion, so use a no-op.
@@ -3173,9 +3187,17 @@ private VectorExpression getCastToChar(List<ExprNodeDesc> childExpr, TypeInfo returnType)
} else if (isDecimalFamily(inputType)) {
return createVectorExpression(CastDecimalToChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
} else if (isDateFamily(inputType)) {
- return createVectorExpression(CastDateToChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ if (childExpr.size() < 2) {
+ return createVectorExpression(CastDateToChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ } else {
+ return createVectorExpression(CastDateToCharWithFormat.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ }
} else if (isTimestampFamily(inputType)) {
- return createVectorExpression(CastTimestampToChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ if (childExpr.size() < 2) {
+ return createVectorExpression(CastTimestampToChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ } else {
+ return createVectorExpression(CastTimestampToCharWithFormat.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ }
} else if (isStringFamily(inputType)) {
return createVectorExpression(CastStringGroupToChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
}
@@ -3203,9 +3225,17 @@ private VectorExpression getCastToVarChar(List<ExprNodeDesc> childExpr, TypeInfo returnType)
} else if (isDecimalFamily(inputType)) {
return createVectorExpression(CastDecimalToVarChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
} else if (isDateFamily(inputType)) {
- return createVectorExpression(CastDateToVarChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ if (childExpr.size() < 2) {
+ return createVectorExpression(CastDateToVarChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ } else {
+ return createVectorExpression(CastDateToVarCharWithFormat.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ }
} else if (isTimestampFamily(inputType)) {
- return createVectorExpression(CastTimestampToVarChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ if (childExpr.size() < 2) {
+ return createVectorExpression(CastTimestampToVarChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ } else {
+ return createVectorExpression(CastTimestampToVarCharWithFormat.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+ }
} else if (isStringFamily(inputType)) {
return createVectorExpression(CastStringGroupToVarChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToCharWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToCharWithFormat.java
new file mode 100644
index 0000000000..e1394021fb
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToCharWithFormat.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+
+import java.nio.charset.StandardCharsets;
+
+public class CastDateToCharWithFormat extends CastDateToChar {
+
+ private static final long serialVersionUID = 1L;
+ private HiveDateTimeFormatter formatter;
+
+ public CastDateToCharWithFormat() {
+ super();
+ }
+
+ public CastDateToCharWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) {
+ super(inputColumn, outputColumnNum);
+
+ if (patternBytes == null) {
+ throw new IllegalStateException("Tried to cast ( to char with format ),"
+ + " but not found");
+ }
+ formatter = new HiveSqlDateTimeFormatter();
+ formatter.setPattern(new String(patternBytes, StandardCharsets.UTF_8), false);
+ }
+
+ @Override
+ protected void func(BytesColumnVector outV, long[] vector, int i) {
+ super.func(outV, vector, i, formatter);
+ }
+
+ @Override
+ public String vectorExpressionParameters() {
+ return super.vectorExpressionParameters() + ", format pattern: " + formatter.getPattern();
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java
index dfa9f8a00d..dee70c7f61 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java
@@ -18,28 +18,20 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import org.apache.hadoop.hive.common.format.datetime.DefaultHiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.type.Date;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.serde2.io.DateWritableV2;
-
-import java.sql.Date;
-import java.text.SimpleDateFormat;
-import java.util.TimeZone;
public class CastDateToString extends LongToStringUnaryUDF {
private static final long serialVersionUID = 1L;
- protected transient Date dt = new Date(0);
- private transient SimpleDateFormat formatter;
public CastDateToString() {
super();
- formatter = new SimpleDateFormat("yyyy-MM-dd");
- formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
}
public CastDateToString(int inputColumn, int outputColumnNum) {
super(inputColumn, outputColumnNum);
- formatter = new SimpleDateFormat("yyyy-MM-dd");
- formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
}
// The assign method will be overridden for CHAR and VARCHAR.
@@ -47,10 +39,25 @@ protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) {
outV.setVal(i, bytes, 0, length);
}
+ private void assignNull(BytesColumnVector outV, int i) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ }
+
@Override
protected void func(BytesColumnVector outV, long[] vector, int i) {
- dt.setTime(DateWritableV2.daysToMillis((int) vector[i]));
- byte[] temp = formatter.format(dt).getBytes();
- assign(outV, i, temp, temp.length);
+ func(outV, vector, i, null);
+ }
+
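+ // A null formatter selects the default SQL:2016 output format; any formatting failure produces a NULL output.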
+ protected void func(BytesColumnVector outV, long[] vector, int i, HiveDateTimeFormatter formatter) {
+ try {
+ Date date = Date.ofEpochDay((int) vector[i]);
+ String output = (formatter != null) ? formatter.format(date) :
+ DefaultHiveSqlDateTimeFormatter.format(date);
+ byte[] temp = output.getBytes();
+ assign(outV, i, temp, temp.length);
+ } catch (Exception e) {
+ assignNull(outV, i);
+ }
}
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToStringWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToStringWithFormat.java
new file mode 100644
index 0000000000..b05c2cb521
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToStringWithFormat.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Vectorized UDF for CAST (<date> TO STRING WITH FORMAT <pattern>).
+ */
+public class CastDateToStringWithFormat extends CastDateToString {
+ private static final long serialVersionUID = 1L;
+ private HiveDateTimeFormatter formatter;
+
+ public CastDateToStringWithFormat() {
+ super();
+ }
+
+ public CastDateToStringWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) {
+ super(inputColumn, outputColumnNum);
+
+ if (patternBytes == null) {
+ throw new IllegalStateException("Tried to cast ( to string with format ),"
+ + " but not found");
+ }
+ formatter = new HiveSqlDateTimeFormatter();
+ formatter.setPattern(new String(patternBytes, StandardCharsets.UTF_8), false);
+ }
+
+ // The assign method will be overridden for CHAR and VARCHAR.
+ protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) {
+ outV.setVal(i, bytes, 0, length);
+ }
+
+ @Override
+ protected void func(BytesColumnVector outV, long[] vector, int i) {
+ super.func(outV, vector, i, formatter);
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+ b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+ .setNumArguments(2)
+ .setArgumentTypes(
+ VectorExpressionDescriptor.ArgumentType.INT_FAMILY,
+ VectorExpressionDescriptor.ArgumentType.STRING)
+ .setInputExpressionTypes(
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.SCALAR);
+ return b.build();
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToVarCharWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToVarCharWithFormat.java
new file mode 100644
index 0000000000..8fd8c30b13
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToVarCharWithFormat.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+
+import java.nio.charset.StandardCharsets;
+
+public class CastDateToVarCharWithFormat extends CastDateToVarChar {
+
+ private static final long serialVersionUID = 1L;
+ private HiveDateTimeFormatter formatter;
+
+ public CastDateToVarCharWithFormat() {
+ super();
+ }
+
+ public CastDateToVarCharWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) {
+ super(inputColumn, outputColumnNum);
+
+ if (patternBytes == null) {
+ throw new IllegalStateException("Tried to cast ( to varchar with format ),"
+ + " but not found");
+ }
+ formatter = new HiveSqlDateTimeFormatter();
+ formatter.setPattern(new String(patternBytes, StandardCharsets.UTF_8), false);
+ }
+
+ @Override
+ protected void func(BytesColumnVector outV, long[] vector, int i) {
+ super.func(outV, vector, i, formatter);
+ }
+
+ @Override
+ public String vectorExpressionParameters() {
+ return super.vectorExpressionParameters() + ", format pattern: " + formatter.getPattern();
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
index a6dff12e1a..44a451b3bc 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.type.Date;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
@@ -151,10 +152,21 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException {
}
}
- private void evaluate(LongColumnVector outputColVector, BytesColumnVector inV, int i) {
+ /**
+ * Used by CastStringToDate.
+ */
+ protected void evaluate(LongColumnVector outputColVector, BytesColumnVector inV, int i) {
+ evaluate(outputColVector, inV, i, null);
+ }
+
+ /**
+ * Used by CastStringToDateWithFormat.
+ */
+ protected void evaluate(LongColumnVector outputColVector, BytesColumnVector inV, int i,
+ HiveDateTimeFormatter formatter) {
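+ // A null formatter (the no-format overload above) keeps the legacy DateParser behavior; otherwise the SQL:2016 formatter is used for parsing.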
String dateString = new String(inV.vector[i], inV.start[i], inV.length[i], StandardCharsets.UTF_8);
Date hDate = new Date();
- if (dateParser.parseDate(dateString, hDate)) {
+ if (dateParser.parseDate(dateString, hDate, formatter)) {
outputColVector.vector[i] = DateWritableV2.dateToDays(hDate);
return;
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDateWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDateWithFormat.java
new file mode 100644
index 0000000000..ba5c12f61d
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDateWithFormat.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Vectorized UDF for CAST (<string> TO DATE WITH FORMAT <pattern>).
+ */
+public class CastStringToDateWithFormat extends CastStringToDate {
+
+ private static final long serialVersionUID = 1L;
+ private HiveDateTimeFormatter formatter;
+
+ public CastStringToDateWithFormat() {
+ super();
+ }
+
+ public CastStringToDateWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) {
+ super(inputColumn, outputColumnNum);
+
+ if (patternBytes == null) {
+ throw new IllegalStateException("Tried to cast ( to date with format ),"
+ + " but not found");
+ }
+ formatter = new HiveSqlDateTimeFormatter();
+ formatter.setPattern(new String(patternBytes, StandardCharsets.UTF_8), true);
+ }
+
+ @Override
+ protected void evaluate(LongColumnVector outputColVector,
+ BytesColumnVector inputColVector, int i) {
+ super.evaluate(outputColVector, inputColVector, i, formatter);
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+ b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+ .setNumArguments(2)
+ .setArgumentTypes(
+ VectorExpressionDescriptor.ArgumentType.STRING_FAMILY,
+ VectorExpressionDescriptor.ArgumentType.STRING)
+ .setInputExpressionTypes(
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.SCALAR);
+ return b.build();
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestamp.java
index b48b0136eb..f8d81cdb13 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestamp.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestamp.java
@@ -19,8 +19,9 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
import java.util.Arrays;
-import java.sql.Timestamp;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.type.Timestamp;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -143,21 +144,40 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException {
}
}
- private void evaluate(TimestampColumnVector outputColVector, BytesColumnVector inputColVector, int i) {
+ /**
+ * This is used by CastStringToTimestamp.
+ */
+ protected void evaluate(TimestampColumnVector outputColVector, BytesColumnVector inputColVector, int i) {
+ evaluate(outputColVector, inputColVector, i, null);
+ }
+
+ /**
+ * This is used by CastStringToTimestampWithFormat.
+ */
+ protected void evaluate(TimestampColumnVector outputColVector, BytesColumnVector inputColVector,
+ int i, HiveDateTimeFormatter formatter) {
try {
- org.apache.hadoop.hive.common.type.Timestamp timestamp =
- PrimitiveObjectInspectorUtils.getTimestampFromString(
+ Timestamp timestamp = PrimitiveObjectInspectorUtils.getTimestampFromString(
new String(
inputColVector.vector[i], inputColVector.start[i], inputColVector.length[i],
- "UTF-8"));
- outputColVector.set(i, timestamp.toSqlTimestamp());
+ "UTF-8"),
+ formatter);
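+ // Both a null parse result and a parse exception map to a NULL output value.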
+ if (timestamp != null) {
+ outputColVector.set(i, timestamp.toSqlTimestamp());
+ } else {
+ setNullValue(outputColVector, i);
+ }
} catch (Exception e) {
- outputColVector.setNullValue(i);
- outputColVector.isNull[i] = true;
- outputColVector.noNulls = false;
+ setNullValue(outputColVector, i);
}
}
+ private void setNullValue(TimestampColumnVector outputColVector, int i) {
+ outputColVector.setNullValue(i);
+ outputColVector.isNull[i] = true;
+ outputColVector.noNulls = false;
+ }
+
@Override
public String vectorExpressionParameters() {
return getColumnParamString(0, inputColumn);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestampWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestampWithFormat.java
new file mode 100644
index 0000000000..a8a3749fad
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestampWithFormat.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Vectorized UDF for CAST (<string> TO TIMESTAMP WITH FORMAT <pattern>).
+ */
+public class CastStringToTimestampWithFormat extends CastStringToTimestamp {
+
+ private static final long serialVersionUID = 1L;
+ private HiveDateTimeFormatter formatter;
+
+ public CastStringToTimestampWithFormat() {
+ super();
+ }
+
+ public CastStringToTimestampWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) {
+ super(inputColumn, outputColumnNum);
+
+ if (patternBytes == null) {
+ throw new IllegalStateException("Tried to cast ( to timestamp with format"
+ + "), but not found");
+ }
+ formatter = new HiveSqlDateTimeFormatter();
+ formatter.setPattern(new String(patternBytes, StandardCharsets.UTF_8), true);
+ }
+
+ @Override
+ protected void evaluate(TimestampColumnVector outputColVector,
+ BytesColumnVector inputColVector, int i) {
+ super.evaluate(outputColVector, inputColVector, i, formatter);
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+ b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+ .setNumArguments(2)
+ .setArgumentTypes(
+ VectorExpressionDescriptor.ArgumentType.STRING_FAMILY,
+ VectorExpressionDescriptor.ArgumentType.STRING)
+ .setInputExpressionTypes(
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.SCALAR);
+ return b.build();
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToCharWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToCharWithFormat.java
new file mode 100644
index 0000000000..4fd80225a0
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToCharWithFormat.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+
+import java.nio.charset.StandardCharsets;
+
+public class CastTimestampToCharWithFormat extends CastTimestampToChar {
+
+ private static final long serialVersionUID = 1L;
+ private HiveDateTimeFormatter formatter;
+
+ public CastTimestampToCharWithFormat() {
+ super();
+ }
+
+ public CastTimestampToCharWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) {
+ super(inputColumn, outputColumnNum);
+
+ if (patternBytes == null) {
+ throw new IllegalStateException("Tried to cast ( to char with format ),"
+ + " but not found");
+ }
+ formatter = new HiveSqlDateTimeFormatter();
+ formatter.setPattern(new String(patternBytes, StandardCharsets.UTF_8), false);
+ }
+
+ @Override
+ protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i) {
+ super.func(outV, inV, i, formatter);
+ }
+
+ @Override
+ public String vectorExpressionParameters() {
+ return super.vectorExpressionParameters() + ", format pattern: " + formatter.getPattern();
+ }
+}
\ No newline at end of file
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToString.java
index adc3a9d7b9..368e51456d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToString.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToString.java
@@ -18,29 +18,14 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import org.apache.hadoop.hive.common.format.datetime.DefaultHiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.type.Timestamp;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
-import java.sql.Timestamp;
-import java.time.Instant;
-import java.time.LocalDateTime;
-import java.time.ZoneOffset;
-import java.time.format.DateTimeFormatter;
-import java.time.format.DateTimeFormatterBuilder;
-import java.time.temporal.ChronoField;
-
public class CastTimestampToString extends TimestampToStringUnaryUDF {
private static final long serialVersionUID = 1L;
- private static final DateTimeFormatter PRINT_FORMATTER;
-
- static {
- DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder();
- // Date and time parts
- builder.append(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
- // Fractional part
- builder.optionalStart().appendFraction(ChronoField.NANO_OF_SECOND, 0, 9, true).optionalEnd();
- PRINT_FORMATTER = builder.toFormatter();
- }
public CastTimestampToString() {
super();
@@ -55,18 +40,29 @@ protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) {
outV.setVal(i, bytes, 0, length);
}
+ private void assignNull(BytesColumnVector outV, int i) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ }
+
@Override
protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i) {
- byte[] temp = LocalDateTime.ofInstant(Instant.ofEpochMilli(inV.time[i]), ZoneOffset.UTC)
- .withNano(inV.nanos[i])
- .format(PRINT_FORMATTER).getBytes();
- assign(outV, i, temp, temp.length);
+ func(outV, inV, i, null);
}
- public static String getTimestampString(Timestamp ts) {
- return
- LocalDateTime.ofInstant(Instant.ofEpochMilli(ts.getTime()), ZoneOffset.UTC)
- .withNano(ts.getNanos())
- .format(PRINT_FORMATTER);
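+ // A null formatter selects the default SQL:2016 output format; any formatting failure produces a NULL output.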
+ protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i, HiveDateTimeFormatter formatter) {
+ try {
+ Timestamp timestamp = Timestamp.ofEpochMilli(inV.time[i], inV.nanos[i]);
+ String output = (formatter != null) ? formatter.format(timestamp) :
+ DefaultHiveSqlDateTimeFormatter.format(timestamp);
+ byte[] temp = output.getBytes();
+ assign(outV, i, temp, temp.length);
+ } catch (Exception e) {
+ assignNull(outV, i);
+ }
+ }
+ public static String getTimestampString(java.sql.Timestamp ts) {
+ return DefaultHiveSqlDateTimeFormatter.format(
+ Timestamp.ofEpochMilli(ts.getTime(), ts.getNanos()));
}
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToStringWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToStringWithFormat.java
new file mode 100644
index 0000000000..375634c2a8
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToStringWithFormat.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Vectorized UDF for CAST (<timestamp> TO STRING WITH FORMAT <pattern>).
+ */
+public class CastTimestampToStringWithFormat extends CastTimestampToString {
+ private static final long serialVersionUID = 1L;
+ private HiveDateTimeFormatter formatter;
+
+ public CastTimestampToStringWithFormat() {
+ super();
+ }
+
+ public CastTimestampToStringWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) {
+ super(inputColumn, outputColumnNum);
+
+ if (patternBytes == null) {
+ throw new IllegalStateException("Tried to cast ( to string with format"
+ + " ), but not found");
+ }
+ formatter = new HiveSqlDateTimeFormatter();
+ formatter.setPattern(new String(patternBytes, StandardCharsets.UTF_8), false);
+ }
+
+ @Override
+ protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i) {
+ super.func(outV, inV, i, formatter);
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+ b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+ .setNumArguments(2)
+ .setArgumentTypes(
+ VectorExpressionDescriptor.ArgumentType.TIMESTAMP,
+ VectorExpressionDescriptor.ArgumentType.STRING)
+ .setInputExpressionTypes(
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.SCALAR);
+ return b.build();
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToVarCharWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToVarCharWithFormat.java
new file mode 100644
index 0000000000..b3bf7e4b14
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToVarCharWithFormat.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+
+import java.nio.charset.StandardCharsets;
+
+public class CastTimestampToVarCharWithFormat extends CastTimestampToVarChar {
+
+ private static final long serialVersionUID = 1L;
+ private HiveDateTimeFormatter formatter;
+
+ public CastTimestampToVarCharWithFormat() {
+ super();
+ }
+
+ public CastTimestampToVarCharWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) {
+ super(inputColumn, outputColumnNum);
+
+ if (patternBytes == null) {
+ throw new IllegalStateException("Tried to cast ( to varchar with format"
+ + "), but not found");
+ }
+ formatter = new HiveSqlDateTimeFormatter();
+ formatter.setPattern(new String(patternBytes, StandardCharsets.UTF_8), false);
+ }
+
+ @Override
+ protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i) {
+ super.func(outV, inV, i, formatter);
+ }
+
+ @Override
+ public String vectorExpressionParameters() {
+ return super.vectorExpressionParameters() + ", format pattern: " + formatter.getPattern();
+ }
+}
\ No newline at end of file
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index f22511ad67..2a65f0e74e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -248,7 +248,8 @@ castExpression
expression
KW_AS
primitiveType
- RPAREN -> ^(TOK_FUNCTION primitiveType expression)
+ (KW_FORMAT expression)?
+ RPAREN -> ^(TOK_FUNCTION primitiveType expression*)
;
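+// Illustrative example (not part of the original grammar comment): the optional
+// (KW_FORMAT expression)? branch accepts e.g.
+//   CAST('2018-02-03' AS TIMESTAMP FORMAT 'yyyy-mm-dd')
+// and passes the format string through as an additional child of TOK_FUNCTION.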
caseExpression
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java
index bcc4114099..6c3c3349bb 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java
@@ -17,8 +17,6 @@
*/
package org.apache.hadoop.hive.ql.udf.generic;
-import java.text.SimpleDateFormat;
-
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateSubColCol;
@@ -46,7 +44,6 @@
+ " '2009-07-29'")
@VectorizedExpressions({VectorUDFDateSubColScalar.class, VectorUDFDateSubScalarCol.class, VectorUDFDateSubColCol.class})
public class GenericUDFDateSub extends GenericUDFDateAdd {
- private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
public GenericUDFDateSub() {
this.signModifier = -1;
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTimestamp.java
index 70f57b7727..83cf02866a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTimestamp.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTimestamp.java
@@ -17,8 +17,8 @@
*/
package org.apache.hadoop.hive.ql.udf.generic;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
@@ -29,9 +29,9 @@
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDoubleToTimestamp;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastLongToTimestamp;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToTimestamp;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToTimestampWithFormat;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.session.SessionState;
-import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.TimestampConverter;
@@ -48,9 +48,12 @@
*
*/
@Description(name = "timestamp",
-value = "cast(date as timestamp) - Returns timestamp")
+ value = "cast( as timestamp [format ]) - Returns timestamp",
+ extended = "If format is specified with FORMAT argument then SQL:2016 datetime formats will be "
+ + "used.")
@VectorizedExpressions({CastLongToTimestamp.class, CastDateToTimestamp.class,
- CastDoubleToTimestamp.class, CastDecimalToTimestamp.class, CastStringToTimestamp.class})
+ CastDoubleToTimestamp.class, CastDecimalToTimestamp.class, CastStringToTimestamp.class,
+ CastStringToTimestampWithFormat.class})
public class GenericUDFTimestamp extends GenericUDF {
private transient PrimitiveObjectInspector argumentOI;
@@ -88,6 +91,13 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen
PrimitiveObjectInspectorFactory.writableTimestampObjectInspector);
tc.setIntToTimestampInSeconds(intToTimestampInSeconds);
+ // for CAST WITH FORMAT
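+ // e.g. CAST('2018-02-03' AS TIMESTAMP FORMAT 'yyyy-mm-dd'); arguments[1] holds the constant pattern string (illustrative example)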
+ if (arguments.length > 1 && arguments[1] != null) {
+ HiveDateTimeFormatter formatter = new HiveSqlDateTimeFormatter();
+ formatter.setPattern(getConstantStringValue(arguments, 1), true);
+ tc.setDateTimeFormatter(formatter);
+ }
+
return PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
}
@@ -97,17 +107,21 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException {
if (o0 == null) {
return null;
}
-
return tc.convert(o0);
}
@Override
public String getDisplayString(String[] children) {
- assert (children.length == 1);
+ assert (1 <= children.length && children.length <= 2);
StringBuilder sb = new StringBuilder();
sb.append("CAST( ");
sb.append(children[0]);
- sb.append(" AS TIMESTAMP)");
+ sb.append(" AS TIMESTAMP");
+ if (children.length == 2) {
+ sb.append(" FORMAT ");
+ sb.append(children[1]);
+ }
+ sb.append(")");
return sb.toString();
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToChar.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToChar.java
index 899abf76b8..8d02bd6e4a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToChar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToChar.java
@@ -19,6 +19,8 @@
import java.io.Serializable;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.ql.exec.Description;
@@ -36,7 +38,8 @@
@Description(name = "char",
value = "CAST( as CHAR(length)) - Converts the argument to a char value.",
extended = "Values will be truncated if the input value is too long to fit"
-+ " within the char length."
++ " within the char length. If format is specified with FORMAT argument then SQL:2016 datetime"
++ " formats will be used.\\n\""
+ "Example:\n "
+ " > SELECT CAST(1234 AS char(10)) FROM src LIMIT 1;\n"
+ " '1234'")
@@ -55,7 +58,7 @@ public GenericUDFToChar() {
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
- if (arguments.length != 1) {
+ if (arguments.length < 1) {
throw new UDFArgumentException("CHAR cast requires a value argument");
}
try {
@@ -71,6 +74,14 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen
PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
converter = new HiveCharConverter(argumentOI, outputOI);
+
+ // for CAST WITH FORMAT
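+ // e.g. CAST(dt AS CHAR(10) FORMAT 'dd-mm-yyyy'); the pattern is read as a constant from arguments[1] (illustrative example)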
+ if (arguments.length > 1 && arguments[1] != null) {
+ HiveDateTimeFormatter formatter = new HiveSqlDateTimeFormatter();
+ formatter.setPattern(getConstantStringValue(arguments, 1), false);
+ converter.setDateTimeFormatter(formatter);
+ }
+
return outputOI;
}
@@ -86,13 +97,17 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException {
@Override
public String getDisplayString(String[] children) {
- assert (children.length == 1);
+ assert (children.length == 1 || children.length == 2);
StringBuilder sb = new StringBuilder();
sb.append("CAST( ");
sb.append(children[0]);
sb.append(" AS CHAR(");
sb.append("" + typeInfo.getLength());
sb.append(")");
+ if (children.length == 2) {
+ sb.append(" FORMAT ");
+ sb.append(children[1]);
+ }
sb.append(")");
return sb.toString();
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java
index c309ffa5e3..12d9a48acf 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java
@@ -17,11 +17,14 @@
*/
package org.apache.hadoop.hive.ql.udf.generic;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToDate;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToDateWithFormat;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToDate;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -36,12 +39,15 @@
* GenericUDFToDate
*/
@Description(name = "date",
- value = "CAST( as DATE) - Returns the date represented by the date string.",
- extended = "date_string is a string in the format 'yyyy-MM-dd.'"
+ value = "CAST( as DATE [FORMAT ]) - Returns the date represented by the date string.",
+ extended = "date_string is a string in the format 'yyyy-MM-dd.' "
+ + "If format is specified with FORMAT argument then SQL:2016 datetime formats will be "
+ + "used for parsing."
+ "Example:\n "
+ " > SELECT CAST('2009-01-01' AS DATE) FROM src LIMIT 1;\n"
+ " '2009-01-01'")
-@VectorizedExpressions({CastStringToDate.class, CastTimestampToDate.class})
+@VectorizedExpressions({CastStringToDate.class, CastTimestampToDate.class,
+ CastStringToDateWithFormat.class})
public class GenericUDFToDate extends GenericUDF {
private transient PrimitiveObjectInspector argumentOI;
@@ -75,6 +81,14 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen
dc = new DateConverter(argumentOI,
PrimitiveObjectInspectorFactory.writableDateObjectInspector);
+
+ // for CAST WITH FORMAT
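+ // e.g. CAST('2019-01-01' AS DATE FORMAT 'yyyy-mm-dd') (illustrative example)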
+ if (arguments.length > 1 && arguments[1] != null) {
+ HiveDateTimeFormatter formatter = new HiveSqlDateTimeFormatter();
+ formatter.setPattern(getConstantStringValue(arguments, 1), true);
+ dc.setDateTimeFormatter(formatter);
+ }
+
return PrimitiveObjectInspectorFactory.writableDateObjectInspector;
}
@@ -90,11 +104,16 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException {
@Override
public String getDisplayString(String[] children) {
- assert (children.length == 1);
+ assert (children.length == 1 || children.length == 2);
StringBuilder sb = new StringBuilder();
sb.append("CAST( ");
sb.append(children[0]);
- sb.append(" AS DATE)");
+ sb.append(" AS DATE");
+ if (children.length == 2) {
+ sb.append(" FORMAT ");
+ sb.append(children[1]);
+ }
+ sb.append(")");
return sb.toString();
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToString.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToString.java
index d5764419d6..375f40471e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToString.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToString.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -17,6 +17,8 @@
*/
package org.apache.hadoop.hive.ql.udf.generic;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -28,10 +30,12 @@
import org.slf4j.LoggerFactory;
@Description(name = "string",
-value = "CAST( as STRING) - Converts the argument to a string value.",
-extended = "Example:\n "
-+ " > SELECT CAST(1234 AS string) FROM src LIMIT 1;\n"
-+ " '1234'")
+ value = "CAST( as STRING [FORMAT ]) - Converts the argument to a string value.",
+ extended = "If format is specified with FORMAT argument then SQL:2016 datetime formats will "
+ + "be used.\n"
+ + "Example:\n "
+ + " > SELECT CAST(1234 AS string) FROM src LIMIT 1;\n"
+ + " '1234'")
public class GenericUDFToString extends GenericUDF {
private static final Logger LOG = LoggerFactory.getLogger(GenericUDFToString.class.getName());
@@ -43,7 +47,7 @@ public GenericUDFToString() {
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
- if (arguments.length != 1) {
+ if (arguments.length < 1) {
throw new UDFArgumentException("STRING cast requires a value argument");
}
try {
@@ -54,26 +58,39 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen
}
converter = new TextConverter(argumentOI);
+
+ // for CAST WITH FORMAT
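+ // e.g. CAST(ts AS STRING FORMAT 'yyyy-mm-dd hh24:mi') (illustrative example)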
+ if (arguments.length > 1 && arguments[1] != null) {
+ HiveDateTimeFormatter formatter = new HiveSqlDateTimeFormatter();
+ formatter.setPattern(getConstantStringValue(arguments, 1), false);
+ converter.setDateTimeFormatter(formatter);
+ }
+
return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
}
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
- Object o0 = arguments[0].get();
- if (o0 == null) {
- return null;
- }
+ Object o0 = arguments[0].get();
+ if (o0 == null) {
+ return null;
+ }
- return converter.convert(o0);
+ return converter.convert(o0);
}
@Override
public String getDisplayString(String[] children) {
- assert (children.length == 1);
+ assert (children.length == 1 || children.length == 2);
StringBuilder sb = new StringBuilder();
sb.append("CAST( ");
sb.append(children[0]);
- sb.append(" AS STRING)");
+ sb.append(" AS STRING");
+ if (children.length == 2) {
+ sb.append(" FORMAT ");
+ sb.append(children[1]);
+ }
+ sb.append(")");
return sb.toString();
}
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java
index b9a2bc2b9f..a4089fb7fd 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java
@@ -19,6 +19,8 @@
import java.io.Serializable;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.ql.exec.Description;
@@ -36,7 +38,8 @@
@Description(name = "varchar",
value = "CAST( as VARCHAR(length)) - Converts the argument to a varchar value.",
extended = "Values will be truncated if the input value is too long to fit"
-+ " within the varchar length."
++ " within the varchar length. If format is specified with FORMAT argument then SQL:2016 datetime"
++ " formats will be used.\n"
+ "Example:\n "
+ " > SELECT CAST(1234 AS varchar(10)) FROM src LIMIT 1;\n"
+ " '1234'")
@@ -55,7 +58,7 @@ public GenericUDFToVarchar() {
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
- if (arguments.length != 1) {
+ if (arguments.length < 1) {
throw new UDFArgumentException("VARCHAR cast requires a value argument");
}
try {
@@ -71,6 +74,14 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen
PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
converter = new HiveVarcharConverter(argumentOI, outputOI);
+
+ // for CAST WITH FORMAT
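+ // same constant-format handling as in GenericUDFToString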
+ if (arguments.length > 1 && arguments[1] != null) {
+ HiveDateTimeFormatter formatter = new HiveSqlDateTimeFormatter();
+ formatter.setPattern(getConstantStringValue(arguments, 1), false);
+ converter.setDateTimeFormatter(formatter);
+ }
+
return outputOI;
}
@@ -86,12 +97,16 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException {
@Override
public String getDisplayString(String[] children) {
- assert (children.length == 1);
+ assert (children.length == 1 || children.length == 2);
StringBuilder sb = new StringBuilder();
sb.append("CAST( ");
sb.append(children[0]);
sb.append(" AS ");
sb.append(typeInfo.getQualifiedName());
+ if (children.length == 2) {
+ sb.append(" FORMAT ");
+ sb.append(children[1]);
+ }
sb.append(")");
return sb.toString();
}
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java
index 663237739e..314e394d67 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java
@@ -22,6 +22,7 @@
import java.util.Arrays;
import java.util.Random;
+import org.apache.hadoop.hive.common.type.Date;
import org.junit.Assert;
import org.apache.hadoop.hive.serde2.RandomTypeUtil;
@@ -115,18 +116,20 @@ public void testRoundToDecimalPlaces() throws HiveException {
Assert.assertEquals(1.2346d, resultV.vector[7], Double.MIN_VALUE);
}
- static int DAYS_LIMIT = 365 * 9999;
+ private static final int DAYS_LIMIT = 365 * 9999;
+ // approximate lower bound, chosen so that some generated epoch days are negative:
+ private static final int SMALLEST_EPOCH_DAY = -365 * 1969;
public static VectorizedRowBatch getVectorizedRowBatchDateInTimestampOut(int[] intValues) {
Random r = new Random(12099);
VectorizedRowBatch batch = new VectorizedRowBatch(2);
LongColumnVector inV;
TimestampColumnVector outV;
- inV = new LongColumnVector();
- outV = new TimestampColumnVector();
+ inV = new LongColumnVector(intValues.length);
+ outV = new TimestampColumnVector(intValues.length);
for (int i = 0; i < intValues.length; i++) {
- intValues[i] = r.nextInt() % DAYS_LIMIT;
+ intValues[i] = SMALLEST_EPOCH_DAY + r.nextInt() % DAYS_LIMIT;
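+ // Java's % keeps the sign of r.nextInt(), so values range over roughly
+ // (SMALLEST_EPOCH_DAY - DAYS_LIMIT, SMALLEST_EPOCH_DAY + DAYS_LIMIT)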
inV.vector[i] = intValues[i];
}
@@ -137,6 +140,36 @@ public static VectorizedRowBatch getVectorizedRowBatchDateInTimestampOut(int[] i
return batch;
}
+ public static VectorizedRowBatch getVectorizedRowBatchDateInStringOut(int[] intValues) {
+ // get date in timestamp out, and change timestamp out to string out
+ VectorizedRowBatch batch = getVectorizedRowBatchDateInTimestampOut(intValues);
+ BytesColumnVector outV = new BytesColumnVector(intValues.length);
+ batch.cols[1] = outV;
+ return batch;
+ }
+
+ // Used by TestVectorTypeCastsWithFormat#testCastDateToStringWithFormat to test
+ // CastDateToStringWithFormat.
+ public static VectorizedRowBatch getVectorizedRowBatchDateInStringOutFormatted() {
+ VectorizedRowBatch batch = new VectorizedRowBatch(2);
+ LongColumnVector dateColumnV;
+ BytesColumnVector stringColumnV;
+ dateColumnV = new LongColumnVector();
+ stringColumnV = new BytesColumnVector();
+
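+ // default-sized column vectors (1024 entries) are larger than needed for the 6 rows below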
+ dateColumnV.vector[0] = Date.valueOf("2019-12-31").toEpochDay();
+ dateColumnV.vector[1] = Date.valueOf("1776-07-04").toEpochDay();
+ dateColumnV.vector[2] = Date.valueOf("2012-02-29").toEpochDay();
+ dateColumnV.vector[3] = Date.valueOf("1580-08-08").toEpochDay();
+ dateColumnV.vector[4] = Date.valueOf("0005-01-01").toEpochDay();
+ dateColumnV.vector[5] = Date.valueOf("9999-12-31").toEpochDay();
+
+ batch.cols[0] = dateColumnV;
+ batch.cols[1] = stringColumnV;
+ batch.size = 6;
+ return batch;
+ }
+
public static VectorizedRowBatch getVectorizedRowBatchDoubleInLongOut() {
VectorizedRowBatch batch = new VectorizedRowBatch(2);
LongColumnVector lcv;
@@ -277,6 +310,42 @@ public static VectorizedRowBatch getVectorizedRowBatchStringInLongOut() {
return batch;
}
+ public static VectorizedRowBatch getVectorizedRowBatchStringInTimestampOutFormatted() {
+ VectorizedRowBatch batch = new VectorizedRowBatch(2);
+ BytesColumnVector inV;
+ inV = new BytesColumnVector();
+ inV.initBuffer();
+ inV.setVal(0, StandardCharsets.UTF_8.encode("2019-12-31 00:00:00.999999999").array());
+ inV.setVal(1, StandardCharsets.UTF_8.encode("1776-07-04 17:07:06.177617761").array());
+ inV.setVal(2, StandardCharsets.UTF_8.encode("2012-02-29 23:59:59.999999999").array());
+ inV.setVal(3, StandardCharsets.UTF_8.encode("1580-08-08 00:00:00.0").array());
+ inV.setVal(4, StandardCharsets.UTF_8.encode("0005-01-01 00:00:00.0").array());
+ inV.setVal(5, StandardCharsets.UTF_8.encode("9999-12-31 23:59:59.999999999").array());
+
+ batch.cols[0] = inV;
+
+ batch.size = 6;
+ return batch;
+ }
+
+ public static VectorizedRowBatch getVectorizedRowBatchStringInDateOutFormatted() {
+ VectorizedRowBatch batch = new VectorizedRowBatch(2);
+ BytesColumnVector inV;
+ inV = new BytesColumnVector();
+ inV.initBuffer();
+ inV.setVal(0, StandardCharsets.UTF_8.encode("19/12/31").array());
+ inV.setVal(1, StandardCharsets.UTF_8.encode("1776--07--04").array());
+ inV.setVal(2, StandardCharsets.UTF_8.encode("2012/02/29").array());
+ inV.setVal(3, StandardCharsets.UTF_8.encode("1580/08/08").array());
+ inV.setVal(4, StandardCharsets.UTF_8.encode("0005/01/01").array());
+ inV.setVal(5, StandardCharsets.UTF_8.encode("9999/12/31").array());
+
+ batch.cols[0] = inV;
+
+ batch.size = 6;
+ return batch;
+ }
+
public static VectorizedRowBatch getVectorizedRowBatchTimestampInLongOut(long[] longValues) {
Random r = new Random(345);
VectorizedRowBatch batch = new VectorizedRowBatch(2);
@@ -297,6 +366,58 @@ public static VectorizedRowBatch getVectorizedRowBatchTimestampInLongOut(long[]
return batch;
}
+
+ public static VectorizedRowBatch getVectorizedRowBatchTimestampInStringOut(
+ long[] epochSecondValues, int[] nanoValues) {
+ Random r = new Random(345);
+ VectorizedRowBatch batch = new VectorizedRowBatch(2);
+ batch.size = epochSecondValues.length;
+
+ TimestampColumnVector inV;
+ BytesColumnVector outV;
+ inV = new TimestampColumnVector(batch.size);
+ outV = new BytesColumnVector(batch.size);
+
+ for (int i = 0; i < batch.size; i++) {
+ Timestamp randTimestamp = RandomTypeUtil.getRandTimestamp(r);
+ epochSecondValues[i] = randTimestamp.toEpochSecond();
+ nanoValues[i] = randTimestamp.getNanos();
+ inV.set(i, randTimestamp.toSqlTimestamp());
+ }
+
+ batch.cols[0] = inV;
+ batch.cols[1] = outV;
+
+ return batch;
+ }
+
+ public static VectorizedRowBatch getVectorizedRowBatchTimestampInStringOutFormatted() {
+ VectorizedRowBatch batch = new VectorizedRowBatch(2);
+ TimestampColumnVector timestampColumnV;
+ BytesColumnVector stringColumnV;
+ timestampColumnV = new TimestampColumnVector();
+ stringColumnV = new BytesColumnVector();
+
+ timestampColumnV.set(0, getSqlTimestamp("2019-12-31 19:20:21.999999999"));
+ timestampColumnV.set(1, getSqlTimestamp("1776-07-04 17:07:06.177617761"));
+ timestampColumnV.set(2, getSqlTimestamp("2012-02-29 23:59:59.999999999"));
+ timestampColumnV.set(3, getSqlTimestamp("1580-08-08 00:00:00"));
+ timestampColumnV.set(4, getSqlTimestamp("0005-01-01 00:00:00"));
+ timestampColumnV.set(5, getSqlTimestamp("9999-12-31 23:59:59.999999999"));
+
+ batch.cols[0] = timestampColumnV;
+ batch.cols[1] = stringColumnV;
+ batch.size = 6;
+ return batch;
+ }
+
+ private static java.sql.Timestamp getSqlTimestamp(String s) {
+ java.sql.Timestamp ts = java.sql.Timestamp.valueOf(s);
+ // subtract 8 hours: java.sql.Timestamp.valueOf interprets the string in the VM's local
+ // time zone, which is US/Pacific in these tests
+ ts.setHours(ts.getHours() - 8);
+ return ts;
+ }
+
static long SECONDS_LIMIT = 60L * 24L * 365L * 9999L;
public static VectorizedRowBatch getVectorizedRowBatchLongInTimestampOut(long[] longValues) {
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java
index 58fd7b030e..a449ea143d 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java
@@ -23,7 +23,9 @@
import static org.junit.Assert.assertTrue;
import java.math.BigDecimal;
+import java.nio.charset.StandardCharsets;
import java.sql.Timestamp;
+import java.util.Arrays;
import java.util.Random;
import java.util.concurrent.TimeUnit;
@@ -72,6 +74,30 @@ public void testVectorCastDoubleToLong() throws HiveException {
Assert.assertEquals(1, resultV.vector[6]);
}
+ // +8 hours from PST to GMT, needed because java.sql.Date subtracts 8 hours from the final
+ // value when the test VM's time zone is PST.
+ private static final long TIME_DIFFERENCE = 28800000L;
+ @Test
+ public void testCastDateToString() throws HiveException {
+ int[] intValues = new int[100];
+ VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchDateInStringOut(intValues);
+ BytesColumnVector resultV = (BytesColumnVector) b.cols[1];
+ b.cols[0].noNulls = true;
+ VectorExpression expr = new CastDateToString(0, 1);
+ expr.evaluate(b);
+
+ String expected, result;
+ for (int i = 0; i < intValues.length; i++) {
+ expected =
+ new java.sql.Date(DateWritableV2.daysToMillis(intValues[i]) + TIME_DIFFERENCE).toString();
+ byte[] subbyte = Arrays.copyOfRange(resultV.vector[i], resultV.start[i],
+ resultV.start[i] + resultV.length[i]);
+ result = new String(subbyte, StandardCharsets.UTF_8);
+
+ Assert.assertEquals("Index: " + i + " Epoch day value: " + intValues[i], expected, result);
+ }
+ }
+
@Test
public void testCastDateToTimestamp() throws HiveException {
int[] intValues = new int[500];
@@ -192,6 +218,31 @@ public void testCastTimestampToDouble() throws HiveException {
}
}
+ @Test
+ public void testCastTimestampToString() throws HiveException {
+ int numberToTest = 100;
+ long[] epochSecondValues = new long[numberToTest];
+ int[] nanoValues = new int[numberToTest];
+ VectorizedRowBatch b =
+ TestVectorMathFunctions.getVectorizedRowBatchTimestampInStringOut(epochSecondValues, nanoValues);
+ BytesColumnVector resultV = (BytesColumnVector) b.cols[1];
+ b.cols[0].noNulls = true;
+ VectorExpression expr = new CastTimestampToString(0, 1);
+ expr.evaluate(b);
+
+ String expected, result;
+ for (int i = 0; i < numberToTest; i++) {
+ expected = org.apache.hadoop.hive.common.type.Timestamp
+ .ofEpochSecond(epochSecondValues[i], nanoValues[i]).toString();
+ byte[] subbyte = Arrays.copyOfRange(resultV.vector[i], resultV.start[i],
+ resultV.start[i] + resultV.length[i]);
+ result = new String(subbyte, StandardCharsets.UTF_8);
+ Assert.assertEquals("Index: " + i + " Seconds since epoch: " + epochSecondValues[i] +
+ " nanoseconds: " + nanoValues[i],
+ expected, result);
+ }
+ }
+
public byte[] toBytes(String s) {
byte[] b = null;
try {
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCastsWithFormat.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCastsWithFormat.java
new file mode 100644
index 0000000000..bd3c9525ea
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCastsWithFormat.java
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.io.DateWritableV2;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+
+/**
+ * Tests the vectorized type-cast UDFs CastDateToStringWithFormat, CastTimestampToStringWithFormat,
+ * CastStringToDateWithFormat, CastStringToTimestampWithFormat.
+ */
+public class TestVectorTypeCastsWithFormat {
+
+ @Test
+ public void testCastDateToStringWithFormat() throws HiveException {
+ VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchDateInStringOutFormatted();
+ BytesColumnVector resultV = (BytesColumnVector) b.cols[1];
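+ // constructor args: input column index, SQL:2016 pattern as UTF-8 bytes, output column index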
+ VectorExpression expr = new CastDateToStringWithFormat(0, "yyyy".getBytes(), 1);
+ expr.evaluate(b);
+ verifyString(0, "2019", resultV);
+ verifyString(1, "1776", resultV);
+ verifyString(2, "2012", resultV);
+ verifyString(3, "1580", resultV);
+ verifyString(4, "0005", resultV); //
+ verifyString(5, "9999", resultV);
+
+ expr = new CastDateToStringWithFormat(0, "MM".getBytes(), 1);
+ resultV = new BytesColumnVector();
+ b.cols[1] = resultV;
+ expr.evaluate(b);
+ verifyString(0, "12", resultV);
+ verifyString(1, "07", resultV);
+ verifyString(2, "02", resultV);
+ verifyString(3, "08", resultV);
+ verifyString(4, "01", resultV);
+ verifyString(5, "12", resultV);
+ }
+
+ @Test
+ public void testCastTimestampToStringWithFormat() throws HiveException {
+ VectorizedRowBatch b =
+ TestVectorMathFunctions.getVectorizedRowBatchTimestampInStringOutFormatted();
+ BytesColumnVector resultV = (BytesColumnVector) b.cols[1];
+ VectorExpression expr = new CastTimestampToStringWithFormat(0, "yyyy".getBytes(), 1);
+ expr.evaluate(b);
+
+ Assert.assertEquals("2019", getStringFromBytesColumnVector(resultV, 0));
+ Assert.assertEquals("1776", getStringFromBytesColumnVector(resultV, 1));
+ Assert.assertEquals("2012", getStringFromBytesColumnVector(resultV, 2));
+ Assert.assertEquals("1580", getStringFromBytesColumnVector(resultV, 3));
+ Assert.assertEquals("0004", getStringFromBytesColumnVector(resultV, 4)); //frogmethod should be 0005 because sql timestamp is used
+ Assert.assertEquals("9999", getStringFromBytesColumnVector(resultV, 5));
+
+ resultV = new BytesColumnVector();
+ b.cols[1] = resultV;
+ expr = new CastTimestampToStringWithFormat(0, "HH24".getBytes(), 1);
+ expr.evaluate(b);
+
+ Assert.assertEquals("19", getStringFromBytesColumnVector(resultV, 0));
+ Assert.assertEquals("17", getStringFromBytesColumnVector(resultV, 1));
+ Assert.assertEquals("23", getStringFromBytesColumnVector(resultV, 2));
+ Assert.assertEquals("00", getStringFromBytesColumnVector(resultV, 3));
+ Assert.assertEquals("00", getStringFromBytesColumnVector(resultV, 4));
+ Assert.assertEquals("23", getStringFromBytesColumnVector(resultV, 5));
+ }
+
+ @Test
+ public void testCastStringToTimestampWithFormat() throws HiveException {
+ VectorizedRowBatch b =
+ TestVectorMathFunctions.getVectorizedRowBatchStringInTimestampOutFormatted();
+ TimestampColumnVector resultV;
+ resultV = new TimestampColumnVector();
+ b.cols[1] = resultV;
+ VectorExpression expr = new CastStringToTimestampWithFormat(0, "yyyy.mm.dd HH24.mi.ss.ff".getBytes(), 1);
+ expr.evaluate(b);
+
+ verifyTimestamp("2019-12-31 00:00:00.999999999", resultV, 0);
+ verifyTimestamp("1776-07-04 17:07:06.177617761", resultV, 1);
+ verifyTimestamp("2012-02-29 23:59:59.999999999", resultV, 2);
+ verifyTimestamp("1580-08-08 00:00:00", resultV, 3);
+ verifyTimestamp("0005-01-01 00:00:00", resultV, 4);
+ verifyTimestamp("9999-12-31 23:59:59.999999999", resultV, 5);
+ }
+
+ private void verifyTimestamp(String tsString, TimestampColumnVector resultV, int index) {
+ Assert.assertEquals(Timestamp.valueOf(tsString).toEpochMilli(), resultV.time[index]);
+ Assert.assertEquals(Timestamp.valueOf(tsString).getNanos(), resultV.nanos[index]);
+ }
+
+ @Test
+ public void testCastStringToDateWithFormat() throws HiveException {
+ VectorizedRowBatch b =
+ TestVectorMathFunctions.getVectorizedRowBatchStringInDateOutFormatted();
+ LongColumnVector resultV;
+ resultV = new LongColumnVector();
+ b.cols[1] = resultV;
+ VectorExpression expr = new CastStringToDateWithFormat(0, "yyyy.mm.dd".getBytes(), 1);
+ expr.evaluate(b);
+
+ Assert.assertEquals(Date.valueOf("2019-12-31").toEpochDay(), resultV.vector[0]);
+ Assert.assertEquals(Date.valueOf("1776-07-04").toEpochDay(), resultV.vector[1]);
+ Assert.assertEquals(Date.valueOf("2012-02-29").toEpochDay(), resultV.vector[2]);
+ Assert.assertEquals(Date.valueOf("1580-08-08").toEpochDay(), resultV.vector[3]);
+ Assert.assertEquals(Date.valueOf("0005-01-01").toEpochDay(), resultV.vector[4]);
+ Assert.assertEquals(Date.valueOf("9999-12-31").toEpochDay(), resultV.vector[5]);
+ }
+
+ private void verifyString(int resultIndex, String expected, BytesColumnVector resultV) {
+ String result = getStringFromBytesColumnVector(resultV, resultIndex);
+ Assert.assertEquals(expected, result);
+ }
+
+ private String getStringFromBytesColumnVector(BytesColumnVector resultV, int i) {
+ String result;
+ byte[] resultBytes = Arrays.copyOfRange(resultV.vector[i], resultV.start[i],
+ resultV.start[i] + resultV.length[i]);
+ result = new String(resultBytes, StandardCharsets.UTF_8);
+ return result;
+ }
+}
diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFFromUnixTime.java ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFFromUnixTime.java
new file mode 100644
index 0000000000..4d7132a74c
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFFromUnixTime.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.TestGenericUDFUtils;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Tests UDFFromUnixTime.
+ */
+public class TestUDFFromUnixTime {
+
+ @Test
+ public void testFromUnixTime() throws HiveException {
+ UDFFromUnixTime udf = new UDFFromUnixTime();
+
+ //int, no format
+ verifyInt(0, "1970-01-01 00:00:00", null, udf);
+ verifyInt(1296705906, "2011-02-03 04:05:06", null, udf);
+ verifyInt(1514818800, "2018-01-01 15:00:00", null, udf);
+
+ //long, no format
+ verifyLong(0L, "1970-01-01 00:00:00", null, udf);
+ verifyLong(1296705906L, "2011-02-03 04:05:06", null, udf);
+ verifyLong(1514818800L, "2018-01-01 15:00:00", null, udf);
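+ // The commented values below are the proleptic Gregorian equivalents of the hybrid
+ // Julian/Gregorian epoch seconds passed in; the two calendars differ by 5 days
+ // (432000s) around year 995 and by 2 days (172800s) around year 5.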
+ // proleptic Gregorian input: -30767590800L
+ verifyLong(-30767158800L, "0995-01-05 15:00:00", null, udf);
+ // proleptic Gregorian input: -62009366400
+ verifyLong(-62009539200L, "0005-01-01 00:00:00", null, udf);
+ verifyLong(253402300799L, "9999-12-31 23:59:59", null, udf);
+
+ //int with format
+ String format = "HH:mm:ss";
+ verifyInt(0, "00:00:00", format, udf);
+ verifyInt(1296705906, "04:05:06", format, udf);
+ verifyInt(1514818800, "15:00:00", format, udf);
+
+ //long with format
+ verifyLong(0L, "00:00:00", format, udf);
+ verifyLong(1296705906L, "04:05:06", format, udf);
+ verifyLong(1514818800L, "15:00:00", format, udf);
+ // proleptic Gregorian input: -30767590800L
+ verifyLong(-30767158800L, "15:00:00", format, udf);
+ // proleptic Gregorian input: -62009366400
+ verifyLong(-62009539200L, "00:00:00", format, udf);
+ verifyLong(253402300799L, "23:59:59", format, udf);
+
+ }
+
+ private void verifyInt(int value, String expected, String format, UDFFromUnixTime udf) {
+ IntWritable input = new IntWritable(value);
+ Text res;
+ if (format == null) {
+ res = udf.evaluate(input);
+ } else {
+ res = udf.evaluate(input, new Text(format));
+ }
+ Assert.assertEquals(expected, res.toString());
+ }
+
+ private void verifyLong(long value, String expected, String format, UDFFromUnixTime udf) {
+ LongWritable input = new LongWritable(value);
+ Text res;
+ if (format == null) {
+ res = udf.evaluate(input);
+ } else {
+ res = udf.evaluate(input, new Text(format));
+ }
+ Assert.assertEquals(expected, res.toString());
+ }
+}
diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java
index 7c2ee15646..e9c188b883 100644
--- ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java
+++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java
@@ -35,9 +35,9 @@
public class TestGenericUDFAddMonths extends TestCase {
- private final Text fmtTextWithTime = new Text("YYYY-MM-dd HH:mm:ss");
- private final Text fmtTextWithTimeAndms = new Text("YYYY-MM-dd HH:mm:ss.SSS");
- private final Text fmtTextWithoutTime = new Text("YYYY-MM-dd");
+ private final Text fmtTextWithTime = new Text("yyyy-MM-dd HH:mm:ss");
+ private final Text fmtTextWithTimeAndms = new Text("yyyy-MM-dd HH:mm:ss.SSS");
+ private final Text fmtTextWithoutTime = new Text("yyyy-MM-dd");
private final Text fmtTextInvalid = new Text("YYYY-abcdz");
public void testAddMonthsInt() throws HiveException {
@@ -215,7 +215,6 @@ public void testAddMonthsLong() throws HiveException {
}
-
private void runAndVerify(String str, int months, String expResult, GenericUDF udf)
throws HiveException {
DeferredObject valueObj0 = new DeferredJavaObject(new Text(str));
diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCastWithFormat.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCastWithFormat.java
new file mode 100644
index 0000000000..c3744db766
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCastWithFormat.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.io.DateWritableV2;
+import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.io.Text;
+import org.junit.Test;
+
+import static junit.framework.TestCase.assertEquals;
+import static junit.framework.TestCase.assertNull;
+
+/**
+ * Tests the cast UDFs GenericUDFToString, GenericUDFToDate, GenericUDFTimestamp with a second
+ * format argument, e.g. CAST (<expr> AS STRING FORMAT <format>).
+ */
+public class TestGenericUDFCastWithFormat {
+
+ @Test
+ public void testDateToStringWithFormat() throws HiveException {
+ GenericUDF udf = new GenericUDFToString();
+ ObjectInspector inputOI = PrimitiveObjectInspectorFactory.writableDateObjectInspector;
+ testCast(udf, inputOI, new DateWritableV2(Date.valueOf("2009-07-30")), "yyyy-MM-dd", "2009-07-30");
+ testCast(udf, inputOI, new DateWritableV2(Date.valueOf("2009-07-30")), "yyyy", "2009");
+ testCast(udf, inputOI, new DateWritableV2(Date.valueOf("1969-07-30")), "dd", "30");
+ }
+
+ @Test
+ public void testStringToDateWithFormat() throws HiveException {
+ GenericUDF udf = new GenericUDFToDate();
+ ObjectInspector inputOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
+ testCast(udf, inputOI, "2009-07-30", "yyyy-MM-dd", "2009-07-30");
+ testCast(udf, inputOI, "2009", "yyyy", "2009-01-01");
+ testCast(udf, inputOI, "30", "dd", "1970-01-30");
+ }
+
+ @Test
+ public void testStringToTimestampWithFormat() throws HiveException {
+ GenericUDF udf = new GenericUDFTimestamp();
+ ObjectInspector inputOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
+ testCast(udf, inputOI, "2009-07-30 01:02:03", "yyyy-MM-dd HH24:mi:ss", "2009-07-30 01:02:03");
+ testCast(udf, inputOI, "2009", "yyyy", "2009-01-01 00:00:00");
+ testCast(udf, inputOI, "07/30/2009 11:0200", "MM/dd/yyyy hh24:miss", "2009-07-30 11:02:00");
+ testCast(udf, inputOI, "69.07.30.", "yy.MM.dd.", "1969-07-30 00:00:00");
+ }
+
+ @Test
+ public void testTimestampToStringWithFormat() throws HiveException {
+ GenericUDF udf = new GenericUDFToString();
+ ObjectInspector inputOI = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
+ testCast(udf, inputOI, new TimestampWritableV2(Timestamp.valueOf("2009-07-30 00:00:08")), "yyyy-MM-dd HH24:mi:ss", "2009-07-30 00:00:08");
+ testCast(udf, inputOI, new TimestampWritableV2(Timestamp.valueOf("2009-07-30 11:02:00")), "MM/dd/yyyy hh24miss", "07/30/2009 110200");
+ testCast(udf, inputOI, new TimestampWritableV2(Timestamp.valueOf("2009-07-30 01:02:03")), "MM", "07");
+ testCast(udf, inputOI, new TimestampWritableV2(Timestamp.valueOf("1969-07-30 00:00:00")), "yy", "69");
+ }
+
+ private void testCast(
+ GenericUDF udf, ObjectInspector inputOI, Object input, String format, String output)
+ throws HiveException {
+
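+ // the format must be wrapped in a ConstantObjectInspector: the cast UDFs read it once
+ // in initialize() via getConstantStringValue rather than per row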
+ ConstantObjectInspector formatOI =
+ PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
+ TypeInfoFactory.getPrimitiveTypeInfo("string"), new Text(format));
+ ObjectInspector[] arguments = {inputOI, formatOI};
+ udf.initialize(arguments);
+
+ GenericUDF.DeferredObject valueObj = new GenericUDF.DeferredJavaObject(input);
+ GenericUDF.DeferredObject formatObj = new GenericUDF.DeferredJavaObject(new Text(format));
+ GenericUDF.DeferredObject[] args = {valueObj, formatObj};
+
+ assertEquals("cast " + inputOI.getTypeName() + " to " + udf.getFuncName() + " failed ",
+ output, udf.evaluate(args).toString());
+
+ // Try with null args
+ GenericUDF.DeferredObject[] nullArgs = {new GenericUDF.DeferredJavaObject(null)};
+ assertNull(udf.getFuncName() + " with NULL arguments failed", udf.evaluate(nullArgs));
+ }
+}
diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormat.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormat.java
index 6a3cdda48a..8c7df4d966 100644
--- ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormat.java
+++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormat.java
@@ -44,24 +44,32 @@ public void testDateFormatStr() throws HiveException {
udf.initialize(arguments);
// date str
- runAndVerifyStr("2015-04-05", fmtText, "Sunday", udf);
- runAndVerifyStr("2015-04-06", fmtText, "Monday", udf);
- runAndVerifyStr("2015-04-07", fmtText, "Tuesday", udf);
- runAndVerifyStr("2015-04-08", fmtText, "Wednesday", udf);
- runAndVerifyStr("2015-04-09", fmtText, "Thursday", udf);
- runAndVerifyStr("2015-04-10", fmtText, "Friday", udf);
- runAndVerifyStr("2015-04-11", fmtText, "Saturday", udf);
- runAndVerifyStr("2015-04-12", fmtText, "Sunday", udf);
+ runAndVerifyStr("2015-04-05", "Sunday", udf);
+ runAndVerifyStr("2015-04-06", "Monday", udf);
+ runAndVerifyStr("2015-04-07", "Tuesday", udf);
+ runAndVerifyStr("2015-04-08", "Wednesday", udf);
+ runAndVerifyStr("2015-04-09", "Thursday", udf);
+ runAndVerifyStr("2015-04-10", "Friday", udf);
+ runAndVerifyStr("2015-04-11", "Saturday", udf);
+ runAndVerifyStr("2015-04-12", "Sunday", udf);
// ts str
- runAndVerifyStr("2015-04-05 10:30:45", fmtText, "Sunday", udf);
- runAndVerifyStr("2015-04-06 10:30:45", fmtText, "Monday", udf);
- runAndVerifyStr("2015-04-07 10:30:45", fmtText, "Tuesday", udf);
- runAndVerifyStr("2015-04-08 10:30:45", fmtText, "Wednesday", udf);
- runAndVerifyStr("2015-04-09 10:30", fmtText, "Thursday", udf);
- runAndVerifyStr("2015-04-10 10:30:45.123", fmtText, "Friday", udf);
- runAndVerifyStr("2015-04-11T10:30:45", fmtText, "Saturday", udf);
- runAndVerifyStr("2015-04-12 10", fmtText, "Sunday", udf);
+ runAndVerifyStr("2015-04-05 10:30:45", "Sunday", udf);
+ runAndVerifyStr("2015-04-06 10:30:45", "Monday", udf);
+ runAndVerifyStr("2015-04-07 10:30:45", "Tuesday", udf);
+ runAndVerifyStr("2015-04-08 10:30:45", "Wednesday", udf);
+ runAndVerifyStr("2015-04-09 10:30", "Thursday", udf);
+ runAndVerifyStr("2015-04-10 10:30:45.123", "Friday", udf);
+ runAndVerifyStr("2015-04-11T10:30:45", "Saturday", udf);
+ runAndVerifyStr("2015-04-12 10", "Sunday", udf);
+
+ //make sure hour is ok
+ fmtText = new Text("hh");
+ valueOI1 = PrimitiveObjectInspectorFactory
+ .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtText);
+ arguments[1] = valueOI1;
+ udf.initialize(arguments);
+ runAndVerifyStr("2015-04-10 10:30:45.123", "10", udf);
}
public void testWrongDateStr() throws HiveException {
@@ -73,10 +81,10 @@ public void testWrongDateStr() throws HiveException {
ObjectInspector[] arguments = {valueOI0, valueOI1};
udf.initialize(arguments);
- runAndVerifyStr("2016-02-30 10:30:45", fmtText, "Tuesday", udf);
- runAndVerifyStr("2014-01-32", fmtText, "Saturday", udf);
- runAndVerifyStr("01/14/2014", fmtText, null, udf);
- runAndVerifyStr(null, fmtText, null, udf);
+ runAndVerifyStr("2016-02-30 10:30:45", "Tuesday", udf);
+ runAndVerifyStr("2014-01-32", "Saturday", udf);
+ runAndVerifyStr("01/14/2014", null, udf);
+ runAndVerifyStr(null, null, udf);
}
public void testDateFormatDate() throws HiveException {
@@ -89,14 +97,22 @@ public void testDateFormatDate() throws HiveException {
udf.initialize(arguments);
- runAndVerifyDate("2015-04-05", fmtText, "Sunday", udf);
- runAndVerifyDate("2015-04-06", fmtText, "Monday", udf);
- runAndVerifyDate("2015-04-07", fmtText, "Tuesday", udf);
- runAndVerifyDate("2015-04-08", fmtText, "Wednesday", udf);
- runAndVerifyDate("2015-04-09", fmtText, "Thursday", udf);
- runAndVerifyDate("2015-04-10", fmtText, "Friday", udf);
- runAndVerifyDate("2015-04-11", fmtText, "Saturday", udf);
- runAndVerifyDate("2015-04-12", fmtText, "Sunday", udf);
+ runAndVerifyDate("2015-04-05", "Sunday", udf);
+ runAndVerifyDate("2015-04-06", "Monday", udf);
+ runAndVerifyDate("2015-04-07", "Tuesday", udf);
+ runAndVerifyDate("2015-04-08", "Wednesday", udf);
+ runAndVerifyDate("2015-04-09", "Thursday", udf);
+ runAndVerifyDate("2015-04-10", "Friday", udf);
+ runAndVerifyDate("2015-04-11", "Saturday", udf);
+ runAndVerifyDate("2015-04-12", "Sunday", udf);
+
+ // make sure year is ok
+ fmtText = new Text("yyyy");
+ valueOI1 = PrimitiveObjectInspectorFactory
+ .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtText);
+ arguments[1] = valueOI1;
+ udf.initialize(arguments);
+ runAndVerifyDate("2015-04-08", "2015", udf);
}
public void testDateFormatTs() throws HiveException {
@@ -109,15 +125,24 @@ public void testDateFormatTs() throws HiveException {
udf.initialize(arguments);
- runAndVerifyTs("2015-04-08 10:30:45", fmtText, "Wednesday", udf);
- runAndVerifyTs("2015-04-05 10:30:45", fmtText, "Sunday", udf);
- runAndVerifyTs("2015-04-06 10:30:45", fmtText, "Monday", udf);
- runAndVerifyTs("2015-04-07 10:30:45", fmtText, "Tuesday", udf);
- runAndVerifyTs("2015-04-08 10:30:45", fmtText, "Wednesday", udf);
- runAndVerifyTs("2015-04-09 10:30:45", fmtText, "Thursday", udf);
- runAndVerifyTs("2015-04-10 10:30:45.123", fmtText, "Friday", udf);
- runAndVerifyTs("2015-04-11 10:30:45.123456789", fmtText, "Saturday", udf);
- runAndVerifyTs("2015-04-12 10:30:45", fmtText, "Sunday", udf);
+ runAndVerifyTs("2015-04-08 10:30:45", "Wednesday", udf);
+ runAndVerifyTs("2015-04-05 10:30:45", "Sunday", udf);
+ runAndVerifyTs("2015-04-06 10:30:45", "Monday", udf);
+ runAndVerifyTs("2015-04-07 10:30:45", "Tuesday", udf);
+ runAndVerifyTs("2015-04-08 10:30:45", "Wednesday", udf);
+ runAndVerifyTs("2015-04-09 10:30:45", "Thursday", udf);
+ runAndVerifyTs("2015-04-10 10:30:45.123", "Friday", udf);
+ runAndVerifyTs("2015-04-11 10:30:45.123456789", "Saturday", udf);
+ runAndVerifyTs("2015-04-12 10:30:45", "Sunday", udf);
+
+ // make sure hour of day is ok
+ fmtText = new Text("HH");
+ valueOI1 = PrimitiveObjectInspectorFactory
+ .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtText);
+ arguments[1] = valueOI1;
+ udf.initialize(arguments);
+ runAndVerifyTs("2015-04-08 00:30:45", "00", udf);
+
}
public void testNullFmt() throws HiveException {
@@ -126,11 +151,11 @@ public void testNullFmt() throws HiveException {
Text fmtText = null;
ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory
.getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtText);
- ObjectInspector[] arguments = { valueOI0, valueOI1 };
+ ObjectInspector[] arguments = {valueOI0, valueOI1};
udf.initialize(arguments);
- runAndVerifyStr("2015-04-05", fmtText, null, udf);
+ runAndVerifyStr("2015-04-05", null, udf);
}
public void testWrongFmt() throws HiveException {
@@ -139,38 +164,35 @@ public void testWrongFmt() throws HiveException {
Text fmtText = new Text("Q");
ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory
.getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtText);
- ObjectInspector[] arguments = { valueOI0, valueOI1 };
+ ObjectInspector[] arguments = {valueOI0, valueOI1};
udf.initialize(arguments);
- runAndVerifyStr("2015-04-05", fmtText, null, udf);
+ runAndVerifyStr("2015-04-05", null, udf);
}
- private void runAndVerifyStr(String str, Text fmtText, String expResult, GenericUDF udf)
+ private void runAndVerifyStr(String str, String expResult, GenericUDF udf)
throws HiveException {
DeferredObject valueObj0 = new DeferredJavaObject(str != null ? new Text(str) : null);
- DeferredObject valueObj1 = new DeferredJavaObject(fmtText);
- DeferredObject[] args = { valueObj0, valueObj1 };
+ DeferredObject[] args = {valueObj0};
Text output = (Text) udf.evaluate(args);
assertEquals("date_format() test ", expResult, output != null ? output.toString() : null);
}
- private void runAndVerifyDate(String str, Text fmtText, String expResult, GenericUDF udf)
+ private void runAndVerifyDate(String str, String expResult, GenericUDF udf)
throws HiveException {
DeferredObject valueObj0 = new DeferredJavaObject(str != null ? new DateWritableV2(
Date.valueOf(str)) : null);
- DeferredObject valueObj1 = new DeferredJavaObject(fmtText);
- DeferredObject[] args = { valueObj0, valueObj1 };
+ DeferredObject[] args = {valueObj0};
Text output = (Text) udf.evaluate(args);
assertEquals("date_format() test ", expResult, output != null ? output.toString() : null);
}
- private void runAndVerifyTs(String str, Text fmtText, String expResult, GenericUDF udf)
+ private void runAndVerifyTs(String str, String expResult, GenericUDF udf)
throws HiveException {
DeferredObject valueObj0 = new DeferredJavaObject(str != null ? new TimestampWritableV2(
Timestamp.valueOf(str)) : null);
- DeferredObject valueObj1 = new DeferredJavaObject(fmtText);
- DeferredObject[] args = { valueObj0, valueObj1 };
+ DeferredObject[] args = {valueObj0};
Text output = (Text) udf.evaluate(args);
assertEquals("date_format() test ", expResult, output != null ? output.toString() : null);
}
diff --git ql/src/test/queries/clientpositive/cast_datetime_with_sql_2016_format.q ql/src/test/queries/clientpositive/cast_datetime_with_sql_2016_format.q
new file mode 100644
index 0000000000..4009f4bea4
--- /dev/null
+++ ql/src/test/queries/clientpositive/cast_datetime_with_sql_2016_format.q
@@ -0,0 +1,69 @@
+drop table if exists timestamps;
+drop table if exists dates;
+drop table if exists strings;
+drop table if exists chars;
+drop table if exists varchars;
+
+--non-vectorized
+set hive.vectorized.execution.enabled=false;
+set hive.fetch.task.conversion=more;
+
+create table timestamps (t timestamp) stored as parquet;
+insert into timestamps values
+("2020-02-03"),
+("1969-12-31 23:59:59.999999999")
+;
+from timestamps select cast (t as string format "yyyy hh24...PM ff");
+from timestamps select cast (t as char(11) format "yyyy hh24...PM ff"); -- will be truncated
+from timestamps select cast (t as varchar(11) format "yyyy hh24...PM ff"); -- will be truncated
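+-- in the pattern above: yyyy year, hh24 hour of day, PM meridian indicator, ff fractional seconds; dots are literal separators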
+
+create table dates (d date) stored as parquet;
+insert into dates values
+("2020-02-03"),
+("1969-12-31")
+;
+from dates select cast (d as string format "yyyy mm dd , hh24 mi ss ff99");
+from dates select cast (d as char(10) format "yyyy mm dd , hh24 mi ss ff99"); -- will be truncated
+from dates select cast (d as varchar(10) format "yyyy mm dd , hh24 mi ss ff99"); -- will be truncated
+
+create table strings (s string) stored as parquet;
+create table varchars (s varchar(11)) stored as parquet;
+create table chars (s char(11)) stored as parquet;
+insert into strings values
+("20 / 2 / 3"),
+("1969 12 31")
+;
+insert into varchars select * from strings;
+insert into chars select * from strings;
+
+from strings select cast (s as timestamp format "yyyy.mm.dd");
+from strings select cast (s as date format "yyyy.mm.dd");
+from varchars select cast (s as timestamp format "yyyy.mm.dd");
+from varchars select cast (s as date format "yyyy.mm.dd");
+from chars select cast (s as timestamp format "yyyy.mm.dd");
+from chars select cast (s as date format "yyyy.mm.dd");
+
+
+--correct descriptions
+explain from strings select cast (s as timestamp format "yyy.mm.dd");
+explain from strings select cast (s as date format "yyy.mm.dd");
+explain from timestamps select cast (t as string format "yyyy");
+explain from timestamps select cast (t as varchar(12) format "yyyy");
+
+
+--vectorized
+set hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
+from timestamps select cast (t as string format "yyyy");
+from dates select cast (d as string format "yyyy");
+from timestamps select cast (t as varchar(11) format "yyyy");
+from dates select cast (d as varchar(11) format "yyyy");
+from timestamps select cast (t as char(11) format "yyyy");
+from dates select cast (d as char(11) format "yyyy");
+from strings select cast (s as timestamp format "yyyy.mm.dd");
+from varchars select cast (s as timestamp format "yyyy.mm.dd");
+from chars select cast (s as timestamp format "yyyy.mm.dd");
+from strings select cast (s as date format "yyyy.mm.dd");
+from varchars select cast (s as date format "yyyy.mm.dd");
+from chars select cast (s as date format "yyyy.mm.dd");
diff --git ql/src/test/results/clientpositive/sql_formats.q.out ql/src/test/results/clientpositive/sql_formats.q.out
new file mode 100644
index 0000000000..78dd3f2e76
--- /dev/null
+++ ql/src/test/results/clientpositive/sql_formats.q.out
@@ -0,0 +1,260 @@
+PREHOOK: query: drop table if exists timestamps
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists timestamps
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists timestampLocalTzs
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists timestampLocalTzs
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists dates
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists dates
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists strings
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists strings
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table timestamps (t timestamp) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@timestamps
+POSTHOOK: query: create table timestamps (t timestamp) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@timestamps
+PREHOOK: query: insert into timestamps values
+("2019-01-01"),
+("1969-12-31 23:59:59.999999999")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@timestamps
+POSTHOOK: query: insert into timestamps values
+("2019-01-01"),
+("1969-12-31 23:59:59.999999999")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@timestamps
+POSTHOOK: Lineage: timestamps.t SCRIPT []
+PREHOOK: query: from timestamps select cast (t as string format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+POSTHOOK: query: from timestamps select cast (t as string format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+2019
+1969
+PREHOOK: query: create table dates (d date) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dates
+POSTHOOK: query: create table dates (d date) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dates
+PREHOOK: query: insert into dates values
+("2019-01-01"),
+("1969-12-31")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@dates
+POSTHOOK: query: insert into dates values
+("2019-01-01"),
+("1969-12-31")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@dates
+POSTHOOK: Lineage: dates.d SCRIPT []
+PREHOOK: query: from timestamps select cast (t as string format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+POSTHOOK: query: from timestamps select cast (t as string format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+2019
+1969
+PREHOOK: query: create table strings (s string) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@strings
+POSTHOOK: query: create table strings (s string) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@strings
+PREHOOK: query: insert into strings values
+("2019"),
+("1969")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@strings
+POSTHOOK: query: insert into strings values
+("2019"),
+("1969")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@strings
+POSTHOOK: Lineage: strings.s SCRIPT []
+PREHOOK: query: from strings select cast (s as timestamp format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+#### A masked pattern was here ####
+POSTHOOK: query: from strings select cast (s as timestamp format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+#### A masked pattern was here ####
+2019-01-01 00:00:00
+1969-01-01 00:00:00
+PREHOOK: query: from strings select cast (s as date format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+#### A masked pattern was here ####
+POSTHOOK: query: from strings select cast (s as date format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+#### A masked pattern was here ####
+2019-01-01
+1969-01-01
+PREHOOK: query: explain
+from strings select cast (s as timestamp format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+from strings select cast (s as timestamp format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: strings
+ Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: CAST( s AS TIMESTAMP FORMAT 'yyyy') (type: timestamp)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+from strings select cast (s as date format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+from strings select cast (s as date format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: strings
+ Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: CAST( s AS DATE FORMAT 'yyyy') (type: date)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+from timestamps select cast (t as string format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+from timestamps select cast (t as string format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: timestamps
+ Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: CAST( t AS STRING FORMAT 'yyyy') (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: from strings select cast (s as timestamp format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+#### A masked pattern was here ####
+POSTHOOK: query: from strings select cast (s as timestamp format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+#### A masked pattern was here ####
+2019-01-01 00:00:00
+1969-01-01 00:00:00
+PREHOOK: query: from strings select cast (s as date format "yyyy")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@strings
+#### A masked pattern was here ####
+POSTHOOK: query: from strings select cast (s as date format "yyyy")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@strings
+#### A masked pattern was here ####
+2019-01-01
+1969-01-01
diff --git ql/src/test/results/clientpositive/udf_string.q.out ql/src/test/results/clientpositive/udf_string.q.out
index aa764a9db6..6da63be8f8 100644
--- ql/src/test/results/clientpositive/udf_string.q.out
+++ ql/src/test/results/clientpositive/udf_string.q.out
@@ -2,12 +2,13 @@ PREHOOK: query: DESCRIBE FUNCTION string
PREHOOK: type: DESCFUNCTION
POSTHOOK: query: DESCRIBE FUNCTION string
POSTHOOK: type: DESCFUNCTION
-CAST(<expr> as STRING) - Converts the argument to a string value.
+CAST(<expr> as STRING [FORMAT <format>]) - Converts the argument to a string value.
PREHOOK: query: DESCRIBE FUNCTION EXTENDED string
PREHOOK: type: DESCFUNCTION
POSTHOOK: query: DESCRIBE FUNCTION EXTENDED string
POSTHOOK: type: DESCFUNCTION
-CAST(<expr> as STRING) - Converts the argument to a string value.
+CAST(<expr> as STRING [FORMAT <format>]) - Converts the argument to a string value.
+If a format is specified with the FORMAT argument then SQL:2016 datetime formats will be used.
Example:
> SELECT CAST(1234 AS string) FROM src LIMIT 1;
'1234'
diff --git serde/src/java/org/apache/hadoop/hive/serde2/io/DateWritableV2.java serde/src/java/org/apache/hadoop/hive/serde2/io/DateWritableV2.java
index 4b6a3d6c10..4ff4732324 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/io/DateWritableV2.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/io/DateWritableV2.java
@@ -21,6 +21,7 @@
import java.io.DataOutput;
import java.io.IOException;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.type.Date;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableUtils;
@@ -147,6 +148,10 @@ public String toString() {
return date.toString();
}
+ public String toStringFormatted(HiveDateTimeFormatter formatter) {
+ return date.toStringFormatted(formatter);
+ }
+
@Override
public int hashCode() {
return date.toEpochDay();
diff --git serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritableV2.java serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritableV2.java
index 9aa7f19ab2..5972bd92b5 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritableV2.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritableV2.java
@@ -22,6 +22,7 @@
import java.io.IOException;
import java.time.format.DateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.Timestamp;
import org.apache.hadoop.hive.common.type.TimestampUtils;
@@ -387,6 +388,16 @@ public String toString() {
return timestamp.format(DATE_TIME_FORMAT);
}
+ public String toStringFormatted(HiveDateTimeFormatter formatter) {
+ if (formatter == null) {
+ return toString();
+ }
+ if (timestampEmpty) {
+ populateTimestamp();
+ }
+ return timestamp.toStringFormatted(formatter);
+ }
+
@Override
public int hashCode() {
long seconds = getSeconds();
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java
index 84c027d51c..fabc92c7c5 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java
@@ -20,6 +20,7 @@
import java.time.ZoneId;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.type.Date;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
@@ -250,6 +251,7 @@ public Object convert(Object input) {
PrimitiveObjectInspector inputOI;
SettableDateObjectInspector outputOI;
Object r;
+ private HiveDateTimeFormatter formatter = null;
public DateConverter(PrimitiveObjectInspector inputOI,
SettableDateObjectInspector outputOI) {
@@ -263,7 +265,11 @@ public Object convert(Object input) {
return null;
}
return outputOI.set(r, PrimitiveObjectInspectorUtils.getDate(input,
- inputOI));
+ inputOI, formatter));
+ }
+
+ public void setDateTimeFormatter(HiveDateTimeFormatter formatter) {
+ this.formatter = formatter;
}
}
@@ -272,6 +278,7 @@ public Object convert(Object input) {
SettableTimestampObjectInspector outputOI;
boolean intToTimestampInSeconds = false;
Object r;
+ private HiveDateTimeFormatter formatter = null;
public TimestampConverter(PrimitiveObjectInspector inputOI,
SettableTimestampObjectInspector outputOI) {
@@ -289,7 +296,11 @@ public Object convert(Object input) {
return null;
}
return outputOI.set(r, PrimitiveObjectInspectorUtils.getTimestamp(input,
- inputOI, intToTimestampInSeconds));
+ inputOI, intToTimestampInSeconds, formatter));
+ }
+
+ public void setDateTimeFormatter(HiveDateTimeFormatter formatter) {
+ this.formatter = formatter;
}
}
@@ -416,6 +427,7 @@ public Object convert(Object input) {
private static byte[] trueBytes = {'T', 'R', 'U', 'E'};
private static byte[] falseBytes = {'F', 'A', 'L', 'S', 'E'};
+ private HiveDateTimeFormatter formatter = null;
public TextConverter(PrimitiveObjectInspector inputOI) {
// The output ObjectInspector is writableStringObjectInspector.
@@ -486,11 +498,12 @@ public Text convert(Object input) {
}
return t;
case DATE:
- t.set(((DateObjectInspector) inputOI).getPrimitiveWritableObject(input).toString());
+ t.set(((DateObjectInspector) inputOI)
+ .getPrimitiveWritableObject(input).toStringFormatted(formatter));
return t;
case TIMESTAMP:
t.set(((TimestampObjectInspector) inputOI)
- .getPrimitiveWritableObject(input).toString());
+ .getPrimitiveWritableObject(input).toStringFormatted(formatter));
return t;
case TIMESTAMPLOCALTZ:
t.set(((TimestampLocalTZObjectInspector) inputOI).getPrimitiveWritableObject(input).toString());
@@ -520,6 +533,10 @@ public Text convert(Object input) {
throw new RuntimeException("Hive 2 Internal error: type = " + inputOI.getTypeName());
}
}
+
+ public void setDateTimeFormatter(HiveDateTimeFormatter formatter) {
+ this.formatter = formatter;
+ }
}
/**
@@ -545,6 +562,7 @@ public Object convert(Object input) {
PrimitiveObjectInspector inputOI;
SettableHiveVarcharObjectInspector outputOI;
Object hc;
+ private HiveDateTimeFormatter formatter;
public HiveVarcharConverter(PrimitiveObjectInspector inputOI,
SettableHiveVarcharObjectInspector outputOI) {
@@ -572,16 +590,21 @@ public Object convert(Object input) {
((BooleanObjectInspector) inputOI).get(input) ?
new HiveVarchar("TRUE", -1) : new HiveVarchar("FALSE", -1));
default:
- return outputOI.set(hc, PrimitiveObjectInspectorUtils.getHiveVarchar(input, inputOI));
+ return outputOI.set(hc,
+ PrimitiveObjectInspectorUtils.getHiveVarchar(input, inputOI, formatter));
}
}
+ public void setDateTimeFormatter(HiveDateTimeFormatter formatter) {
+ this.formatter = formatter;
+ }
}
public static class HiveCharConverter implements Converter {
PrimitiveObjectInspector inputOI;
SettableHiveCharObjectInspector outputOI;
Object hc;
+ private HiveDateTimeFormatter formatter;
public HiveCharConverter(PrimitiveObjectInspector inputOI,
SettableHiveCharObjectInspector outputOI) {
@@ -601,8 +624,13 @@ public Object convert(Object input) {
((BooleanObjectInspector) inputOI).get(input) ?
new HiveChar("TRUE", -1) : new HiveChar("FALSE", -1));
default:
- return outputOI.set(hc, PrimitiveObjectInspectorUtils.getHiveChar(input, inputOI));
+ return outputOI.set(hc,
+ PrimitiveObjectInspectorUtils.getHiveChar(input, inputOI, formatter));
}
}
+
+ public void setDateTimeFormatter(HiveDateTimeFormatter formatter) {
+ this.formatter = formatter;
+ }
}
}
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
index 3886b202c7..778ca68f8b 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
@@ -29,6 +29,7 @@
import org.apache.hadoop.hive.common.classification.InterfaceAudience;
import org.apache.hadoop.hive.common.classification.InterfaceStability;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.type.Date;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
@@ -891,12 +892,18 @@ public static float getFloat(Object o, PrimitiveObjectInspector oi) {
return (float) getDouble(o, oi);
}
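+ // Backward-compatible overload: delegates with a null formatter (default behavior).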
+ public static String getString(Object o, PrimitiveObjectInspector oi) {
+ return getString(o, oi, null);
+ }
+
/**
* Get the String value out of a primitive object. Note that
* NullPointerException will be thrown if o is null. Note that
* RuntimeException will be thrown if o is not a valid string.
+ * The HiveDateTimeFormatter argument is optional; when it is null, the
+ * default conversion behavior is used.
*/
- public static String getString(Object o, PrimitiveObjectInspector oi) {
+ public static String getString(Object o, PrimitiveObjectInspector oi,
+ HiveDateTimeFormatter formatter) {
if (o == null) {
return null;
@@ -951,10 +958,12 @@ public static String getString(Object o, PrimitiveObjectInspector oi) {
result = hcoi.getPrimitiveJavaObject(o).toString();
break;
case DATE:
- result = ((DateObjectInspector) oi).getPrimitiveWritableObject(o).toString();
+ result = ((DateObjectInspector) oi).getPrimitiveWritableObject(o)
+ .toStringFormatted(formatter);
break;
case TIMESTAMP:
- result = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o).toString();
+ result = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o)
+ .toStringFormatted(formatter);
break;
case TIMESTAMPLOCALTZ:
result = ((TimestampLocalTZObjectInspector) oi).getPrimitiveWritableObject(o).toString();
@@ -978,6 +987,11 @@ public static String getString(Object o, PrimitiveObjectInspector oi) {
}
public static HiveChar getHiveChar(Object o, PrimitiveObjectInspector oi) {
+ return getHiveChar(o, oi, null);
+ }
+
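+ // Formatter-aware variant; the formatter only matters when the value is rendered via getString.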
+ public static HiveChar getHiveChar(Object o, PrimitiveObjectInspector oi,
+ HiveDateTimeFormatter formatter) {
if (o == null) {
return null;
}
@@ -990,13 +1004,18 @@ public static HiveChar getHiveChar(Object o, PrimitiveObjectInspector oi) {
default:
// No char length available, copy whole string value here.
result = new HiveChar();
- result.setValue(getString(o, oi));
+ result.setValue(getString(o, oi, formatter));
break;
}
return result;
}
public static HiveVarchar getHiveVarchar(Object o, PrimitiveObjectInspector oi) {
+ return getHiveVarchar(o, oi, null);
+ }
+
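+ // Formatter-aware variant; a null formatter reproduces the legacy conversion.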
+ public static HiveVarchar getHiveVarchar(Object o, PrimitiveObjectInspector oi,
+ HiveDateTimeFormatter formatter) {
if (o == null) {
return null;
@@ -1012,7 +1031,7 @@ public static HiveVarchar getHiveVarchar(Object o, PrimitiveObjectInspector oi)
// It might actually be ok as long as there is an object inspector (with char length)
// receiving this value.
result = new HiveVarchar();
- result.setValue(getString(o, oi));
+ result.setValue(getString(o, oi, formatter));
break;
}
return result;
@@ -1113,6 +1132,11 @@ public static HiveDecimal getHiveDecimal(Object o, PrimitiveObjectInspector oi)
}
public static Date getDate(Object o, PrimitiveObjectInspector oi) {
+ return getDate(o, oi, null);
+ }
+
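+ // Formatter-aware variant: string inputs are parsed with the supplied SQL:2016 formatter when present.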
+ public static Date getDate(
+ Object o, PrimitiveObjectInspector oi, HiveDateTimeFormatter formatter) {
if (o == null) {
return null;
}
@@ -1125,13 +1149,9 @@ public static Date getDate(Object o, PrimitiveObjectInspector oi) {
StringObjectInspector soi = (StringObjectInspector) oi;
String s = soi.getPrimitiveJavaObject(o).trim();
try {
- if (s.length() == DATE_LENGTH) {
- result = Date.valueOf(s);
- } else {
- Timestamp ts = getTimestampFromString(s);
- if (ts != null) {
- result = Date.ofEpochMilli(ts.toEpochMilli());
- }
+ Date date = getDateFromString(s, formatter);
+ if (date != null) {
+ result = date;
}
} catch (IllegalArgumentException e) {
// Do nothing
@@ -1141,13 +1161,9 @@ public static Date getDate(Object o, PrimitiveObjectInspector oi) {
case VARCHAR: {
String val = getString(o, oi).trim();
try {
- if (val.length() == DATE_LENGTH) {
- result = Date.valueOf(val);
- } else {
- Timestamp ts = getTimestampFromString(val);
- if (ts != null) {
- result = Date.ofEpochMilli(ts.toEpochMilli());
- }
+ Date date = getDateFromString(val, formatter);
+ if (date != null) {
+ result = date;
}
} catch (IllegalArgumentException e) {
// Do nothing
@@ -1177,11 +1193,46 @@ public static Date getDate(Object o, PrimitiveObjectInspector oi) {
return result;
}
+ private final static int DATE_LENGTH = "YYYY-MM-DD".length();
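+
+ // Parses a date string: with a formatter, SQL:2016 parsing is used; without one,
+ // the legacy "YYYY-MM-DD"/timestamp-prefix logic applies. Returns null if unparseable.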
+ private static Date getDateFromString(String s, HiveDateTimeFormatter formatter) {
+
+ // with SQL formats
+ if (formatter != null) {
+ try {
+ return Date.valueOf(s, formatter);
+ } catch (IllegalArgumentException e) {
+ return null;
+ }
+ }
+
+ // without SQL formats
+ if (s.length() == DATE_LENGTH) {
+ return Date.valueOf(s);
+ } else {
+ Timestamp ts = getTimestampFromString(s);
+ if (ts != null) {
+ return Date.ofEpochMilli(ts.toEpochMilli());
+ }
+ }
+ return null;
+ }
+
public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector oi) {
return getTimestamp(o, oi, false);
}
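+ // Formatter-aware overload; int-to-timestamp-in-seconds defaults to false.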
+ public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector oi,
+ HiveDateTimeFormatter formatter) {
+ return getTimestamp(o, oi, false, formatter);
+ }
+
public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector inputOI, boolean intToTimestampInSeconds) {
+ return getTimestamp(o, inputOI, intToTimestampInSeconds, null);
+ }
+
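+ // Full variant: optionally treats integer inputs as seconds and parses strings
+ // with the supplied formatter (null = legacy parsing).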
+ public static Timestamp getTimestamp(Object o,
+ PrimitiveObjectInspector inputOI,
+ boolean intToTimestampInSeconds,
+ HiveDateTimeFormatter formatter) {
if (o == null) {
return null;
}
@@ -1225,11 +1276,11 @@ public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector inputOI,
case STRING:
StringObjectInspector soi = (StringObjectInspector) inputOI;
String s = soi.getPrimitiveJavaObject(o);
- result = getTimestampFromString(s);
+ result = getTimestampFromString(s, formatter);
break;
case CHAR:
case VARCHAR:
- result = getTimestampFromString(getString(o, inputOI));
+ result = getTimestampFromString(getString(o, inputOI), formatter);
break;
case DATE:
result = Timestamp.ofEpochMilli(
@@ -1254,15 +1305,16 @@ public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector inputOI,
return result;
}
- private final static int TS_LENGTH = "yyyy-mm-dd hh:mm:ss".length();
- private final static int DATE_LENGTH = "YYYY-MM-DD".length();
-
public static Timestamp getTimestampFromString(String s) {
- s = s.trim();
+ return getTimestampFromString(s, null);
+ }
+
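+ // Delegates parsing to Timestamp.valueOf(s, formatter); returns null on parse failure.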
+ public static Timestamp getTimestampFromString(String s, HiveDateTimeFormatter formatter) {
+ s = s.trim(); // preserve the previous whitespace-trimming behavior
s = trimNanoTimestamp(s);
try {
- return TimestampUtils.stringToTimestamp(s);
+ return Timestamp.valueOf(s, formatter);
} catch (IllegalArgumentException e) {
return null;
}
@@ -1284,19 +1336,6 @@ private static String trimNanoTimestamp(String s) {
return s;
}
- private static boolean isValidTimeStamp(final String s) {
- if (s.length() == TS_LENGTH ||
- (s.contains(".") &&
- s.substring(0, s.indexOf('.')).length() == TS_LENGTH)) {
- // Possible timestamp
- if (s.charAt(DATE_LENGTH) == '-') {
- return false;
- }
- return true;
- }
- return false;
- }
-
public static TimestampTZ getTimestampLocalTZ(Object o, PrimitiveObjectInspector oi,
ZoneId timeZone) {
if (o == null) {