diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveDateTimeFormatter.java common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveDateTimeFormatter.java
new file mode 100644
index 0000000000..a158d4befd
--- /dev/null
+++ common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveDateTimeFormatter.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+
+/**
+ * Interface used for formatting and parsing timestamps. Initially created so that users are able
+ * to optionally format datetime objects into strings and parse strings into datetime objects with
+ * SQL:2016 semantics, as well as with the legacy (java.text.SimpleDateFormat) format.
+ */
+public interface HiveDateTimeFormatter {
+  /**
+   * Format the given timestamp into a string.
+   *
+   * @throws IllegalArgumentException if timestamp cannot be formatted.
+   */
+  String format(Timestamp ts);
+
+  /**
+   * Format the given date into a string.
+   *
+   * @throws IllegalArgumentException if date cannot be formatted.
+   */
+  String format(Date date);
+
+  /**
+   * Parse the given string into a timestamp.
+   *
+   * @throws IllegalArgumentException if string cannot be parsed.
+   */
+  Timestamp parseTimestamp(String string);
+
+  /**
+   * Parse the given string into a date.
+   *
+   * @throws IllegalArgumentException if string cannot be parsed.
+   */
+  Date parseDate(String string);
+
+  /**
+   * Get the format pattern to be used for formatting datetime objects or parsing strings.
+   */
+  String getPattern();
+}
diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveJavaDateTimeFormatter.java common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveJavaDateTimeFormatter.java
new file mode 100644
index 0000000000..409a902e65
--- /dev/null
+++ common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveJavaDateTimeFormatter.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common.format.datetime; + +import org.apache.hadoop.hive.common.type.Date; +import org.apache.hadoop.hive.common.type.Timestamp; + +import java.time.Instant; +import java.time.LocalDateTime; +import java.time.ZoneId; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; + +/** + * Wrapper for DateTimeFormatter in the java.time package. + */ +public class HiveJavaDateTimeFormatter implements HiveDateTimeFormatter { + + private DateTimeFormatter formatter; + + public HiveJavaDateTimeFormatter(DateTimeFormatter formatter) { + this.formatter = formatter; + } + + @Override public String format(Timestamp ts) { + return formatter.format( + LocalDateTime.ofInstant( + Instant.ofEpochSecond(ts.toEpochSecond(), ts.getNanos()), ZoneId.of("UTC"))); + } + + @Override public String format(Date date) { + return format(Timestamp.ofEpochMilli(date.toEpochMilli())); + } + + @Override public Timestamp parseTimestamp(String string) { + LocalDateTime ldt = LocalDateTime.parse(string, formatter); + return Timestamp.ofEpochSecond(ldt.toEpochSecond(ZoneOffset.UTC), ldt.getNano()); + } + + @Override public Date parseDate(String string) { + return Date.ofEpochMilli(parseTimestamp(string).toEpochMilli()); + } + + @Override public String getPattern() { + return formatter.toString(); + } +} diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSimpleDateFormatter.java common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSimpleDateFormatter.java new file mode 100644 index 0000000000..2f8070d613 --- /dev/null +++ common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSimpleDateFormatter.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import org.apache.hadoop.hive.common.type.Timestamp;
+
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.TimeZone;
+
+/**
+ * Wrapper for java.text.SimpleDateFormat.
+ */
+public class HiveSimpleDateFormatter implements HiveDateTimeFormatter {
+
+  private SimpleDateFormat format = new SimpleDateFormat();
+  private String pattern;
+
+  public HiveSimpleDateFormatter(String pattern, TimeZone timeZone) {
+    setPattern(pattern);
+    format.setTimeZone(timeZone);
+  }
+
+  @Override public String format(Timestamp ts) {
+    Date date = new Date(ts.toEpochMilli());
+    return format.format(date);
+  }
+
+  @Override public String format(org.apache.hadoop.hive.common.type.Date date) {
+    return format(Timestamp.ofEpochMilli(date.toEpochMilli()));
+  }
+
+  @Override public Timestamp parseTimestamp(String string) {
+    try {
+      Date date = format.parse(string);
+      return Timestamp.ofEpochMilli(date.getTime());
+    } catch (java.text.ParseException e) {
+      throw new IllegalArgumentException("String " + string
+          + " could not be parsed by java.text.SimpleDateFormat with pattern " + pattern, e);
+    }
+  }
+
+  @Override public org.apache.hadoop.hive.common.type.Date parseDate(String string) {
+    return org.apache.hadoop.hive.common.type.Date.ofEpochMilli(
+        parseTimestamp(string).toEpochMilli());
+  }
+
+  private void setPattern(String pattern) {
+    format.applyPattern(pattern);
+    this.pattern = pattern;
+  }
+
+  @Override public String getPattern() {
+    return pattern;
+  }
+}
diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java
new file mode 100644
index 0000000000..3478638813
--- /dev/null
+++ common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java
@@ -0,0 +1,831 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+
+import java.time.DateTimeException;
+import java.time.Duration;
+import java.time.Instant;
+import java.time.LocalDateTime;
+import java.time.ZoneOffset;
+import java.time.temporal.ChronoField;
+import java.time.temporal.ChronoUnit;
+import java.time.temporal.TemporalField;
+import java.time.temporal.TemporalUnit;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.TimeZone;
+
+/**
+ * Formatter using SQL:2016 datetime patterns.
+ *
+ * For all tokens:
+ * - Patterns are case-insensitive, except AM/PM and T/Z. See these sections for more details.
+ * - For string to datetime conversion, no duplicate format tokens are allowed, including tokens
+ *   that have the same meaning but different lengths ("Y" and "YY" conflict) or different
+ *   behaviors ("RR" and "YY" conflict).
+ *
+ * For all numeric tokens:
+ * - The "expected length" of input/output is the number of characters in the token (e.g. "YYY": 3,
+ *   "Y": 1, and so on), with some exceptions (see map SPECIAL_LENGTHS).
+ * - For string to datetime conversion, inputs of fewer digits than expected are accepted if
+ *   followed by a delimiter, e.g. format="YYYY-MM-DD", input="19-1-1", output=2019-01-01 00:00:00.
+ * - For datetime to string conversion, output is left padded with zeros, e.g. format="DD SSSSS",
+ *   input=2019-01-01 00:00:03, output="01 00003".
+ *
+ *
+ * Accepted format tokens:
+ * Note: "|" means "or". "Delimiter" means a separator, tokens T or Z, or end of input.
+ *
+ * A. Temporal tokens
+ * YYYY
+ * 4-digit year
+ * - For string to datetime conversion, prefix digits for 1, 2, and 3-digit inputs are obtained
+ *   from current date
+ *   E.g. input=‘9-01-01’, pattern=‘YYYY-MM-DD’, current year=2020, output=2029-01-01 00:00:00
+ *
+ *
+ * YYY
+ * Last 3 digits of a year
+ * - Gets the prefix digit from current date.
+ * - Can accept fewer digits than 3, similarly to YYYY.
+ *
+ * YY
+ * Last 2 digits of a year
+ * - Gets the 2 prefix digits from current date.
+ * - Can accept fewer digits than 2, similarly to YYYY.
+ *
+ * Y
+ * Last digit of a year
+ * - Gets the 3 prefix digits from current date.
+ *
+ * RRRR
+ * 4-digit rounded year
+ * - String to datetime conversion:
+ *   - If 2 digits are provided then acts like RR.
+ *   - If 1, 3 or 4 digits are provided then acts like YYYY.
+ * - For datetime to string conversion, acts like YYYY.
+ *
+ * RR
+ * 2-digit rounded year
+ * - String to datetime conversion:
+ *   - Semantics:
+ *     Input:    Last 2 digits of current year:    First 2 digits of output:
+ *     0 to 49   00 to 49                          First 2 digits of current year
+ *     0 to 49   50 to 99                          First 2 digits of current year + 1
+ *     50 to 99  00 to 49                          First 2 digits of current year - 1
+ *     50 to 99  50 to 99                          First 2 digits of current year
+ *   - If 1-digit year is provided followed by a delimiter, falls back to YYYY with 1-digit year
+ *     input.
+ * - For datetime to string conversion, acts like YY.
+ *
+ * MM
+ * Month (1-12)
+ * - For string to datetime conversion, conflicts with DDD.
+ *
+ * DD
+ * Day of month (1-31)
+ * - For string to datetime conversion, conflicts with DDD.
+ *
+ * DDD
+ * Day of year (1-366)
+ * - For string to datetime conversion, conflicts with DD and MM.
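+ *
+ * Worked examples for the year and day tokens above, drawn from this patch's tests:
+ * E.g. pattern="RR-MM-DD", input="49-02-03", output=2049-02-03 00:00:00
+ * E.g. pattern="RR-MM-DD", input="50-02-03", output=1950-02-03 00:00:00
+ * E.g. pattern="YYYY DDD", input="2000 60", output=2000-02-29 00:00:00 (leap year)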
+ *
+ * HH
+ * Hour of day (1-12)
+ * - If no AM/PM provided then defaults to AM.
+ * - In string to datetime conversion, conflicts with SSSSS and HH24.
+ *
+ * HH12
+ * Hour of day (1-12)
+ * See HH.
+ *
+ * HH24
+ * Hour of day (0-23)
+ * - In string to datetime conversion, conflicts with SSSSS, HH12 and AM/PM.
+ *
+ * MI
+ * Minute of hour (0-59)
+ * - In string to datetime conversion, conflicts with SSSSS.
+ *
+ * SS
+ * Second of minute (0-59)
+ * - In string to datetime conversion, conflicts with SSSSS.
+ *
+ * SSSSS
+ * Second of Day (0-86399)
+ * - In string to datetime conversion, conflicts with SS, HH, HH12, HH24, MI, AM/PM.
+ *
+ * FF[1..9]
+ * Fraction of second
+ * - 1..9 indicates the number of decimal digits. "FF" (no number of digits specified) is also
+ *   accepted.
+ * - In datetime to string conversion, "FF" will omit trailing zeros, or output "0" if subsecond
+ *   value is 0.
+ * - In string to datetime conversion, fewer digits than expected are accepted if followed by a
+ *   delimiter. "FF" acts like "FF9".
+ *
+ * AM|A.M.
+ * Meridiem indicator or AM/PM
+ * - Datetime to string conversion:
+ *   - AM and PM mean the exact same thing in the pattern.
+ *     e.g. input=2019-01-01 20:00, format=“AM”, output=“PM”.
+ *   - Retains the exact format (capitalization and length) provided in the pattern string. If p.m.
+ *     is in the pattern, we expect a.m. or p.m. in the output; if AM is in the pattern, we expect
+ *     AM or PM in the output.
+ * - String to datetime conversion:
+ *   - Conflicts with HH24 and SSSSS.
+ *   - It doesn’t matter which meridiem indicator is in the pattern.
+ *     E.g. input="2019-01-01 11:00 p.m.", pattern="YYYY-MM-DD HH12:MI AM",
+ *     output=2019-01-01 23:00:00
+ *
+ * PM|P.M.
+ * Meridiem indicator
+ * See AM|A.M.
+ *
+ * B. Time zone tokens
+ * TZH
+ * Time zone offset hour (-15 to +15)
+ * - 3-character-long input is expected: 1 character for the sign and 2 digits for the value.
+ *   e.g. “+10”, “-05”
+ * - 2-digit input is accepted without the sign, e.g. “04”.
+ * - Both these 2 and 3-digit versions are accepted even if not followed by separators.
+ * - Disabled for timestamp to string and date to string conversion, as timestamp and date are time
+ *   zone agnostic.
+ *
+ * TZM
+ * Time zone offset minute (0-59)
+ * - For string to datetime conversion:
+ *   - TZH token is required.
+ *   - Unsigned; sign comes from TZH.
+ *   - Therefore time zone offsets like “-30” minutes should be expressed thus: input=“-00:30”
+ *     pattern=“TZH:TZM”.
+ * - Disabled for timestamp to string and date to string conversion, as timestamp and date are time
+ *   zone agnostic.
+ *
+ * C. Separators
+ * -|.|/|,|'|;|:|
+ * Separator
+ * - Uses loose matching. Existence of a sequence of separators in the format should match the
+ *   existence of a sequence of separators in the input regardless of the types of the separator or
+ *   the length of the sequence where length > 1. E.g. input=“2019-. ;10/10”, pattern=“YYYY-MM-DD”
+ *   is valid; input=“20191010”, pattern=“YYYY-MM-DD” is not valid.
+ * - If the last separator character in the separator substring is "-" and is immediately followed
+ *   by a time zone hour (tzh) token, it's a negative sign and not counted as a separator, UNLESS
+ *   this is the only possible separator character in the separator substring (in which case it is
+ *   not counted as the tzh's negative sign).
+ *
+ * D. ISO 8601 delimiters
+ * T
+ * ISO 8601 delimiter
+ * - Serves as a delimiter.
+ * - Function is to support formats like “YYYY-MM-DDTHH24:MI:SS.FF9Z”, “YYYY-MM-DD-HH24:MI:SSZ”
+ * - For datetime to string conversion, output is always capitalized ("T"), even if lowercase ("t")
+ *   is provided in the pattern.
+ *
+ * Z
+ * ISO 8601 delimiter
+ * See T.
+ */
+
+public class HiveSqlDateTimeFormatter implements HiveDateTimeFormatter {
+
+  private static final int LONGEST_TOKEN_LENGTH = 5;
+  private static final int LONGEST_ACCEPTED_PATTERN = 100; // for sanity's sake
+  private static final long MINUTES_PER_HOUR = 60;
+  private static final int FIFTY = 50;
+  private static final int NANOS_MAX_LENGTH = 9;
+  public static final int AM = 0;
+  public static final int PM = 1;
+  private String pattern;
+  protected List<Token> tokens = new ArrayList<>();
+
+  private static final Map<String, TemporalField> VALID_TEMPORAL_TOKENS =
+      ImmutableMap.<String, TemporalField>builder()
+      .put("yyyy", ChronoField.YEAR).put("yyy", ChronoField.YEAR)
+      .put("yy", ChronoField.YEAR).put("y", ChronoField.YEAR)
+      .put("rrrr", ChronoField.YEAR).put("rr", ChronoField.YEAR)
+      .put("mm", ChronoField.MONTH_OF_YEAR)
+      .put("dd", ChronoField.DAY_OF_MONTH)
+      .put("ddd", ChronoField.DAY_OF_YEAR)
+      .put("hh", ChronoField.HOUR_OF_AMPM)
+      .put("hh12", ChronoField.HOUR_OF_AMPM)
+      .put("hh24", ChronoField.HOUR_OF_DAY)
+      .put("mi", ChronoField.MINUTE_OF_HOUR)
+      .put("ss", ChronoField.SECOND_OF_MINUTE)
+      .put("sssss", ChronoField.SECOND_OF_DAY)
+      .put("ff1", ChronoField.NANO_OF_SECOND).put("ff2", ChronoField.NANO_OF_SECOND)
+      .put("ff3", ChronoField.NANO_OF_SECOND).put("ff4", ChronoField.NANO_OF_SECOND)
+      .put("ff5", ChronoField.NANO_OF_SECOND).put("ff6", ChronoField.NANO_OF_SECOND)
+      .put("ff7", ChronoField.NANO_OF_SECOND).put("ff8", ChronoField.NANO_OF_SECOND)
+      .put("ff9", ChronoField.NANO_OF_SECOND).put("ff", ChronoField.NANO_OF_SECOND)
+      .put("a.m.", ChronoField.AMPM_OF_DAY).put("am", ChronoField.AMPM_OF_DAY)
+      .put("p.m.", ChronoField.AMPM_OF_DAY).put("pm", ChronoField.AMPM_OF_DAY)
+      .build();
+
+  private static final Map<String, TemporalUnit> VALID_TIME_ZONE_TOKENS =
+      ImmutableMap.<String, TemporalUnit>builder()
+      .put("tzh", ChronoUnit.HOURS).put("tzm", ChronoUnit.MINUTES).build();
+
+  static final List<String> VALID_ISO_8601_DELIMITERS =
+      ImmutableList.of("t", "z");
+
+  static final List<String> VALID_SEPARATORS =
+      ImmutableList.of("-", ":", " ", ".", "/", ";", "\'", ",");
+
+  private static final Map<String, Integer> SPECIAL_LENGTHS = ImmutableMap.<String, Integer>builder()
+      .put("hh12", 2).put("hh24", 2).put("tzm", 2).put("am", 4).put("pm", 4)
+      .put("ff1", 1).put("ff2", 2).put("ff3", 3).put("ff4", 4).put("ff5", 5)
+      .put("ff6", 6).put("ff7", 7).put("ff8", 8).put("ff9", 9).put("ff", 9)
+      .build();
+
+  /**
+   * Represents broad categories of tokens.
+   */
+  public enum TokenType {
+    TEMPORAL,
+    SEPARATOR,
+    TIMEZONE,
+    ISO_8601_DELIMITER
+  }
+
+  /**
+   * Token representation.
+   */
+  public static class Token {
+    TokenType type;
+    TemporalField temporalField; // for type TEMPORAL e.g. ChronoField.YEAR
+    TemporalUnit temporalUnit; // for type TIMEZONE e.g. ChronoUnit.HOURS
+    String string; // pattern string, e.g. "yyy"
+    int length; // length (e.g.
YYY: 3, FF8: 8) + + public Token(TemporalField temporalField, String string, int length) { + this(TokenType.TEMPORAL, temporalField, null, string, length); + } + + public Token(TemporalUnit temporalUnit, String string, int length) { + this(TokenType.TIMEZONE, null, temporalUnit, string, length); + } + + public Token(TokenType tokenType, String string) { + this(tokenType, null, null, string, string.length()); + } + + public Token(TokenType tokenType, TemporalField temporalField, TemporalUnit temporalUnit, + String string, int length) { + this.type = tokenType; + this.temporalField = temporalField; + this.temporalUnit = temporalUnit; + this.string = string; + this.length = length; + } + + @Override public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(string); + sb.append(" type: "); + sb.append(type); + if (temporalField != null) { + sb.append(" temporalField: "); + sb.append(temporalField); + } else if (temporalUnit != null) { + sb.append(" temporalUnit: "); + sb.append(temporalUnit); + } + return sb.toString(); + } + } + + public HiveSqlDateTimeFormatter(String pattern, boolean forParsing) { + setPattern(pattern, forParsing); + } + + /** + * Parse and perhaps verify the pattern. + */ + private void setPattern(String pattern, boolean forParsing) { + assert pattern.length() < LONGEST_ACCEPTED_PATTERN : "The input format is too long"; + + this.pattern = parsePatternToTokens(pattern); + + // throw Exception if list of tokens doesn't make sense for parsing. Formatting is less picky. + if (forParsing) { + verifyForParse(); + } else { + verifyForFormat(); + } + } + + /** + * Parse pattern to list of tokens. + */ + private String parsePatternToTokens(String pattern) { + tokens.clear(); + String originalPattern = pattern; + pattern = pattern.toLowerCase(); + + // indexes of the substring we will check (includes begin, does not include end) + int begin=0, end=0; + String candidate; + Token lastAddedToken = null; + + while (begin < pattern.length()) { + + // if begin hasn't progressed, then something is unparseable + if (begin != end) { + tokens.clear(); + throw new IllegalArgumentException("Bad date/time conversion format: " + pattern); + } + + //process next token: start with substring + for (int i = LONGEST_TOKEN_LENGTH; i > 0; i--) { + end = begin + i; + if (end > pattern.length()) { // don't go past the end of the pattern string + continue; + } + candidate = pattern.substring(begin, end); + // if it's a separator, then clump it with immediately preceding separators (e.g. "---" + // counts as one separator). 
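+        // For example, pattern "yyyy---mm" yields three tokens: [yyyy], [---], [mm]; the three
+        // dashes clump into one SEPARATOR token of length 3.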
+ if (candidate.length() == 1 && VALID_SEPARATORS.contains(candidate)) { + if (lastAddedToken != null && lastAddedToken.type == TokenType.SEPARATOR) { + lastAddedToken.string += candidate; + lastAddedToken.length += 1; + } else { + lastAddedToken = new Token(TokenType.SEPARATOR, candidate); + tokens.add(lastAddedToken); + } + begin = end; + break; + } else if (candidate.length() == 1 && VALID_ISO_8601_DELIMITERS.contains(candidate)) { + lastAddedToken = new Token(TokenType.ISO_8601_DELIMITER, candidate.toUpperCase()); + tokens.add(lastAddedToken); + begin = end; + break; + //temporal token + } else if (VALID_TEMPORAL_TOKENS.keySet().contains(candidate)) { + // for AM/PM, keep original case + if (VALID_TEMPORAL_TOKENS.get(candidate) == ChronoField.AMPM_OF_DAY) { + int subStringEnd = begin + candidate.length(); + candidate = originalPattern.substring(begin, subStringEnd); + //token string may be capitalized, update pattern + pattern = pattern.substring(0, begin) + candidate + pattern.substring(subStringEnd); + } + lastAddedToken = new Token(VALID_TEMPORAL_TOKENS.get(candidate.toLowerCase()), candidate, + getTokenStringLength(candidate.toLowerCase())); + tokens.add(lastAddedToken); + begin = end; + break; + //time zone + } else if (VALID_TIME_ZONE_TOKENS.keySet().contains(candidate)) { + lastAddedToken = new Token(VALID_TIME_ZONE_TOKENS.get(candidate), candidate, + getTokenStringLength(candidate)); + tokens.add(lastAddedToken); + begin = end; + break; + } + } + } + return pattern; + } + + private int getTokenStringLength(String candidate) { + if (SPECIAL_LENGTHS.containsKey(candidate)) { + return SPECIAL_LENGTHS.get(candidate); + } + return candidate.length(); + } + + /** + * Make sure the generated list of tokens is valid for parsing strings to datetime objects. 
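+   * E.g. "yyyy-mm-dd HH24 HH12" is rejected because the two hour tokens conflict, and
+   * "yyyy-mm-dd SS SSSSS" because second of minute conflicts with second of day.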
+   */
+  private void verifyForParse() {
+
+    // create a list of tokens' temporal fields
+    ArrayList<TemporalField> temporalFields = new ArrayList<>();
+    ArrayList<TemporalUnit> timeZoneTemporalUnits = new ArrayList<>();
+    int roundYearCount = 0, yearCount = 0;
+    for (Token token : tokens) {
+      if (token.temporalField != null) {
+        temporalFields.add(token.temporalField);
+        if (token.temporalField == ChronoField.YEAR) {
+          if (token.string.startsWith("r")) {
+            roundYearCount += 1;
+          } else {
+            yearCount += 1;
+          }
+        }
+      } else if (token.temporalUnit != null) {
+        timeZoneTemporalUnits.add(token.temporalUnit);
+      }
+    }
+
+    if (roundYearCount > 0 && yearCount > 0) {
+      throw new IllegalArgumentException("Invalid duplication of format element: Both year and "
+          + "round year are provided");
+    }
+    for (TemporalField tokenType : temporalFields) {
+      if (Collections.frequency(temporalFields, tokenType) > 1) {
+        throw new IllegalArgumentException(
+            "Invalid duplication of format element: multiple " + tokenType.toString()
+                + " tokens provided.");
+      }
+    }
+    if (temporalFields.contains(ChronoField.AMPM_OF_DAY) &&
+        !(temporalFields.contains(ChronoField.HOUR_OF_DAY) ||
+            temporalFields.contains(ChronoField.HOUR_OF_AMPM))) {
+      throw new IllegalArgumentException("Missing hour token.");
+    }
+    if (temporalFields.contains(ChronoField.AMPM_OF_DAY) &&
+        temporalFields.contains(ChronoField.HOUR_OF_DAY)) {
+      throw new IllegalArgumentException("Conflict between meridiem indicator and hour token.");
+    }
+    if (temporalFields.contains(ChronoField.HOUR_OF_AMPM) &&
+        temporalFields.contains(ChronoField.HOUR_OF_DAY)) {
+      throw new IllegalArgumentException("Conflict between hour of day and hour of am/pm token.");
+    }
+    if (temporalFields.contains(ChronoField.DAY_OF_YEAR) &&
+        (temporalFields.contains(ChronoField.DAY_OF_MONTH) ||
+            temporalFields.contains(ChronoField.MONTH_OF_YEAR))) {
+      throw new IllegalArgumentException("Day of year provided with day or month token.");
+    }
+    if (temporalFields.contains(ChronoField.SECOND_OF_DAY) &&
+        (temporalFields.contains(ChronoField.HOUR_OF_DAY) ||
+            temporalFields.contains(ChronoField.HOUR_OF_AMPM) ||
+            temporalFields.contains(ChronoField.MINUTE_OF_HOUR) ||
+            temporalFields.contains(ChronoField.SECOND_OF_MINUTE))) {
+      throw new IllegalArgumentException(
+          "Second of day token conflicts with other token(s).");
+    }
+    if (timeZoneTemporalUnits.contains(ChronoUnit.MINUTES) &&
+        !timeZoneTemporalUnits.contains(ChronoUnit.HOURS)) {
+      throw new IllegalArgumentException("Time zone minute token provided without time zone hour token.");
+    }
+  }
+
+  /**
+   * Make sure the generated list of tokens is valid for formatting datetime objects to strings.
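+   * Only time zone tokens (TZH, TZM) are rejected here, since timestamp and date are time zone
+   * agnostic; e.g. pattern "tzh" passes verification for parsing but not for formatting.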
+ */ + private void verifyForFormat() { + for (Token token : tokens) { + if (token.type == TokenType.TIMEZONE) { + throw new IllegalArgumentException(token.string.toUpperCase() + " not a valid format for " + + "timestamp or date."); + } + } + } + + @Override public String format(Timestamp ts) { + StringBuilder fullOutputSb = new StringBuilder(); + String outputString = null; + int value; + LocalDateTime localDateTime = + LocalDateTime.ofEpochSecond(ts.toEpochSecond(), ts.getNanos(), ZoneOffset.UTC); + for (Token token : tokens) { + switch (token.type) { + case TEMPORAL: + try { + value = localDateTime.get(token.temporalField); + outputString = formatTemporal(value, token); + } catch (DateTimeException e) { + throw new IllegalArgumentException(token.temporalField + " couldn't be obtained from " + + "LocalDateTime " + localDateTime, e); + } + break; + case TIMEZONE: //invalid for timestamp and date + throw new IllegalArgumentException(token.string.toUpperCase() + " not a valid format for " + + "timestamp or date."); + case SEPARATOR: + outputString = token.string; + break; + case ISO_8601_DELIMITER: + outputString = token.string.toUpperCase(); + break; + default: + //do nothing + } + fullOutputSb.append(outputString); + } + return fullOutputSb.toString(); + } + + @Override public String format(Date date) { + return format(Timestamp.ofEpochSecond(date.toEpochSecond())); + } + + private String formatTemporal(int value, Token token) { + String output; + if (token.temporalField == ChronoField.AMPM_OF_DAY) { + output = value == 0 ? "a" : "p"; + output += token.string.length() == 2 ? "m" : ".m."; + if (token.string.startsWith("A") || token.string.startsWith("P")) { + output = output.toUpperCase(); + } + } else { + // it's a numeric value + try { + output = String.valueOf(value); + output = padOrTruncateNumericTemporal(token, output); + } catch (Exception e) { + throw new IllegalArgumentException("Value: " + value + " couldn't be cast to string.", e); + } + } + return output; + } + + /** + * To match token.length, pad left with zeroes or truncate. + */ + private String padOrTruncateNumericTemporal(Token token, String output) { + if (output.length() < token.length) { + output = StringUtils.leftPad(output, token.length, '0'); // pad left + } else if (output.length() > token.length) { + if (token.temporalField == ChronoField.NANO_OF_SECOND) { + output = output.substring(0, token.length); // truncate right + } else { + output = output.substring(output.length() - token.length); // truncate left + } + } + if (token.temporalField == ChronoField.NANO_OF_SECOND + && token.string.equalsIgnoreCase("ff")) { + output = output.replaceAll("0*$", ""); //truncate trailing 0's + if (output.isEmpty()) { + output = "0"; + } + } + return output; + } + + /** + * Left here for timestamp with local time zone. + */ + private String formatTimeZone(TimeZone timeZone, LocalDateTime localDateTime, Token token) { + ZoneOffset offset = timeZone.toZoneId().getRules().getOffset(localDateTime); + Duration seconds = Duration.of(offset.get(ChronoField.OFFSET_SECONDS), ChronoUnit.SECONDS); + if (token.string.equals("tzh")) { + long hours = seconds.toHours(); + String s = (hours >= 0) ? "+" : "-"; + s += (Math.abs(hours) < 10) ? 
"0" : ""; + s += String.valueOf(Math.abs(hours)); + return s; + } else { + long minutes = Math.abs(seconds.toMinutes() % MINUTES_PER_HOUR); + String s = String.valueOf(minutes); + if (s.length() == 1) { + s = "0" + s; + } + return s; + } + } + + @Override public Timestamp parseTimestamp(String fullInput){ + LocalDateTime ldt = LocalDateTime.ofInstant(Instant.EPOCH, ZoneOffset.UTC); + String substring; + int index = 0; + int value; + int timeZoneSign = 0, timeZoneHours = 0, timeZoneMinutes = 0; + + for (Token token : tokens) { + switch (token.type) { + case TEMPORAL: + substring = getNextSubstring(fullInput, index, token); // e.g. yy-m -> yy + value = parseTemporal(substring, token); // e.g. 18->2018, July->07 + try { + ldt = ldt.with(token.temporalField, value); + } catch (DateTimeException e){ + throw new IllegalArgumentException( + "Value " + value + " not valid for token " + token.toString()); + } + index += substring.length(); + break; + case TIMEZONE: + if (token.temporalUnit == ChronoUnit.HOURS) { + String nextCharacter = fullInput.substring(index, index + 1); + timeZoneSign = "-".equals(nextCharacter) ? -1 : 1; + if ("-".equals(nextCharacter) || "+".equals(nextCharacter)) { + index++; + } + // parse next two digits + substring = getNextSubstring(fullInput, index, index + 2, token); + try { + timeZoneHours = Integer.parseInt(substring); + } catch (NumberFormatException e) { + throw new IllegalArgumentException("Couldn't parse substring \"" + substring + + "\" with token " + token + " to int. Pattern is " + pattern, e); + } + if (timeZoneHours < -15 || timeZoneHours > 15) { + throw new IllegalArgumentException("Couldn't parse substring \"" + substring + + "\" to TZH because TZH range is -15 to +15. Pattern is " + pattern); + } + } else { // time zone minutes + substring = getNextSubstring(fullInput, index, token); + try { + timeZoneMinutes = Integer.parseInt(substring); + } catch (NumberFormatException e) { + throw new IllegalArgumentException("Couldn't parse substring \"" + substring + + "\" with token " + token + " to int. Pattern is " + pattern, e); + } + if (timeZoneMinutes < 0 || timeZoneMinutes > 59) { + throw new IllegalArgumentException("Couldn't parse substring \"" + substring + + "\" to TZM because TZM range is 0 to 59. Pattern is " + pattern); + } + } + index += substring.length(); + break; + case SEPARATOR: + index = parseSeparator(fullInput, index, token); + break; + case ISO_8601_DELIMITER: + index = parseIso8601Delimiter(fullInput, index, token); + default: + //do nothing + } + } + // time zone hours -- process here because hh/hh24 may be parsed after tzh + ldt = ldt.minus(timeZoneSign * timeZoneHours, ChronoUnit.HOURS); + // time zone minutes -- process here because sign depends on tzh sign + ldt = ldt.minus( + timeZoneSign * timeZoneMinutes, ChronoUnit.MINUTES); + + // anything left unparsed at end of string? throw error + if (!fullInput.substring(index).isEmpty()) { + throw new IllegalArgumentException("Leftover input after parsing: " + + fullInput.substring(index) + " in string " + fullInput); + } + + return Timestamp.ofEpochSecond(ldt.toEpochSecond(ZoneOffset.UTC), ldt.getNano()); + } + + public Date parseDate(String input){ + return Date.ofEpochMilli(parseTimestamp(input).toEpochMilli()); + } + /** + * Return the next substring to parse. Length is either specified or token.length, but a + * separator or an ISO-8601 delimiter can cut the substring short. (e.g. if the token pattern is + * "YYYY" we expect the next 4 characters to be 4 numbers. 
However, if it is "976/" then we + * return "976" because a separator cuts it short.) + */ + private String getNextSubstring(String s, int begin, Token token) { + return getNextSubstring(s, begin, begin + token.length, token); + } + + private String getNextSubstring(String s, int begin, int end, Token token) { + if (end > s.length()) { + end = s.length(); + } + s = s.substring(begin, end); + if (token.temporalField == ChronoField.AMPM_OF_DAY) { + if (s.charAt(1) == 'm' || s.charAt(1) == 'M') { // length 2 + return s.substring(0, 2); + } else { + return s; + } + } + for (String sep : VALID_SEPARATORS) { + if (s.contains(sep)) { + s = s.substring(0, s.indexOf(sep)); + } + } + // TODO this will cause problems with DAY (for example, Thursday starts with T) + for (String delimiter : VALID_ISO_8601_DELIMITERS) { + if (s.toLowerCase().contains(delimiter)) { + s = s.substring(0, s.toLowerCase().indexOf(delimiter)); + } + } + + return s; + } + + /** + * Get the integer value of a temporal substring. + */ + private int parseTemporal(String substring, Token token){ + // exceptions to the rule + if (token.temporalField == ChronoField.AMPM_OF_DAY) { + return substring.toLowerCase().startsWith("a") ? AM : PM; + + } else if (token.temporalField == ChronoField.YEAR) { + String currentYearString = String.valueOf(LocalDateTime.now().getYear()); + //deal with round years + if (token.string.startsWith("r") && substring.length() == 2) { + int currFirst2Digits = Integer.parseInt(currentYearString.substring(0, 2)); + int currLast2Digits = Integer.parseInt(currentYearString.substring(2)); + int valLast2Digits = Integer.parseInt(substring); + if (valLast2Digits < FIFTY && currLast2Digits >= FIFTY) { + currFirst2Digits += 1; + } else if (valLast2Digits >= FIFTY && currLast2Digits < FIFTY) { + currFirst2Digits -= 1; + } + substring = String.valueOf(currFirst2Digits) + substring; + } else { // fill in prefix digits with current date + substring = currentYearString.substring(0, 4 - substring.length()) + substring; + } + + } else if (token.temporalField == ChronoField.NANO_OF_SECOND) { + int i = Integer.min(token.length, substring.length()); + substring += StringUtils.repeat("0", NANOS_MAX_LENGTH - i); + } + + // the rule + try { + return Integer.parseInt(substring); + } catch (NumberFormatException e) { + throw new IllegalArgumentException("Couldn't parse substring \"" + substring + + "\" with token " + token + " to integer. Pattern is " + pattern, e); + } + } + + /** + * Parse the next separator(s). At least one separator character is expected. Separator + * characters are interchangeable. + * + * Caveat: If the last separator character in the separator substring is "-" and is immediately + * followed by a time zone hour (tzh) token, it's a negative sign and not counted as a + * separator, UNLESS this is the only separator character in the separator substring (in + * which case it is not counted as the negative sign). 
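+   * E.g. pattern="YYYY-MM-DD HH24:MI TZH:TZM", input="2019-1-1 14:00--1:-30": the second "-" of
+   * "--1" is read as the negative sign of the time zone hour, giving 2019-01-01 15:30:00.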
+ * + * @throws IllegalArgumentException if separator is missing + */ + private int parseSeparator(String fullInput, int index, Token token){ + int separatorsFound = 0; + int begin = index; + + while (index < fullInput.length() && + VALID_SEPARATORS.contains(fullInput.substring(index, index + 1))) { + if (!isLastCharacterOfSeparator(index, fullInput) || !(nextTokenIs("tzh", token)) + || separatorsFound == 0) { + separatorsFound++; + } + index++; + } + + if (separatorsFound == 0) { + throw new IllegalArgumentException("Missing separator at index " + index); + } + return begin + separatorsFound; + } + + private int parseIso8601Delimiter(String fullInput, int index, Token token) { + String substring; + substring = fullInput.substring(index, index + 1); + if (token.string.equalsIgnoreCase(substring)) { + index++; + } else { + throw new IllegalArgumentException( + "Missing ISO 8601 delimiter " + token.string.toUpperCase()); + } + return index; + } + + /** + * Is the next character something other than a separator? + */ + private boolean isLastCharacterOfSeparator(int index, String string) { + if (index == string.length()-1) { // if we're at the end of the string, yes + return true; + } + return !VALID_SEPARATORS.contains(string.substring(index + 1, index + 2)); + } + + /** + * Does the temporalUnit/temporalField of the next token match the pattern's? + */ + private boolean nextTokenIs(String pattern, Token currentToken) { + // make sure currentToken isn't the last one + if (tokens.indexOf(currentToken) == tokens.size() - 1) { + return false; + } + Token nextToken = tokens.get(tokens.indexOf(currentToken) + 1); + pattern = pattern.toLowerCase(); + return (VALID_TIME_ZONE_TOKENS.containsKey(pattern) + && VALID_TIME_ZONE_TOKENS.get(pattern) == nextToken.temporalUnit + || VALID_TEMPORAL_TOKENS.containsKey(pattern) + && VALID_TEMPORAL_TOKENS.get(pattern) == nextToken.temporalField); + } + + @Override public String getPattern() { + return pattern; + } +} diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/package-info.java common/src/java/org/apache/hadoop/hive/common/format/datetime/package-info.java new file mode 100644 index 0000000000..1e838be886 --- /dev/null +++ common/src/java/org/apache/hadoop/hive/common/format/datetime/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Deals with formatting and parsing of datetime objects. 
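+ * Includes the HiveDateTimeFormatter interface and its SQL:2016 (HiveSqlDateTimeFormatter),
+ * java.time (HiveJavaDateTimeFormatter) and java.text (HiveSimpleDateFormatter) implementations.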
+ */ +package org.apache.hadoop.hive.common.format.datetime; diff --git common/src/java/org/apache/hadoop/hive/common/type/Date.java common/src/java/org/apache/hadoop/hive/common/type/Date.java index 6ecfcf65c9..c1eb47153e 100644 --- common/src/java/org/apache/hadoop/hive/common/type/Date.java +++ common/src/java/org/apache/hadoop/hive/common/type/Date.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - *

- * http://www.apache.org/licenses/LICENSE-2.0 - *

+ * + * http://www.apache.org/licenses/LICENSE-2.0 + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -17,6 +17,9 @@ */ package org.apache.hadoop.hive.common.type; + +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; + import java.time.Instant; import java.time.LocalDate; import java.time.LocalDateTime; @@ -72,6 +75,17 @@ public String toString() { return localDate.format(PRINT_FORMATTER); } + public String toStringFormatted(HiveDateTimeFormatter formatter) { + if (formatter == null) { + return toString(); + } + try { + return formatter.format(this); + } catch (IllegalArgumentException e) { + return null; + } + } + public int hashCode() { return localDate.hashCode(); } @@ -137,6 +151,13 @@ public static Date valueOf(String s) { return new Date(localDate); } + public static Date valueOf(String s, HiveDateTimeFormatter formatter) { + if (formatter == null) { + return valueOf(s); + } + return formatter.parseDate(s); + } + public static Date ofEpochDay(int epochDay) { return new Date(LocalDate.ofEpochDay(epochDay)); } diff --git common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java index a8b7b6d186..cea1e8c2e1 100644 --- common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java +++ common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - *

- * http://www.apache.org/licenses/LICENSE-2.0 - *

+ * + * http://www.apache.org/licenses/LICENSE-2.0 + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hive.common.type; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; + import java.time.Instant; import java.time.LocalDateTime; import java.time.ZoneOffset; @@ -101,6 +103,17 @@ public String toString() { return localDateTime.format(PRINT_FORMATTER); } + public String toStringFormatted(HiveDateTimeFormatter formatter) { + if (formatter == null) { + return toString(); + } + try { + return formatter.format(this); + } catch (IllegalArgumentException e) { + return null; + } + } + public int hashCode() { return localDateTime.hashCode(); } @@ -166,6 +179,13 @@ public static Timestamp valueOf(String s) { return new Timestamp(localDateTime); } + public static Timestamp valueOf(String s, HiveDateTimeFormatter formatter) { + if (formatter == null) { + return valueOf(s); + } + return formatter.parseTimestamp(s); + } + public static Timestamp ofEpochSecond(long epochSecond) { return ofEpochSecond(epochSecond, 0); } diff --git common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java index f26f8ae01e..525c95a63d 100644 --- common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java +++ common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.common.type; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import java.math.BigDecimal; @@ -171,6 +172,18 @@ public static long millisToSeconds(long millis) { private static final int DATE_LENGTH = "YYYY-MM-DD".length(); + public static Timestamp stringToTimestamp(String s, HiveDateTimeFormatter formatter) { + if (formatter == null) { + return stringToTimestamp(s); + } + + try { + return Timestamp.valueOf(s, formatter); + } catch (IllegalArgumentException e) { + return null; + } + } + public static Timestamp stringToTimestamp(String s) { s = s.trim(); // Handle simpler cases directly avoiding exceptions diff --git common/src/java/org/apache/hive/common/util/DateParser.java common/src/java/org/apache/hive/common/util/DateParser.java index 5db14f1906..22bcd98c1d 100644 --- common/src/java/org/apache/hive/common/util/DateParser.java +++ common/src/java/org/apache/hive/common/util/DateParser.java @@ -17,6 +17,7 @@ */ package org.apache.hive.common.util; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; import org.apache.hadoop.hive.common.type.Date; /** @@ -36,9 +37,13 @@ public Date parseDate(String strValue) { } public boolean parseDate(String strValue, Date result) { + return parseDate(strValue, result, null); + } + + public boolean parseDate(String strValue, Date result, HiveDateTimeFormatter formatter) { Date parsedVal; try { - parsedVal = Date.valueOf(strValue); + parsedVal = Date.valueOf(strValue, formatter); } catch (IllegalArgumentException e) { parsedVal = null; } diff --git common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveJavaDateTimeFormatter.java common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveJavaDateTimeFormatter.java new file mode 100644 index 0000000000..82009f08e1 --- /dev/null +++ 
common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveJavaDateTimeFormatter.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.time.format.DateTimeFormatter;
+import java.time.format.DateTimeFormatterBuilder;
+import java.time.temporal.ChronoField;
+
+/**
+ * Test class for HiveJavaDateTimeFormatter.
+ */
+public class TestHiveJavaDateTimeFormatter {
+
+  private static final DateTimeFormatter DATE_TIME_FORMATTER;
+  static {
+    DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder();
+    builder.append(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
+    builder.optionalStart().appendFraction(ChronoField.NANO_OF_SECOND, 0, 9, true).optionalEnd();
+    DATE_TIME_FORMATTER = builder.toFormatter();
+  }
+  private HiveDateTimeFormatter formatter = new HiveJavaDateTimeFormatter(DATE_TIME_FORMATTER);
+
+  @Test
+  public void testFormat() {
+    Timestamp ts = Timestamp.valueOf("2019-01-01 00:00:00.99999");
+    Assert.assertEquals("2019-01-01 00:00:00.99999", formatter.format(ts));
+  }
+
+  @Test
+  public void testParse() {
+    String s = "2019-01-01 00:00:00.99999";
+    Assert.assertEquals(Timestamp.valueOf("2019-01-01 00:00:00.99999"),
+        formatter.parseTimestamp(s));
+  }
+
+}
diff --git common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSimpleDateFormatter.java common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSimpleDateFormatter.java
new file mode 100644
index 0000000000..d189c7b042
--- /dev/null
+++ common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSimpleDateFormatter.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.time.ZoneOffset;
+import java.util.TimeZone;
+
+/**
+ * Tests HiveSimpleDateFormatter.
+ */
+public class TestHiveSimpleDateFormatter {
+
+  private HiveDateTimeFormatter formatter =
+      new HiveSimpleDateFormatter("yyyy-MM-dd HH:mm:ss", TimeZone.getTimeZone(ZoneOffset.UTC));
+
+  @Test
+  public void testFormat() {
+    verifyFormat("2019-01-01 01:01:01");
+    verifyFormat("2019-01-01 00:00:00");
+    verifyFormat("1960-01-01 23:00:00");
+  }
+
+  private void verifyFormat(String s) {
+    Timestamp ts = Timestamp.valueOf(s);
+    Assert.assertEquals(s, formatter.format(ts));
+  }
+
+  @Test
+  public void testParse() {
+    verifyParse("2019-01-01 01:10:10");
+    verifyParse("1960-01-01 23:00:00");
+  }
+
+  private void verifyParse(String s) {
+    Timestamp ts = Timestamp.valueOf(s);
+    Assert.assertEquals(ts, formatter.parseTimestamp(s));
+  }
+}
diff --git common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java
new file mode 100644
index 0000000000..a0fd2f2f6a
--- /dev/null
+++ common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java
@@ -0,0 +1,266 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.format.datetime;
+
+import junit.framework.TestCase;
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+import java.time.format.DateTimeFormatterBuilder;
+import java.time.format.ResolverStyle;
+import java.time.format.SignStyle;
+import java.time.temporal.ChronoField;
+import java.time.temporal.TemporalField;
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import static java.time.temporal.ChronoField.DAY_OF_MONTH;
+import static java.time.temporal.ChronoField.HOUR_OF_DAY;
+import static java.time.temporal.ChronoField.MINUTE_OF_HOUR;
+import static java.time.temporal.ChronoField.MONTH_OF_YEAR;
+import static java.time.temporal.ChronoField.SECOND_OF_MINUTE;
+import static java.time.temporal.ChronoField.YEAR;
+
+/**
+ * Test class for HiveSqlDateTimeFormatter.
+ */
+
+public class TestHiveSqlDateTimeFormatter extends TestCase {
+
+  private HiveSqlDateTimeFormatter formatter;
+
+  public void testSetPattern() {
+    verifyPatternParsing(" ---yyyy-\'-:- -,.;/MM-dd--", new ArrayList<>(Arrays.asList(
+        null,
+        ChronoField.YEAR,
+        null,
+        ChronoField.MONTH_OF_YEAR,
+        null,
+        ChronoField.DAY_OF_MONTH,
+        null
+    )));
+
+    verifyPatternParsing("ymmdddhh24::mi:ss A.M. pm", 25, "ymmdddhh24::mi:ss A.M. pm",
+        new ArrayList<>(Arrays.asList(
+        ChronoField.YEAR,
+        ChronoField.MONTH_OF_YEAR,
+        ChronoField.DAY_OF_YEAR,
+        ChronoField.HOUR_OF_DAY,
+        null, ChronoField.MINUTE_OF_HOUR,
+        null, ChronoField.SECOND_OF_MINUTE,
+        null, ChronoField.AMPM_OF_DAY,
+        null, ChronoField.AMPM_OF_DAY
+    )));
+  }
+
+  public void testSetPatternWithBadPatterns() {
+    verifyBadPattern("e", true);
+    verifyBadPattern("yyyy-1", true);
+
+    verifyBadPattern("yyyy Y", true);
+    verifyBadPattern("yyyy R", true);
+    verifyBadPattern("yyyy-MM-DDD", true);
+    verifyBadPattern("yyyy-mm-DD DDD", true);
+    verifyBadPattern("yyyy-mm-dd HH24 HH12", true);
+    verifyBadPattern("yyyy-mm-dd HH24 AM", true);
+    verifyBadPattern("yyyy-mm-dd HH24 SSSSS", true);
+    verifyBadPattern("yyyy-mm-dd HH12 SSSSS", true);
+    verifyBadPattern("yyyy-mm-dd SSSSS AM", true);
+    verifyBadPattern("yyyy-mm-dd MI SSSSS", true);
+    verifyBadPattern("yyyy-mm-dd SS SSSSS", true);
+
+    verifyBadPattern("tzm", false);
+    verifyBadPattern("tzh", false);
+  }
+
+  public void testFormatTimestamp() {
+    checkFormatTs("rr rrrr ddd", "2018-01-03 00:00:00", "18 2018 003");
+    checkFormatTs("yyyy-mm-ddtsssss.ff4z", "2018-02-03 00:00:10.777777777", "2018-02-03T00010.7777Z");
+    checkFormatTs("hh24:mi:ss.ff1", "2018-02-03 01:02:03.999999999", "01:02:03.9");
+    checkFormatTs("y yyy hh:mi:ss.ffz", "2018-02-03 01:02:03.0070070", "8 018 01:02:03.007007Z");
+    checkFormatTs("am a.m. pm p.m. AM A.M. PM P.M.", "2018-02-03 01:02:03.0070070", "am a.m. am a.m. AM A.M. AM A.M.");
+  }
+
+  private void checkFormatTs(String pattern, String input, String expectedOutput) {
+    formatter = new HiveSqlDateTimeFormatter(pattern, false);
+    assertEquals(expectedOutput, formatter.format(toTimestamp(input)));
+  }
+
+  public void testFormatDate() {
+    checkFormatDate("rr rrrr ddd", "2018-01-03", "18 2018 003");
+    checkFormatDate("yyyy-mm-ddtsssss.ff4z", "2018-02-03", "2018-02-03T00000.0000Z");
+    checkFormatDate("hh24:mi:ss.ff1", "2018-02-03", "00:00:00.0");
+    checkFormatDate("y yyy T hh:mi:ss.ffz", "2018-02-03", "8 018 T 00:00:00.0Z");
+    checkFormatDate("am a.m. pm p.m. AM A.M. 
PM P.M.", "2018-02-03", "am a.m. am a.m. AM A.M. AM A.M."); + checkFormatDate("DDD", "2019-12-31", "365"); + checkFormatDate("DDD", "2020-12-31", "366"); + } + + private void checkFormatDate(String pattern, String input, String expectedOutput) { + formatter = new HiveSqlDateTimeFormatter(pattern, false); + assertEquals(expectedOutput, formatter.format(toDate(input))); + } + + public void testParseTimestamp() { + checkParseTimestamp("yyyy-mm-ddThh24:mi:ss.ff8z", "2018-02-03T04:05:06.5665Z", "2018-02-03 04:05:06.5665"); + checkParseTimestamp("yyyy-mm-dd hh24:mi:ss.ff", "2018-02-03 04:05:06.555555555", "2018-02-03 04:05:06.555555555"); + checkParseTimestamp("yy-mm-dd hh12:mi:ss", "99-2-03 04:05:06", "2099-02-03 04:05:06"); + checkParseTimestamp("rr-mm-dd", "00-02-03", "2000-02-03 00:00:00"); + checkParseTimestamp("rr-mm-dd", "49-02-03", "2049-02-03 00:00:00"); + checkParseTimestamp("rr-mm-dd", "50-02-03", "1950-02-03 00:00:00"); + checkParseTimestamp("rrrr-mm-dd", "00-02-03", "2000-02-03 00:00:00"); + checkParseTimestamp("rrrr-mm-dd", "49-02-03", "2049-02-03 00:00:00"); + checkParseTimestamp("rrrr-mm-dd", "50-02-03", "1950-02-03 00:00:00"); + checkParseTimestamp("yyy-mm-dd", "018-01-01", "2018-01-01 00:00:00"); + checkParseTimestamp("yyyyddd", "2018284", "2018-10-11 00:00:00"); + checkParseTimestamp("yyyyddd", "20184", "2018-01-04 00:00:00"); + checkParseTimestamp("yyyy-mm-ddThh24:mi:ss.ffz", "2018-02-03t04:05:06.444Z", "2018-02-03 04:05:06.444"); + checkParseTimestamp("hh:mi:ss A.M.", "04:05:06 P.M.", "1970-01-01 16:05:06"); + checkParseTimestamp("YYYY-MM-DD HH24:MI TZH:TZM", "2019-1-1 14:00--1:-30", "2019-01-01 15:30:00"); + checkParseTimestamp("YYYY-MM-DD HH24:MI TZH:TZM", "2019-1-1 14:00-1:30", "2019-01-01 12:30:00"); + checkParseTimestamp("TZM:TZH", "1 -3", "1970-01-01 03:01:00"); + checkParseTimestamp("TZH:TZM", "-0:30", "1970-01-01 00:30:00"); + checkParseTimestamp("TZM/YYY-MM-TZH/DD", "0/333-01-11/02", "2333-01-01 13:00:00"); + checkParseTimestamp("YYYY-MM-DD HH12:MI AM", "2019-01-01 11:00 p.m.", "2019-01-01 23:00:00"); + checkParseTimestamp("YYYY-MM-DD HH12:MI A.M..", "2019-01-01 11:00 pm.", "2019-01-01 23:00:00"); + + //Test "day in year" token in a leap year scenario + checkParseTimestamp("YYYY DDD", "2000 60", "2000-02-29 00:00:00"); + checkParseTimestamp("YYYY DDD", "2000 61", "2000-03-01 00:00:00"); + checkParseTimestamp("YYYY DDD", "2000 366", "2000-12-31 00:00:00"); + //Test timezone offset parsing without separators + checkParseTimestamp("YYYYMMDDHH12MIA.M.TZHTZM", "201812310800AM+0515", "2018-12-31 02:45:00"); + checkParseTimestamp("YYYYMMDDHH12MIA.M.TZHTZM", "201812310800AM0515", "2018-12-31 02:45:00"); + checkParseTimestamp("YYYYMMDDHH12MIA.M.TZHTZM", "201812310800AM-0515", "2018-12-31 13:15:00"); + } + + private void checkParseTimestamp(String pattern, String input, String expectedOutput) { + formatter = new HiveSqlDateTimeFormatter(pattern, true); + assertEquals(toTimestamp(expectedOutput), formatter.parseTimestamp(input)); + } + + public void testParseDate() { + checkParseDate("yyyy-mm-dd hh mi ss", "2018/01/01 2.2.2", "2018-01-01"); + checkParseDate("rr-mm-dd", "00-02-03", "2000-02-03"); + checkParseDate("rr-mm-dd", "49-02-03", "2049-02-03"); + checkParseDate("rr-mm-dd", "50-02-03", "1950-02-03"); + } + + private void checkParseDate(String pattern, String input, String expectedOutput) { + formatter = new HiveSqlDateTimeFormatter(pattern, true); + assertEquals(toDate(expectedOutput), formatter.parseDate(input)); + } + + public void testParseTimestampError() { + 
verifyBadParseString("yyyy", "2019-02-03"); + verifyBadParseString("yyyy-mm-dd ", "2019-02-03"); //separator missing + verifyBadParseString("yyyy-mm-dd", "2019-02-03..."); //extra separators + verifyBadParseString("yyyy-mm-dd hh12:mi:ss", "2019-02-03 14:00:00"); //hh12 out of range + verifyBadParseString("yyyy-dddsssss", "2019-912345"); + verifyBadParseString("yyyy-mm-dd", "2019-13-23"); //mm out of range + verifyBadParseString("yyyy-mm-dd tzh:tzm", "2019-01-01 +16:00"); //tzh out of range + verifyBadParseString("yyyy-mm-dd tzh:tzm", "2019-01-01 +14:60"); //tzm out of range + verifyBadParseString("YYYY DDD", "2000 367"); //ddd out of range + } + + private void verifyBadPattern(String string, boolean forParsing) { + try { + formatter = new HiveSqlDateTimeFormatter(string, forParsing); + fail(); + } catch (Exception e) { + assertEquals(e.getClass().getName(), IllegalArgumentException.class.getName()); + } + } + + /** + * Verify pattern is parsed correctly. + * Check: + * -token.temporalField for each token + * -sum of token.lengths + * -concatenation of token.strings + */ + private void verifyPatternParsing(String pattern, ArrayList temporalFields) { + verifyPatternParsing(pattern, pattern.length(), pattern.toLowerCase(), temporalFields); + } + + private void verifyPatternParsing(String pattern, int expectedPatternLength, + String expectedPattern, ArrayList temporalFields) { + formatter = new HiveSqlDateTimeFormatter(pattern, false); + assertEquals(temporalFields.size(), formatter.tokens.size()); + StringBuilder sb = new StringBuilder(); + int actualPatternLength = 0; + for (int i = 0; i < temporalFields.size(); i++) { + assertEquals("Generated list of tokens not correct", temporalFields.get(i), + formatter.tokens.get(i).temporalField); + sb.append(formatter.tokens.get(i).string); + actualPatternLength += formatter.tokens.get(i).length; + } + assertEquals("Token strings concatenated don't match original pattern string", + expectedPattern, sb.toString()); + assertEquals(expectedPatternLength, actualPatternLength); + } + + private void verifyBadParseString(String pattern, String string) { + try { + formatter = new HiveSqlDateTimeFormatter(pattern, true); + formatter.parseTimestamp(string); + fail(); + } catch (Exception e) { + assertEquals(e.getClass().getName(), IllegalArgumentException.class.getName()); + } + } + + + // Methods that construct datetime objects using java.time.DateTimeFormatter. + + public static Date toDate(String s) { + LocalDate localDate = LocalDate.parse(s, DATE_FORMATTER); + return Date.ofEpochDay((int) localDate.toEpochDay()); + } + + /** + * This is effectively the old Timestamp.valueOf method. 
+ */ + public static Timestamp toTimestamp(String s) { + LocalDateTime localDateTime = LocalDateTime.parse(s.trim(), TIMESTAMP_FORMATTER); + return Timestamp.ofEpochSecond( + localDateTime.toEpochSecond(ZoneOffset.UTC), localDateTime.getNano()); + } + + private static final DateTimeFormatter DATE_FORMATTER = + DateTimeFormatter.ofPattern("yyyy-MM-dd"); + private static final DateTimeFormatter TIMESTAMP_FORMATTER; + static { + DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder(); + builder.appendValue(YEAR, 1, 10, SignStyle.NORMAL).appendLiteral('-') + .appendValue(MONTH_OF_YEAR, 1, 2, SignStyle.NORMAL).appendLiteral('-') + .appendValue(DAY_OF_MONTH, 1, 2, SignStyle.NORMAL) + .optionalStart().appendLiteral(" ") + .appendValue(HOUR_OF_DAY, 1, 2, SignStyle.NORMAL).appendLiteral(':') + .appendValue(MINUTE_OF_HOUR, 1, 2, SignStyle.NORMAL).appendLiteral(':') + .appendValue(SECOND_OF_MINUTE, 1, 2, SignStyle.NORMAL) + .optionalStart().appendFraction(ChronoField.NANO_OF_SECOND, 1, 9, true).optionalEnd() + .optionalEnd(); + TIMESTAMP_FORMATTER = builder.toFormatter().withResolverStyle(ResolverStyle.LENIENT); + } +} diff --git common/src/test/org/apache/hadoop/hive/common/format/datetime/package-info.java common/src/test/org/apache/hadoop/hive/common/format/datetime/package-info.java new file mode 100644 index 0000000000..70ee4266f4 --- /dev/null +++ common/src/test/org/apache/hadoop/hive/common/format/datetime/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Tests formatting and parsing of datetime objects. 
+ */ +package org.apache.hadoop.hive.common.format.datetime; diff --git common/src/test/org/apache/hive/common/util/TestTimestampParser.java common/src/test/org/apache/hive/common/util/TestTimestampParser.java index 00a7904ecf..5bf1119cef 100644 --- common/src/test/org/apache/hive/common/util/TestTimestampParser.java +++ common/src/test/org/apache/hive/common/util/TestTimestampParser.java @@ -116,8 +116,7 @@ public void testPattern1() { }; String[] invalidCases = { - "1945-12-31-23:59:59", - "12345", + "12345" }; testValidCases(tp, validCases); @@ -147,8 +146,7 @@ public void testMillisParser() { }; String[] invalidCases = { - "1945-12-31-23:59:59", - "1420509274123-", + "1420509274123-" }; testValidCases(tp, validCases); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index fa9d1e9783..0226bb9d0c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -42,8 +42,11 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.CastBooleanToVarCharViaLongToVarChar; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastCharToBinary; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToChar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToCharWithFormat; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToString; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToStringWithFormat; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToVarChar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToVarCharWithFormat; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToChar; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToDecimal; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToString; @@ -67,10 +70,13 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToBoolean; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToDecimal; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToChar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToCharWithFormat; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToDecimal; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToDouble; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToString; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToStringWithFormat; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToVarChar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToVarCharWithFormat; import org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.ConvertDecimal64ToDecimal; import org.apache.hadoop.hive.ql.exec.vector.expressions.Decimal64ColumnInList; @@ -2163,9 +2169,9 @@ public VectorExpression instantiateExpression(Class vclass, TypeInfo returnTy DataTypePhysicalVariation returnDataTypePhysicalVariation, Object...args) throws HiveException { VectorExpression ve = null; - Constructor ctor = getConstructor(vclass); - int numParams = ctor.getParameterTypes().length; int argsLength = (args == null) ? 
0 : args.length; + Constructor ctor = getConstructor(vclass, argsLength); + int numParams = ctor.getParameterTypes().length; if (numParams == 0) { try { ve = (VectorExpression) ctor.newInstance(); @@ -2173,7 +2179,7 @@ public VectorExpression instantiateExpression(Class vclass, TypeInfo returnTy throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with 0 arguments, exception: " + getStackTraceAsSingleLine(ex)); } } else if (numParams == argsLength) { try { ve = (VectorExpression) ctor.newInstance(args); } catch (Exception ex) { @@ -3139,9 +3145,17 @@ private VectorExpression getCastToString(List childExpr, TypeInfo } else if (isDecimalFamily(inputType)) { return createVectorExpression(CastDecimalToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); } else if (isDateFamily(inputType)) { - return createVectorExpression(CastDateToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + if (childExpr.size() < 2) { + return createVectorExpression(CastDateToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + } else { // second argument will be format string + return createVectorExpression(CastDateToStringWithFormat.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + } } else if (isTimestampFamily(inputType)) { - return createVectorExpression(CastTimestampToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + if (childExpr.size() < 2) { + return createVectorExpression(CastTimestampToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + } else { // second argument will be format string + return createVectorExpression(CastTimestampToStringWithFormat.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + } } else if (isStringFamily(inputType)) { // STRING and VARCHAR types require no conversion, so use a no-op. 
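+ // Illustrative (names hypothetical): SELECT CAST(ts_col AS STRING FORMAT 'YYYY-MM-DD') + // reaches this method with childExpr = [ts_col, 'YYYY-MM-DD'], so the childExpr.size() == 2 + // branches above pick the *WithFormat expressions.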
@@ -3173,9 +3187,17 @@ private VectorExpression getCastToChar(List childExpr, TypeInfo re } else if (isDecimalFamily(inputType)) { return createVectorExpression(CastDecimalToChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); } else if (isDateFamily(inputType)) { - return createVectorExpression(CastDateToChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + if (childExpr.size() < 2) { + return createVectorExpression(CastDateToChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + } else { + return createVectorExpression(CastDateToCharWithFormat.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + } } else if (isTimestampFamily(inputType)) { - return createVectorExpression(CastTimestampToChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + if (childExpr.size() < 2) { + return createVectorExpression(CastTimestampToChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + } else { + return createVectorExpression(CastTimestampToCharWithFormat.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + } } else if (isStringFamily(inputType)) { return createVectorExpression(CastStringGroupToChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); } @@ -3203,9 +3225,17 @@ private VectorExpression getCastToVarChar(List childExpr, TypeInfo } else if (isDecimalFamily(inputType)) { return createVectorExpression(CastDecimalToVarChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); } else if (isDateFamily(inputType)) { - return createVectorExpression(CastDateToVarChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + if (childExpr.size() < 2) { + return createVectorExpression(CastDateToVarChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + } else { + return createVectorExpression(CastDateToVarCharWithFormat.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + } } else if (isTimestampFamily(inputType)) { - return createVectorExpression(CastTimestampToVarChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + if (childExpr.size() < 2) { + return createVectorExpression(CastTimestampToVarChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + } else { + return createVectorExpression(CastTimestampToVarCharWithFormat.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + } } else if (isStringFamily(inputType)) { return createVectorExpression(CastStringGroupToVarChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); } @@ -4089,7 +4119,7 @@ private Timestamp evaluateCastToTimestamp(ExprNodeDesc expr) throws HiveExceptio return ((org.apache.hadoop.hive.common.type.Timestamp) java).toSqlTimestamp(); } - private Constructor getConstructor(Class cl) throws HiveException { + private Constructor getConstructor(Class cl, int argsCount) throws HiveException { try { Constructor [] ctors = cl.getDeclaredConstructors(); if (ctors.length == 1) { @@ -4097,11 +4127,12 @@ private Timestamp evaluateCastToTimestamp(ExprNodeDesc expr) throws HiveExceptio } Constructor defaultCtor = cl.getConstructor(); for (Constructor ctor : ctors) { - if (!ctor.equals(defaultCtor)) { + if (!ctor.equals(defaultCtor) && ctor.getParameterCount() - 1 == argsCount) { return ctor; } } - throw new HiveException("Only default constructor found"); + 
throw new HiveException("Only default constructor found, or no constructor found with " + + argsCount + "arguments"); } catch (Exception ex) { throw new HiveException(ex); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToCharWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToCharWithFormat.java new file mode 100644 index 0000000000..01a1028e02 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToCharWithFormat.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; + +import java.nio.charset.StandardCharsets; + +/** + * Vectorized UDF for CAST ( TO CHAR() WITH FORMAT ). + */ +public class CastDateToCharWithFormat extends CastDateToChar { + + private static final long serialVersionUID = 1L; + private HiveDateTimeFormatter formatter; + + public CastDateToCharWithFormat() { + super(); + } + + public CastDateToCharWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) { + super(inputColumn, outputColumnNum); + + if (patternBytes == null) { + throw new IllegalStateException("Tried to cast ( to char with format )," + + " but not found"); + } + formatter = new HiveSqlDateTimeFormatter(); + formatter.setPattern(new String(patternBytes, StandardCharsets.UTF_8), false); + } + + @Override + protected void func(BytesColumnVector outV, long[] vector, int i) { + super.func(outV, vector, i, formatter); + } + + @Override + public String vectorExpressionParameters() { + return super.vectorExpressionParameters() + ", format pattern: " + formatter.getPattern(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java index dfa9f8a00d..978875e312 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java @@ -18,28 +18,31 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.HiveSimpleDateFormatter; +import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; -import org.apache.hadoop.hive.serde2.io.DateWritableV2; import java.sql.Date; -import java.text.SimpleDateFormat; import java.util.TimeZone; 
public class CastDateToString extends LongToStringUnaryUDF { private static final long serialVersionUID = 1L; protected transient Date dt = new Date(0); - private transient SimpleDateFormat formatter; + private transient HiveDateTimeFormatter formatter; public CastDateToString() { super(); - formatter = new SimpleDateFormat("yyyy-MM-dd"); - formatter.setTimeZone(TimeZone.getTimeZone("UTC")); + initFormatter(); } public CastDateToString(int inputColumn, int outputColumnNum) { super(inputColumn, outputColumnNum); - formatter = new SimpleDateFormat("yyyy-MM-dd"); - formatter.setTimeZone(TimeZone.getTimeZone("UTC")); + initFormatter(); + } + + private void initFormatter() { + formatter = new HiveSimpleDateFormatter("yyyy-MM-dd", TimeZone.getTimeZone("UTC")); } // The assign method will be overridden for CHAR and VARCHAR. @@ -47,10 +50,23 @@ protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) { outV.setVal(i, bytes, 0, length); } + private void assignNull(BytesColumnVector outV, int i) { + outV.isNull[i] = true; + outV.noNulls = false; + } + @Override protected void func(BytesColumnVector outV, long[] vector, int i) { - dt.setTime(DateWritableV2.daysToMillis((int) vector[i])); - byte[] temp = formatter.format(dt).getBytes(); - assign(outV, i, temp, temp.length); + func(outV, vector, i, formatter); + } + + protected void func(BytesColumnVector outV, long[] vector, int i, HiveDateTimeFormatter formatter) { + try { + byte[] temp = formatter.format(Timestamp.ofEpochMilli( + org.apache.hadoop.hive.common.type.Date.ofEpochDay((int) vector[i]).toEpochMilli())).getBytes(); + assign(outV, i, temp, temp.length); + } catch (Exception e) { + assignNull(outV, i); + } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToStringWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToStringWithFormat.java new file mode 100644 index 0000000000..b05c2cb521 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToStringWithFormat.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +import java.nio.charset.StandardCharsets; + +/** + * Vectorized UDF for CAST (<date> TO STRING WITH FORMAT <pattern>). 
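+ * Illustrative usage (column name hypothetical): + * SELECT CAST(date_col AS STRING FORMAT 'DDD'); -- "365" for 2019-12-31, per the tests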
+ */ +public class CastDateToStringWithFormat extends CastDateToString { + private static final long serialVersionUID = 1L; + private HiveDateTimeFormatter formatter; + + public CastDateToStringWithFormat() { + super(); + } + + public CastDateToStringWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) { + super(inputColumn, outputColumnNum); + + if (patternBytes == null) { + throw new IllegalStateException("Tried to cast (<date> to string with format <pattern>)," + + " but <pattern> not found"); + } + formatter = new HiveSqlDateTimeFormatter(); + formatter.setPattern(new String(patternBytes, StandardCharsets.UTF_8), false); + } + + // The assign method will be overridden for CHAR and VARCHAR. + protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) { + outV.setVal(i, bytes, 0, length); + } + + @Override + protected void func(BytesColumnVector outV, long[] vector, int i) { + super.func(outV, vector, i, formatter); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.INT_FAMILY, + VectorExpressionDescriptor.ArgumentType.STRING) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR); + return b.build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToVarCharWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToVarCharWithFormat.java new file mode 100644 index 0000000000..d89bba9dcf --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToVarCharWithFormat.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; + +import java.nio.charset.StandardCharsets; + +/** + * Vectorized UDF for CAST (<date> TO VARCHAR(<length>) WITH FORMAT <pattern>). 
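+ * Illustrative usage (column name hypothetical): + * SELECT CAST(date_col AS VARCHAR(10) FORMAT 'YYYY-MM-DD');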
+ */ +public class CastDateToVarCharWithFormat extends CastDateToVarChar { + + private static final long serialVersionUID = 1L; + private HiveDateTimeFormatter formatter; + + public CastDateToVarCharWithFormat() { + super(); + } + + public CastDateToVarCharWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) { + super(inputColumn, outputColumnNum); + + if (patternBytes == null) { + throw new IllegalStateException("Tried to cast ( to varchar with format )," + + " but not found"); + } + formatter = new HiveSqlDateTimeFormatter(); + formatter.setPattern(new String(patternBytes, StandardCharsets.UTF_8), false); + } + + @Override + protected void func(BytesColumnVector outV, long[] vector, int i) { + super.func(outV, vector, i, formatter); + } + + @Override + public String vectorExpressionParameters() { + return super.vectorExpressionParameters() + ", format pattern: " + formatter.getPattern(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java index a6dff12e1a..44a451b3bc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -151,10 +152,21 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException { } } - private void evaluate(LongColumnVector outputColVector, BytesColumnVector inV, int i) { + /** + * Used by CastStringToDate. + */ + protected void evaluate(LongColumnVector outputColVector, BytesColumnVector inV, int i) { + evaluate(outputColVector, inV, i, null); + } + + /** + * Used by CastStringToDateWithFormat. + */ + protected void evaluate(LongColumnVector outputColVector, BytesColumnVector inV, int i, + HiveDateTimeFormatter formatter) { String dateString = new String(inV.vector[i], inV.start[i], inV.length[i], StandardCharsets.UTF_8); Date hDate = new Date(); - if (dateParser.parseDate(dateString, hDate)) { + if (dateParser.parseDate(dateString, hDate, formatter)) { outputColVector.vector[i] = DateWritableV2.dateToDays(hDate); return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDateWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDateWithFormat.java new file mode 100644 index 0000000000..ba5c12f61d --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDateWithFormat.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +import java.nio.charset.StandardCharsets; + +/** + * Vectorized UDF for CAST ( TO DATE WITH FORMAT ). + */ +public class CastStringToDateWithFormat extends CastStringToDate { + + private static final long serialVersionUID = 1L; + private HiveDateTimeFormatter formatter; + + public CastStringToDateWithFormat() { + super(); + } + + public CastStringToDateWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) { + super(inputColumn, outputColumnNum); + + if (patternBytes == null) { + throw new IllegalStateException("Tried to cast ( to date with format )," + + " but not found"); + } + formatter = new HiveSqlDateTimeFormatter(); + formatter.setPattern(new String(patternBytes, StandardCharsets.UTF_8), true); + } + + @Override + protected void evaluate(LongColumnVector outputColVector, + BytesColumnVector inputColVector, int i) { + super.evaluate(outputColVector, inputColVector, i, formatter); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, + VectorExpressionDescriptor.ArgumentType.STRING) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR); + return b.build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestamp.java index b48b0136eb..f8d81cdb13 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestamp.java @@ -19,8 +19,9 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.util.Arrays; -import java.sql.Timestamp; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -143,21 +144,40 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException { } } - private void evaluate(TimestampColumnVector outputColVector, BytesColumnVector inputColVector, int i) { + /** + * This is used by CastStringToTimestamp. 
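+ * It delegates with a null formatter, which keeps the legacy parsing behavior.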
+ */ + protected void evaluate(TimestampColumnVector outputColVector, BytesColumnVector inputColVector, int i) { + evaluate(outputColVector, inputColVector, i, null); + } + + /** + * This is used by CastStringToTimestampWithFormat. + */ + protected void evaluate(TimestampColumnVector outputColVector, BytesColumnVector inputColVector, + int i, HiveDateTimeFormatter formatter) { try { - org.apache.hadoop.hive.common.type.Timestamp timestamp = - PrimitiveObjectInspectorUtils.getTimestampFromString( + Timestamp timestamp = PrimitiveObjectInspectorUtils.getTimestampFromString( new String( inputColVector.vector[i], inputColVector.start[i], inputColVector.length[i], - "UTF-8")); - outputColVector.set(i, timestamp.toSqlTimestamp()); + "UTF-8"), + formatter); + if (timestamp != null) { + outputColVector.set(i, timestamp.toSqlTimestamp()); + } else { + setNullValue(outputColVector, i); + } } catch (Exception e) { - outputColVector.setNullValue(i); - outputColVector.isNull[i] = true; - outputColVector.noNulls = false; + setNullValue(outputColVector, i); } } + private void setNullValue(TimestampColumnVector outputColVector, int i) { + outputColVector.setNullValue(i); + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; + } + @Override public String vectorExpressionParameters() { return getColumnParamString(0, inputColumn); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestampWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestampWithFormat.java new file mode 100644 index 0000000000..a8a3749fad --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToTimestampWithFormat.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +import java.nio.charset.StandardCharsets; + +/** + * Vectorized UDF for CAST ( TO TIMESTAMP WITH FORMAT ). 
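+ * Illustrative usage (literal hypothetical), expected value per TestHiveSqlDateTimeFormatter: + * SELECT CAST('2018284' AS TIMESTAMP FORMAT 'YYYYDDD'); -- 2018-10-11 00:00:00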
+ */ +public class CastStringToTimestampWithFormat extends CastStringToTimestamp { + + private static final long serialVersionUID = 1L; + private HiveDateTimeFormatter formatter; + + public CastStringToTimestampWithFormat() { + super(); + } + + public CastStringToTimestampWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) { + super(inputColumn, outputColumnNum); + + if (patternBytes == null) { + throw new IllegalStateException("Tried to cast (<string> to timestamp with format" + + " <pattern>), but <pattern> not found"); + } + formatter = new HiveSqlDateTimeFormatter(); + formatter.setPattern(new String(patternBytes, StandardCharsets.UTF_8), true); + } + + @Override + protected void evaluate(TimestampColumnVector outputColVector, + BytesColumnVector inputColVector, int i) { + super.evaluate(outputColVector, inputColVector, i, formatter); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, + VectorExpressionDescriptor.ArgumentType.STRING) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR); + return b.build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToCharWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToCharWithFormat.java new file mode 100644 index 0000000000..91f60a8892 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToCharWithFormat.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; + +import java.nio.charset.StandardCharsets; + +/** + * Vectorized UDF for CAST (<timestamp> TO CHAR(<length>) WITH FORMAT <pattern>). 
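+ * Illustrative usage (column name hypothetical): + * SELECT CAST(ts_col AS CHAR(10) FORMAT 'YYYY-MM-DD');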
+ */ +public class CastTimestampToCharWithFormat extends CastTimestampToChar { + + private static final long serialVersionUID = 1L; + private HiveDateTimeFormatter formatter; + + public CastTimestampToCharWithFormat() { + super(); + } + + public CastTimestampToCharWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) { + super(inputColumn, outputColumnNum); + + if (patternBytes == null) { + throw new IllegalStateException("Tried to cast ( to char with format )," + + " but not found"); + } + formatter = new HiveSqlDateTimeFormatter(); + formatter.setPattern(new String(patternBytes, StandardCharsets.UTF_8), false); + } + + public CastTimestampToCharWithFormat( + int inputColumn, byte[] patternBytes, int maxLength, int outputColumnNum) { + this(inputColumn, patternBytes, outputColumnNum); + setMaxLength(maxLength); + } + + @Override + protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i) { + super.func(outV, inV, i, formatter); + } + + @Override + public String vectorExpressionParameters() { + return super.vectorExpressionParameters() + ", format pattern: " + formatter.getPattern(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToString.java index adc3a9d7b9..a1953871d5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToString.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToString.java @@ -18,6 +18,9 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.HiveJavaDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.WrongFormatterException; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; @@ -42,12 +45,25 @@ PRINT_FORMATTER = builder.toFormatter(); } + private transient HiveDateTimeFormatter format; + public CastTimestampToString() { super(); + initFormatter(); } public CastTimestampToString(int inputColumn, int outputColumnNum) { super(inputColumn, outputColumnNum); + initFormatter(); + } + + private void initFormatter() { + try { + format = new HiveJavaDateTimeFormatter(); + format.setFormatter(PRINT_FORMATTER); + } catch (WrongFormatterException e) { + // this will never happen + } } // The assign method will be overridden for CHAR and VARCHAR. 
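+ // Expected behavior, sketched (not asserted here): the wrapper prints exactly what + // PRINT_FORMATTER printed before, e.g. format(Timestamp.ofEpochMilli(0, 0)) gives + // "1970-01-01 00:00:00", the fractional part being omitted when the nanos are zero.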
@@ -55,14 +71,27 @@ protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) { outV.setVal(i, bytes, 0, length); } + private void assignNull(BytesColumnVector outV, int i) { + outV.isNull[i] = true; + outV.noNulls = false; + } + @Override protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i) { - byte[] temp = LocalDateTime.ofInstant(Instant.ofEpochMilli(inV.time[i]), ZoneOffset.UTC) - .withNano(inV.nanos[i]) - .format(PRINT_FORMATTER).getBytes(); - assign(outV, i, temp, temp.length); + func(outV, inV, i, format); } + protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i, HiveDateTimeFormatter formatter) { + try { + String formattedLocalDateTime = formatter.format( + org.apache.hadoop.hive.common.type.Timestamp.ofEpochMilli(inV.time[i], inV.nanos[i])); + + byte[] temp = formattedLocalDateTime.getBytes(); + assign(outV, i, temp, temp.length); + } catch (Exception e) { + assignNull(outV, i); + } + } public static String getTimestampString(Timestamp ts) { return LocalDateTime.ofInstant(Instant.ofEpochMilli(ts.getTime()), ZoneOffset.UTC) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToStringWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToStringWithFormat.java new file mode 100644 index 0000000000..375634c2a8 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToStringWithFormat.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +import java.nio.charset.StandardCharsets; + +/** + * Vectorized UDF for CAST ( TO STRING WITH FORMAT ). 
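+ * Illustrative usage (column name hypothetical), expected output per the formatter tests: + * SELECT CAST(ts_col AS STRING FORMAT 'HH24:MI:SS.FF1'); -- e.g. "01:02:03.9"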
+ */ +public class CastTimestampToStringWithFormat extends CastTimestampToString { + private static final long serialVersionUID = 1L; + private HiveDateTimeFormatter formatter; + + public CastTimestampToStringWithFormat() { + super(); + } + + public CastTimestampToStringWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) { + super(inputColumn, outputColumnNum); + + if (patternBytes == null) { + throw new IllegalStateException("Tried to cast ( to string with format" + + " ), but not found"); + } + formatter = new HiveSqlDateTimeFormatter(); + formatter.setPattern(new String(patternBytes, StandardCharsets.UTF_8), false); + } + + @Override + protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i) { + super.func(outV, inV, i, formatter); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.TIMESTAMP, + VectorExpressionDescriptor.ArgumentType.STRING) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR); + return b.build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToVarCharWithFormat.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToVarCharWithFormat.java new file mode 100644 index 0000000000..56f2ce24c8 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToVarCharWithFormat.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; + +import java.nio.charset.StandardCharsets; + +/** + * Vectorized UDF for CAST ( TO VARCHAR( WITH FORMAT ). 
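+ * Illustrative usage (column name hypothetical; note HH12 may be combined with AM, + * while HH24 with AM is rejected by the pattern parser): + * SELECT CAST(ts_col AS VARCHAR(25) FORMAT 'DD-MM-YYYY HH12:MI AM');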
+ */ +public class CastTimestampToVarCharWithFormat extends CastTimestampToVarChar { + + private static final long serialVersionUID = 1L; + private HiveDateTimeFormatter formatter; + + public CastTimestampToVarCharWithFormat() { + super(); + } + + public CastTimestampToVarCharWithFormat(int inputColumn, byte[] patternBytes, int outputColumnNum) { + super(inputColumn, outputColumnNum); + + if (patternBytes == null) { + throw new IllegalStateException("Tried to cast ( to varchar with format" + + "), but not found"); + } + formatter = new HiveSqlDateTimeFormatter(); + formatter.setPattern(new String(patternBytes, StandardCharsets.UTF_8), false); + } + + public CastTimestampToVarCharWithFormat( + int inputColumn, byte[] patternBytes, int maxLength, int outputColumnNum) { + this(inputColumn, patternBytes, outputColumnNum); + setMaxLength(maxLength); + } + + @Override + protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i) { + super.func(outV, inV, i, formatter); + } + + @Override + public String vectorExpressionParameters() { + return super.vectorExpressionParameters() + ", format pattern: " + formatter.getPattern(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java index 1134cf3bd1..3220f159fd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java @@ -101,9 +101,12 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import java.math.BigDecimal; import java.math.BigInteger; @@ -326,13 +329,13 @@ private RexNode convert(ExprNodeGenericFuncDesc func) throws SemanticException { childRexNodeLst.add(tmpRN); } - // See if this is an explicit cast. + // See if this is an explicit cast. Cast with format handled below. RexNode expr = null; RelDataType retType = null; expr = handleExplicitCast(func, childRexNodeLst); if (expr == null) { - // This is not a cast; process the function. + // This is not an explicit cast; process the function. retType = TypeConverter.convert(func.getTypeInfo(), cluster.getTypeFactory()); SqlOperator calciteOp = SqlFunctionConverter.getCalciteOperator(func.getFuncText(), func.getGenericUDF(), argTypeBldr.build(), retType); @@ -387,6 +390,29 @@ private RexNode convert(ExprNodeGenericFuncDesc func) throws SemanticException { childRexNodeLst.clear(); childRexNodeLst.add(cluster.getRexBuilder().makeCall(cmpOp, rangeL, op)); childRexNodeLst.add(cluster.getRexBuilder().makeCall(cmpOp, op, rangeH)); + + // Handle cast with format (TODO GenericUDFToTimestampLocalTZ will also need this treatment) + // by adding extra typeInfo parameters (e.g. 
length) as third argument to UDF, + // Otherwise an optimized TOK_FUNCTION subtree in the AST will look like: + // (tok_function char (. (tok_table_or_col ) ) '') + // which is missing char length info and will throw a NPE. + // Resulting TOK_FUNCTION subtree in the AST will look like: + // (tok_function char (. (tok_table_or_col
) ) '' ) + // and the 3rd argument will be handled in GenericUDFToChar and GenericUDFToVarchar. + } else if (childRexNodeLst.size() == 2) { + GenericUDF udf = func.getGenericUDF(); + if (udf instanceof GenericUDFToVarchar || udf instanceof GenericUDFToChar) { + ExprNodeConstantDesc exprNodeDesc = new ExprNodeConstantDesc(); + if (udf instanceof GenericUDFToChar) { + exprNodeDesc.setValue( + ((CharTypeInfo) ((GenericUDFToChar) udf).getTypeInfo()).getLength()); + } else { //GenericUDFToVarchar + exprNodeDesc.setValue( + ((VarcharTypeInfo) ((GenericUDFToVarchar) udf).getTypeInfo()).getLength()); + } + exprNodeDesc.setTypeInfo(TypeInfoFactory.getPrimitiveTypeInfo("int")); + childRexNodeLst.add(2, convert(exprNodeDesc)); + } } expr = cluster.getRexBuilder().makeCall(retType, calciteOp, childRexNodeLst); } else { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g index f22511ad67..2a65f0e74e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g @@ -248,7 +248,8 @@ castExpression expression KW_AS primitiveType - RPAREN -> ^(TOK_FUNCTION primitiveType expression) + (KW_FORMAT expression)? + RPAREN -> ^(TOK_FUNCTION primitiveType expression*) ; caseExpression diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java index bcc4114099..6c3c3349bb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java @@ -17,8 +17,6 @@ */ package org.apache.hadoop.hive.ql.udf.generic; -import java.text.SimpleDateFormat; - import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateSubColCol; @@ -46,7 +44,6 @@ + " '2009-07-29'") @VectorizedExpressions({VectorUDFDateSubColScalar.class, VectorUDFDateSubScalarCol.class, VectorUDFDateSubColCol.class}) public class GenericUDFDateSub extends GenericUDFDateAdd { - private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); public GenericUDFDateSub() { this.signModifier = -1; diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTimestamp.java index 70f57b7727..83cf02866a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTimestamp.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.hive.ql.udf.generic; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; @@ -29,9 +29,9 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDoubleToTimestamp; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastLongToTimestamp; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToTimestampWithFormat; import 
org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.TimestampConverter; @@ -48,9 +48,12 @@ * */ @Description(name = "timestamp", -value = "cast(date as timestamp) - Returns timestamp") + value = "cast(<expression> as timestamp [format <pattern>]) - Returns timestamp", + extended = "If format is specified with FORMAT argument then SQL:2016 datetime formats will be " + + "used.") @VectorizedExpressions({CastLongToTimestamp.class, CastDateToTimestamp.class, - CastDoubleToTimestamp.class, CastDecimalToTimestamp.class, CastStringToTimestamp.class}) + CastDoubleToTimestamp.class, CastDecimalToTimestamp.class, CastStringToTimestamp.class, + CastStringToTimestampWithFormat.class}) public class GenericUDFTimestamp extends GenericUDF { private transient PrimitiveObjectInspector argumentOI; @@ -88,6 +91,13 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen PrimitiveObjectInspectorFactory.writableTimestampObjectInspector); tc.setIntToTimestampInSeconds(intToTimestampInSeconds); + + // for CAST WITH FORMAT + if (arguments.length > 1 && arguments[1] != null) { + HiveDateTimeFormatter formatter = new HiveSqlDateTimeFormatter(); + formatter.setPattern(getConstantStringValue(arguments, 1), true); + tc.setDateTimeFormatter(formatter); + } + return PrimitiveObjectInspectorFactory.writableTimestampObjectInspector; } @@ -97,17 +107,21 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { if (o0 == null) { return null; } - return tc.convert(o0); } @Override public String getDisplayString(String[] children) { - assert (children.length == 1); + assert (1 <= children.length && children.length <= 2); StringBuilder sb = new StringBuilder(); sb.append("CAST( "); sb.append(children[0]); - sb.append(" AS TIMESTAMP)"); + sb.append(" AS TIMESTAMP"); + if (children.length == 2) { + sb.append(" FORMAT "); + sb.append(children[1]); + } + sb.append(")"); return sb.toString(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToChar.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToChar.java index 899abf76b8..3626d6c213 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToChar.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToChar.java @@ -19,6 +19,8 @@ import java.io.Serializable; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.Description; @@ -34,12 +36,14 @@ import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; @Description(name = "char", -value = "CAST(<expression> as CHAR(length)) - Converts the argument to a char value.", +value = "CAST(<expression> as CHAR(length) [FORMAT <pattern>]) - Converts the argument to a char" + + " value.", extended = "Values will be truncated if the input value is too long to fit" -+ " within the char length." -+ "Example:\n " -+ " > SELECT CAST(1234 AS char(10)) FROM src LIMIT 1;\n" -+ " '1234'") + + " within the char length. 
If format is specified with FORMAT argument then SQL:2016 datetime" + + " formats will be used.\n" + + "Example:\n " + + " > SELECT CAST(1234 AS char(10)) FROM src LIMIT 1;\n" + + " '1234'") public class GenericUDFToChar extends GenericUDF implements SettableUDF, Serializable { private static final Logger LOG = LoggerFactory.getLogger(GenericUDFToChar.class.getName()); @@ -55,7 +59,7 @@ public GenericUDFToChar() { @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { - if (arguments.length != 1) { + if (arguments.length < 1) { throw new UDFArgumentException("CHAR cast requires a value argument"); } try { @@ -65,12 +69,25 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen "The function CHAR takes only primitive types"); } + // Third argument could be char length. + if (typeInfo == null && arguments.length > 2 && arguments[2] != null) { + typeInfo = new CharTypeInfo(getConstantIntValue(arguments, 2)); + } + // Check if this UDF has been provided with type params for the output char type SettableHiveCharObjectInspector outputOI; outputOI = (SettableHiveCharObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo); converter = new HiveCharConverter(argumentOI, outputOI); + + // for CAST WITH FORMAT + if (arguments.length > 1 && arguments[1] != null) { + HiveDateTimeFormatter formatter = new HiveSqlDateTimeFormatter(); + formatter.setPattern(getConstantStringValue(arguments, 1), false); + converter.setDateTimeFormatter(formatter); + } + return outputOI; } @@ -86,13 +103,21 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { @Override public String getDisplayString(String[] children) { - assert (children.length == 1); + assert (children.length >= 1 && children.length <= 3); StringBuilder sb = new StringBuilder(); sb.append("CAST( "); sb.append(children[0]); sb.append(" AS CHAR("); - sb.append("" + typeInfo.getLength()); + if (typeInfo != null) { + sb.append(typeInfo.getLength()); + } else if (children.length > 2) { + sb.append(children[2]); + } sb.append(")"); + if (children.length > 1) { + sb.append(" FORMAT "); + sb.append(children[1]); + } sb.append(")"); return sb.toString(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java index c309ffa5e3..12d9a48acf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java @@ -17,11 +17,14 @@ */ package org.apache.hadoop.hive.ql.udf.generic; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToDate; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToDateWithFormat; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToDate; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -36,12 +39,15 @@ * GenericUDFToDate */ @Description(name = "date", - value = "CAST( as DATE) - Returns the date 
represented by the date string.", - extended = "date_string is a string in the format 'yyyy-MM-dd.'" + value = "CAST(<date_string> as DATE [FORMAT <format>]) - Returns the date represented by the date string.", + extended = "date_string is a string in the format 'yyyy-MM-dd.' " + + "If format is specified with FORMAT argument then SQL:2016 datetime formats will be " + + "used for parsing.\n" + "Example:\n " + " > SELECT CAST('2009-01-01' AS DATE) FROM src LIMIT 1;\n" + " '2009-01-01'") -@VectorizedExpressions({CastStringToDate.class, CastTimestampToDate.class}) +@VectorizedExpressions({CastStringToDate.class, CastTimestampToDate.class, + CastStringToDateWithFormat.class}) public class GenericUDFToDate extends GenericUDF { private transient PrimitiveObjectInspector argumentOI; @@ -75,6 +81,14 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen dc = new DateConverter(argumentOI, PrimitiveObjectInspectorFactory.writableDateObjectInspector); + + // for CAST WITH FORMAT + if (arguments.length > 1 && arguments[1] != null) { + HiveDateTimeFormatter formatter = new HiveSqlDateTimeFormatter(); + formatter.setPattern(getConstantStringValue(arguments, 1), true); + dc.setDateTimeFormatter(formatter); + } + return PrimitiveObjectInspectorFactory.writableDateObjectInspector; } @@ -90,11 +104,16 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { @Override public String getDisplayString(String[] children) { - assert (children.length == 1); + assert (children.length == 1 || children.length == 2); StringBuilder sb = new StringBuilder(); sb.append("CAST( "); sb.append(children[0]); - sb.append(" AS DATE)"); + sb.append(" AS DATE"); + if (children.length == 2) { + sb.append(" FORMAT "); + sb.append(children[1]); + } + sb.append(")"); return sb.toString(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToString.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToString.java index d5764419d6..375f40471e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToString.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToString.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hive.ql.udf.generic; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -28,10 +30,12 @@ import org.slf4j.LoggerFactory; @Description(name = "string", -value = "CAST(<value> as STRING) - Converts the argument to a string value.", -extended = "Example:\n " -+ " > SELECT CAST(1234 AS string) FROM src LIMIT 1;\n" -+ " '1234'") + value = "CAST(<value> as STRING [FORMAT <format>]) - Converts the argument to a string value.", + extended = "If format is specified with FORMAT argument then SQL:2016 datetime formats will " + + "be used.\n" + + "Example:\n " + + " > SELECT CAST(1234 AS string) FROM src LIMIT 1;\n" + + " '1234'") public class GenericUDFToString extends GenericUDF { private static final Logger LOG = LoggerFactory.getLogger(GenericUDFToString.class.getName()); @@ -43,7 +47,7 @@ public GenericUDFToString() { @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { - if (arguments.length != 1) { + if (arguments.length < 1) { throw new UDFArgumentException("STRING cast requires a value argument"); } try { @@ -54,26 +58,39 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen } converter = new TextConverter(argumentOI); + + // for CAST WITH FORMAT + if (arguments.length > 1 && arguments[1] != null) { + HiveDateTimeFormatter formatter = new HiveSqlDateTimeFormatter(); + formatter.setPattern(getConstantStringValue(arguments, 1), false); + converter.setDateTimeFormatter(formatter); + } + return PrimitiveObjectInspectorFactory.writableStringObjectInspector; } @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { - Object o0 = arguments[0].get(); - if (o0 == null) { - return null; - } + Object o0 = arguments[0].get(); + if (o0 == null) { + return null; + } - return converter.convert(o0); + return converter.convert(o0); } @Override public String getDisplayString(String[] children) { - assert (children.length == 1); + assert (children.length == 1 || children.length == 2); StringBuilder sb = new StringBuilder(); sb.append("CAST( "); sb.append(children[0]); - sb.append(" AS STRING)"); + sb.append(" AS STRING"); + if (children.length == 2) { + sb.append(" FORMAT "); + sb.append(children[1]); + } + sb.append(")"); return sb.toString(); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java index b9a2bc2b9f..32b865b119 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java @@ -19,6 +19,8 @@ import java.io.Serializable; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.Description; @@ -34,12 +36,14 @@ import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; @Description(name = "varchar", -value = "CAST(<value> as VARCHAR(length)) - Converts the argument to a varchar value.", +value = "CAST(<value> as VARCHAR(length) 
[FORMAT <format>]) - Converts the argument to a " + + "varchar value.", extended = "Values will be truncated if the input value is too long to fit" -+ " within the varchar length." -+ "Example:\n " -+ " > SELECT CAST(1234 AS varchar(10)) FROM src LIMIT 1;\n" -+ " '1234'") + + " within the varchar length. If format is specified with FORMAT argument then SQL:2016 " + + "datetime formats will be used.\n" + + "Example:\n " + + " > SELECT CAST(1234 AS varchar(10)) FROM src LIMIT 1;\n" + + " '1234'") public class GenericUDFToVarchar extends GenericUDF implements SettableUDF, Serializable { private static final Logger LOG = LoggerFactory.getLogger(GenericUDFToVarchar.class.getName()); @@ -55,7 +59,7 @@ public GenericUDFToVarchar() { @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { - if (arguments.length != 1) { + if (arguments.length < 1) { throw new UDFArgumentException("VARCHAR cast requires a value argument"); } try { @@ -65,12 +69,25 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen "The function VARCHAR takes only primitive types"); } + // Third argument could be varchar length. + if (typeInfo == null && arguments.length > 2 && arguments[2] != null) { + typeInfo = new VarcharTypeInfo(getConstantIntValue(arguments, 2)); + } + // Check if this UDF has been provided with type params for the output varchar type SettableHiveVarcharObjectInspector outputOI; outputOI = (SettableHiveVarcharObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo); converter = new HiveVarcharConverter(argumentOI, outputOI); + + // for CAST WITH FORMAT + if (arguments.length > 1 && arguments[1] != null) { + HiveDateTimeFormatter formatter = new HiveSqlDateTimeFormatter(); + formatter.setPattern(getConstantStringValue(arguments, 1), false); + converter.setDateTimeFormatter(formatter); + } + return outputOI; } @@ -86,12 +103,22 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { @Override public String getDisplayString(String[] children) { - assert (children.length == 1); + assert (children.length >= 1 && children.length <= 3); StringBuilder sb = new StringBuilder(); sb.append("CAST( "); sb.append(children[0]); sb.append(" AS "); - sb.append(typeInfo.getQualifiedName()); + sb.append("VARCHAR("); + if (typeInfo != null) { + sb.append(typeInfo.getLength()); + } else if (children.length > 2) { + sb.append(children[2]); + } + sb.append(")"); + if (children.length > 1) { + sb.append(" FORMAT "); + sb.append(children[1]); + } sb.append(")"); return sb.toString(); } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java index 663237739e..314e394d67 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java @@ -22,6 +22,7 @@ import java.util.Arrays; import java.util.Random; +import org.apache.hadoop.hive.common.type.Date; import org.junit.Assert; import org.apache.hadoop.hive.serde2.RandomTypeUtil; @@ -115,18 +116,20 @@ public void testRoundToDecimalPlaces() throws HiveException { Assert.assertEquals(1.2346d, resultV.vector[7], Double.MIN_VALUE); } - static int DAYS_LIMIT = 365 * 9999; + private static final int DAYS_LIMIT = 365 * 9999; + // approximate, so we get some negative values: + private static final int 
SMALLEST_EPOCH_DAY = -365 * 1969; public static VectorizedRowBatch getVectorizedRowBatchDateInTimestampOut(int[] intValues) { Random r = new Random(12099); VectorizedRowBatch batch = new VectorizedRowBatch(2); LongColumnVector inV; TimestampColumnVector outV; - inV = new LongColumnVector(); - outV = new TimestampColumnVector(); + inV = new LongColumnVector(intValues.length); + outV = new TimestampColumnVector(intValues.length); for (int i = 0; i < intValues.length; i++) { - intValues[i] = r.nextInt() % DAYS_LIMIT; + intValues[i] = SMALLEST_EPOCH_DAY + r.nextInt() % DAYS_LIMIT; inV.vector[i] = intValues[i]; } @@ -137,6 +140,36 @@ public static VectorizedRowBatch getVectorizedRowBatchDateInTimestampOut(int[] i return batch; } + public static VectorizedRowBatch getVectorizedRowBatchDateInStringOut(int[] intValues) { + // get date in timestamp out, and change timestamp out to string out + VectorizedRowBatch batch = getVectorizedRowBatchDateInTimestampOut(intValues); + BytesColumnVector outV = new BytesColumnVector(intValues.length); + batch.cols[1] = outV; + return batch; + } + + // For testing CastDateToStringWithFormat with + // TestVectorTypeCastsWithFormat#testCastDateToStringWithFormat + public static VectorizedRowBatch getVectorizedRowBatchDateInStringOutFormatted() { + VectorizedRowBatch batch = new VectorizedRowBatch(2); + LongColumnVector dateColumnV; + BytesColumnVector stringColumnV; + dateColumnV = new LongColumnVector(); + stringColumnV = new BytesColumnVector(); + + dateColumnV.vector[0] = Date.valueOf("2019-12-31").toEpochDay(); + dateColumnV.vector[1] = Date.valueOf("1776-07-04").toEpochDay(); + dateColumnV.vector[2] = Date.valueOf("2012-02-29").toEpochDay(); + dateColumnV.vector[3] = Date.valueOf("1580-08-08").toEpochDay(); + dateColumnV.vector[4] = Date.valueOf("0005-01-01").toEpochDay(); + dateColumnV.vector[5] = Date.valueOf("9999-12-31").toEpochDay(); + + batch.cols[0] = dateColumnV; + batch.cols[1] = stringColumnV; + batch.size = 6; + return batch; + } + public static VectorizedRowBatch getVectorizedRowBatchDoubleInLongOut() { VectorizedRowBatch batch = new VectorizedRowBatch(2); LongColumnVector lcv; @@ -277,6 +310,42 @@ public static VectorizedRowBatch getVectorizedRowBatchStringInLongOut() { return batch; } + public static VectorizedRowBatch getVectorizedRowBatchStringInTimestampOutFormatted() { + VectorizedRowBatch batch = new VectorizedRowBatch(2); + BytesColumnVector inV; + inV = new BytesColumnVector(); + inV.initBuffer(); + inV.setVal(0, StandardCharsets.UTF_8.encode("2019-12-31 00:00:00.999999999").array()); + inV.setVal(1, StandardCharsets.UTF_8.encode("1776-07-04 17:07:06.177617761").array()); + inV.setVal(2, StandardCharsets.UTF_8.encode("2012-02-29 23:59:59.999999999").array()); + inV.setVal(3, StandardCharsets.UTF_8.encode("1580-08-08 00:00:00.0").array()); + inV.setVal(4, StandardCharsets.UTF_8.encode("0005-01-01 00:00:00.0").array()); + inV.setVal(5, StandardCharsets.UTF_8.encode("9999-12-31 23:59:59.999999999").array()); + + batch.cols[0] = inV; + + batch.size = 6; + return batch; + } + + public static VectorizedRowBatch getVectorizedRowBatchStringInDateOutFormatted() { + VectorizedRowBatch batch = new VectorizedRowBatch(2); + BytesColumnVector inV; + inV = new BytesColumnVector(); + inV.initBuffer(); + inV.setVal(0, StandardCharsets.UTF_8.encode("19/12/31").array()); + inV.setVal(1, StandardCharsets.UTF_8.encode("1776--07--04").array()); + inV.setVal(2, StandardCharsets.UTF_8.encode("2012/02/29").array()); + inV.setVal(3, 
StandardCharsets.UTF_8.encode("1580/08/08").array()); + inV.setVal(4, StandardCharsets.UTF_8.encode("0005/01/01").array()); + inV.setVal(5, StandardCharsets.UTF_8.encode("9999/12/31").array()); + + batch.cols[0] = inV; + + batch.size = 6; + return batch; + } + public static VectorizedRowBatch getVectorizedRowBatchTimestampInLongOut(long[] longValues) { Random r = new Random(345); VectorizedRowBatch batch = new VectorizedRowBatch(2); @@ -297,6 +366,58 @@ public static VectorizedRowBatch getVectorizedRowBatchTimestampInLongOut(long[] return batch; } + + public static VectorizedRowBatch getVectorizedRowBatchTimestampInStringOut( + long[] epochSecondValues, int[] nanoValues) { + Random r = new Random(345); + VectorizedRowBatch batch = new VectorizedRowBatch(2); + batch.size = epochSecondValues.length; + + TimestampColumnVector inV; + BytesColumnVector outV; + inV = new TimestampColumnVector(batch.size); + outV = new BytesColumnVector(batch.size); + + for (int i = 0; i < batch.size; i++) { + Timestamp randTimestamp = RandomTypeUtil.getRandTimestamp(r); + epochSecondValues[i] = randTimestamp.toEpochSecond(); + nanoValues[i] = randTimestamp.getNanos(); + inV.set(i, randTimestamp.toSqlTimestamp()); + } + + batch.cols[0] = inV; + batch.cols[1] = outV; + + return batch; + } + + public static VectorizedRowBatch getVectorizedRowBatchTimestampInStringOutFormatted() { + VectorizedRowBatch batch = new VectorizedRowBatch(2); + TimestampColumnVector timestampColumnV; + BytesColumnVector stringColumnV; + timestampColumnV = new TimestampColumnVector(); + stringColumnV = new BytesColumnVector(); + + timestampColumnV.set(0, getSqlTimestamp("2019-12-31 19:20:21.999999999")); + timestampColumnV.set(1, getSqlTimestamp("1776-07-04 17:07:06.177617761")); + timestampColumnV.set(2, getSqlTimestamp("2012-02-29 23:59:59.999999999")); + timestampColumnV.set(3, getSqlTimestamp("1580-08-08 00:00:00")); + timestampColumnV.set(4, getSqlTimestamp("0005-01-01 00:00:00")); + timestampColumnV.set(5, getSqlTimestamp("9999-12-31 23:59:59.999999999")); + + batch.cols[0] = timestampColumnV; + batch.cols[1] = stringColumnV; + batch.size = 6; + return batch; + } + + private static java.sql.Timestamp getSqlTimestamp(String s) { + java.sql.Timestamp ts = java.sql.Timestamp.valueOf(s); + // subtract 8 hours because sql timestamps are assumed to be given in US/Pacific time + ts.setHours(ts.getHours() - 8); + return ts; + } + static long SECONDS_LIMIT = 60L * 24L * 365L * 9999L; public static VectorizedRowBatch getVectorizedRowBatchLongInTimestampOut(long[] longValues) { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java index 58fd7b030e..a449ea143d 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java @@ -23,7 +23,9 @@ import static org.junit.Assert.assertTrue; import java.math.BigDecimal; +import java.nio.charset.StandardCharsets; import java.sql.Timestamp; +import java.util.Arrays; import java.util.Random; import java.util.concurrent.TimeUnit; @@ -72,6 +74,30 @@ public void testVectorCastDoubleToLong() throws HiveException { Assert.assertEquals(1, resultV.vector[6]); } + // +8 hours from PST to GMT, needed because java.sql.Date will subtract 8 hours from final + // value because VM in test time zone is PST. 
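+ // (8 hours * 60 minutes * 60 seconds * 1000 = 28800000 milliseconds)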
+ private static final long TIME_DIFFERENCE = 28800000L; + @Test + public void testCastDateToString() throws HiveException { + int[] intValues = new int[100]; + VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchDateInStringOut(intValues); + BytesColumnVector resultV = (BytesColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + VectorExpression expr = new CastDateToString(0, 1); + expr.evaluate(b); + + String expected, result; + for (int i = 0; i < intValues.length; i++) { + expected = + new java.sql.Date(DateWritableV2.daysToMillis(intValues[i]) + TIME_DIFFERENCE).toString(); + byte[] subbyte = Arrays.copyOfRange(resultV.vector[i], resultV.start[i], + resultV.start[i] + resultV.length[i]); + result = new String(subbyte, StandardCharsets.UTF_8); + + Assert.assertEquals("Index: " + i + " Epoch day value: " + intValues[i], expected, result); + } + } + @Test public void testCastDateToTimestamp() throws HiveException { int[] intValues = new int[500]; @@ -192,6 +218,31 @@ public void testCastTimestampToDouble() throws HiveException { } } + @Test + public void testCastTimestampToString() throws HiveException { + int numberToTest = 100; + long[] epochSecondValues = new long[numberToTest]; + int[] nanoValues = new int[numberToTest]; + VectorizedRowBatch b = + TestVectorMathFunctions.getVectorizedRowBatchTimestampInStringOut(epochSecondValues, nanoValues); + BytesColumnVector resultV = (BytesColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + VectorExpression expr = new CastTimestampToString(0, 1); + expr.evaluate(b); + + String expected, result; + for (int i = 0; i < numberToTest; i++) { + expected = org.apache.hadoop.hive.common.type.Timestamp + .ofEpochSecond(epochSecondValues[i], nanoValues[i]).toString(); + byte[] subbyte = Arrays.copyOfRange(resultV.vector[i], resultV.start[i], + resultV.start[i] + resultV.length[i]); + result = new String(subbyte, StandardCharsets.UTF_8); + Assert.assertEquals("Index: " + i + " Seconds since epoch: " + epochSecondValues[i] + + " nanoseconds: " + nanoValues[i], + expected, result); + } + } + public byte[] toBytes(String s) { byte[] b = null; try { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCastsWithFormat.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCastsWithFormat.java new file mode 100644 index 0000000000..6aa2843bbe --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCastsWithFormat.java @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.Date; +import org.apache.hadoop.hive.common.type.Timestamp; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.junit.Assert; +import org.junit.Test; + +import java.nio.charset.StandardCharsets; +import java.util.Arrays; + +/** + * Tests vectorized type cast udfs CastDateToStringWithFormat, CastTimestampToStringWithFormat, + * CastStringToDateWithFormat, CastStringToTimestampWithFormat. + */ +public class TestVectorTypeCastsWithFormat { + + @Test + public void testCastDateToStringWithFormat() throws HiveException { + VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchDateInStringOutFormatted(); + BytesColumnVector resultV = (BytesColumnVector) b.cols[1]; + VectorExpression expr = new CastDateToStringWithFormat(0, "yyyy".getBytes(), 1); + expr.evaluate(b); + verifyString(0, "2019", resultV); + verifyString(1, "1776", resultV); + verifyString(2, "2012", resultV); + verifyString(3, "1580", resultV); + verifyString(4, "0005", resultV); + verifyString(5, "9999", resultV); + + expr = new CastDateToStringWithFormat(0, "MM".getBytes(), 1); + resultV = new BytesColumnVector(); + b.cols[1] = resultV; + expr.evaluate(b); + verifyString(0, "12", resultV); + verifyString(1, "07", resultV); + verifyString(2, "02", resultV); + verifyString(3, "08", resultV); + verifyString(4, "01", resultV); + verifyString(5, "12", resultV); + } + + @Test + public void testCastTimestampToStringWithFormat() throws HiveException { + VectorizedRowBatch b = + TestVectorMathFunctions.getVectorizedRowBatchTimestampInStringOutFormatted(); + BytesColumnVector resultV = (BytesColumnVector) b.cols[1]; + VectorExpression expr = new CastTimestampToStringWithFormat(0, "yyyy".getBytes(), 1); + expr.evaluate(b); + + Assert.assertEquals("2019", getStringFromBytesColumnVector(resultV, 0)); + Assert.assertEquals("1776", getStringFromBytesColumnVector(resultV, 1)); + Assert.assertEquals("2012", getStringFromBytesColumnVector(resultV, 2)); + Assert.assertEquals("1580", getStringFromBytesColumnVector(resultV, 3)); + Assert.assertEquals("0004", getStringFromBytesColumnVector(resultV, 4)); + Assert.assertEquals("9999", getStringFromBytesColumnVector(resultV, 5)); + + resultV = new BytesColumnVector(); + b.cols[1] = resultV; + expr = new CastTimestampToStringWithFormat(0, "HH24".getBytes(), 1); + expr.evaluate(b); + + Assert.assertEquals("19", getStringFromBytesColumnVector(resultV, 0)); + Assert.assertEquals("17", getStringFromBytesColumnVector(resultV, 1)); + Assert.assertEquals("23", getStringFromBytesColumnVector(resultV, 2)); + Assert.assertEquals("00", getStringFromBytesColumnVector(resultV, 3)); + Assert.assertEquals("00", getStringFromBytesColumnVector(resultV, 4)); + Assert.assertEquals("23", getStringFromBytesColumnVector(resultV, 5)); + } + + @Test + public void testCastStringToTimestampWithFormat() throws HiveException { + VectorizedRowBatch b = + TestVectorMathFunctions.getVectorizedRowBatchStringInTimestampOutFormatted(); + TimestampColumnVector resultV; + resultV = new TimestampColumnVector(); + b.cols[1] = resultV; + VectorExpression expr = new CastStringToTimestampWithFormat(0, "yyyy.mm.dd HH24.mi.ss.ff".getBytes(), 1); + 
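// note: the input strings use "-", ":" and " " as separators while the pattern uses "."; the test + // relies on the SQL:2016 parser accepting mismatched separator characters when parsing. +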
expr.evaluate(b); + + verifyTimestamp("2019-12-31 00:00:00.999999999", resultV, 0); + verifyTimestamp("1776-07-04 17:07:06.177617761", resultV, 1); + verifyTimestamp("2012-02-29 23:59:59.999999999", resultV, 2); + verifyTimestamp("1580-08-08 00:00:00", resultV, 3); + verifyTimestamp("0005-01-01 00:00:00", resultV, 4); + verifyTimestamp("9999-12-31 23:59:59.999999999", resultV, 5); + } + + private void verifyTimestamp(String tsString, TimestampColumnVector resultV, int index) { + Assert.assertEquals(Timestamp.valueOf(tsString).toEpochMilli(), resultV.time[index]); + Assert.assertEquals(Timestamp.valueOf(tsString).getNanos(), resultV.nanos[index]); + } + + @Test + public void testCastStringToDateWithFormat() throws HiveException { + VectorizedRowBatch b = + TestVectorMathFunctions.getVectorizedRowBatchStringInDateOutFormatted(); + LongColumnVector resultV; + resultV = new LongColumnVector(); + b.cols[1] = resultV; + VectorExpression expr = new CastStringToDateWithFormat(0, "yyyy.mm.dd".getBytes(), 1); + expr.evaluate(b); + + Assert.assertEquals(Date.valueOf("2019-12-31").toEpochDay(), resultV.vector[0]); + Assert.assertEquals(Date.valueOf("1776-07-04").toEpochDay(), resultV.vector[1]); + Assert.assertEquals(Date.valueOf("2012-02-29").toEpochDay(), resultV.vector[2]); + Assert.assertEquals(Date.valueOf("1580-08-08").toEpochDay(), resultV.vector[3]); + Assert.assertEquals(Date.valueOf("0005-01-01").toEpochDay(), resultV.vector[4]); + Assert.assertEquals(Date.valueOf("9999-12-31").toEpochDay(), resultV.vector[5]); + } + + private void verifyString(int resultIndex, String expected, BytesColumnVector resultV) { + String result = getStringFromBytesColumnVector(resultV, resultIndex); + Assert.assertEquals(expected, result); + } + + private String getStringFromBytesColumnVector(BytesColumnVector resultV, int i) { + String result; + byte[] resultBytes = Arrays.copyOfRange(resultV.vector[i], resultV.start[i], + resultV.start[i] + resultV.length[i]); + result = new String(resultBytes, StandardCharsets.UTF_8); + return result; + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFFromUnixTime.java ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFFromUnixTime.java new file mode 100644 index 0000000000..97c7650f31 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFFromUnixTime.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.udf; + +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.junit.Assert; +import org.junit.Test; + +/** + * Tests UDFFromUnixTime. 
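+ * Covers both the evaluate(seconds) and evaluate(seconds, format) overloads, including + * pre-Gregorian-switch epoch values.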
+ */ +public class TestUDFFromUnixTime { + + @Test + public void testFromUnixTime() { + UDFFromUnixTime udf = new UDFFromUnixTime(); + + //int, no format + verifyInt(0, "1970-01-01 00:00:00", null, udf); + verifyInt(1296705906, "2011-02-03 04:05:06", null, udf); + verifyInt(1514818800, "2018-01-01 15:00:00", null, udf); + + //long, no format + verifyLong(0L, "1970-01-01 00:00:00", null, udf); + verifyLong(1296705906L, "2011-02-03 04:05:06", null, udf); + verifyLong(1514818800L, "2018-01-01 15:00:00", null, udf); + // proleptic Gregorian input: -30767590800L + verifyLong(-30767158800L, "0995-01-05 15:00:00", null, udf); + // proleptic Gregorian input: -62009366400 + verifyLong(-62009539200L, "0005-01-01 00:00:00", null, udf); + verifyLong(253402300799L, "9999-12-31 23:59:59", null, udf); + + //int with format + String format = "HH:mm:ss"; + verifyInt(0, "00:00:00", format, udf); + verifyInt(1296705906, "04:05:06", format, udf); + verifyInt(1514818800, "15:00:00", format, udf); + + //long with format + verifyLong(0L, "00:00:00", format, udf); + verifyLong(1296705906L, "04:05:06", format, udf); + verifyLong(1514818800L, "15:00:00", format, udf); + // proleptic Gregorian input: -30767590800L + verifyLong(-30767158800L, "15:00:00", format, udf); + // proleptic Gregorian input: -62009366400 + verifyLong(-62009539200L, "00:00:00", format, udf); + verifyLong(253402300799L, "23:59:59", format, udf); + + } + + private void verifyInt(int value, String expected, String format, UDFFromUnixTime udf) { + IntWritable input = new IntWritable(value); + Text res; + if (format == null) { + res = udf.evaluate(input); + } else { + res = udf.evaluate(input, new Text(format)); + } + Assert.assertEquals(expected, res.toString()); + } + + private void verifyLong(long value, String expected, String format, UDFFromUnixTime udf) { + LongWritable input = new LongWritable(value); + Text res; + if (format == null) { + res = udf.evaluate(input); + } else { + res = udf.evaluate(input, new Text(format)); + } + Assert.assertEquals(expected, res.toString()); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java index 7c2ee15646..e9c188b883 100644 --- ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java +++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java @@ -35,9 +35,9 @@ public class TestGenericUDFAddMonths extends TestCase { - private final Text fmtTextWithTime = new Text("YYYY-MM-dd HH:mm:ss"); - private final Text fmtTextWithTimeAndms = new Text("YYYY-MM-dd HH:mm:ss.SSS"); - private final Text fmtTextWithoutTime = new Text("YYYY-MM-dd"); + private final Text fmtTextWithTime = new Text("yyyy-MM-dd HH:mm:ss"); + private final Text fmtTextWithTimeAndms = new Text("yyyy-MM-dd HH:mm:ss.SSS"); + private final Text fmtTextWithoutTime = new Text("yyyy-MM-dd"); private final Text fmtTextInvalid = new Text("YYYY-abcdz"); public void testAddMonthsInt() throws HiveException { @@ -215,7 +215,6 @@ public void testAddMonthsLong() throws HiveException { } - private void runAndVerify(String str, int months, String expResult, GenericUDF udf) throws HiveException { DeferredObject valueObj0 = new DeferredJavaObject(new Text(str)); diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCastWithFormat.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCastWithFormat.java new file mode 100644 index 0000000000..e6d531f2b5 --- 
/dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCastWithFormat.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.common.type.Date; +import org.apache.hadoop.hive.common.type.Timestamp; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.io.DateWritableV2; +import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.io.Text; +import org.junit.Test; + +import static junit.framework.TestCase.assertEquals; +import static junit.framework.TestCase.assertNull; + +/** + * Tests cast udfs GenericUDFToString, GenericUDFToDate, GenericUDFTimestamp with second format + * argument. E.g. 
CAST (<value> AS STRING FORMAT <format>) + */ +public class TestGenericUDFCastWithFormat { + + @Test + public void testDateToStringWithFormat() throws HiveException { + GenericUDF udf = new GenericUDFToString(); + ObjectInspector inputOI = PrimitiveObjectInspectorFactory.writableDateObjectInspector; + testCast(udf, inputOI, date("2009-07-30"), "yyyy-MM-dd", "2009-07-30"); + testCast(udf, inputOI, date("2009-07-30"), "yyyy", "2009"); + testCast(udf, inputOI, date("1969-07-30"), "dd", "30"); + } + + @Test + public void testStringToDateWithFormat() throws HiveException { + GenericUDF udf = new GenericUDFToDate(); + ObjectInspector inputOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector; + testCast(udf, inputOI, "2009-07-30", "yyyy-MM-dd", "2009-07-30"); + testCast(udf, inputOI, "2009", "yyyy", "2009-01-01"); + testCast(udf, inputOI, "30", "dd", "1970-01-30"); + } + + @Test + public void testStringToTimestampWithFormat() throws HiveException { + GenericUDF udf = new GenericUDFTimestamp(); + ObjectInspector inputOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector; + testCast(udf, inputOI, "2009-07-30 01:02:03", "yyyy-MM-dd HH24:mi:ss", "2009-07-30 01:02:03"); + testCast(udf, inputOI, "2009", "yyyy", "2009-01-01 00:00:00"); + testCast(udf, inputOI, "07/30/2009 11:0200", "MM/dd/yyyy hh24:miss", "2009-07-30 11:02:00"); + testCast(udf, inputOI, "69.07.30.", "yy.MM.dd.", "1969-07-30 00:00:00"); + } + + @Test + public void testTimestampToStringWithFormat() throws HiveException { + GenericUDF udf = new GenericUDFToString(); + ObjectInspector inputOI = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector; + testCast(udf, inputOI, timestamp("2009-07-30 00:00:08"), + "yyyy-MM-dd HH24:mi:ss", "2009-07-30 00:00:08"); + testCast(udf, inputOI, timestamp("2009-07-30 11:02:00"), + "MM/dd/yyyy hh24miss", "07/30/2009 110200"); + testCast(udf, inputOI, timestamp("2009-07-30 01:02:03"), "MM", "07"); + testCast(udf, inputOI, timestamp("1969-07-30 00:00:00"), "yy", "69"); + } + + private TimestampWritableV2 timestamp(String s) { + return new TimestampWritableV2(Timestamp.valueOf(s)); + } + + private DateWritableV2 date(String s) { + return new DateWritableV2(Date.valueOf(s)); + } + + private void testCast( + GenericUDF udf, ObjectInspector inputOI, Object input, String format, String output) + throws HiveException { + + ConstantObjectInspector formatOI = + PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.getPrimitiveTypeInfo("string"), new Text(format)); + ObjectInspector[] arguments = {inputOI, formatOI}; + udf.initialize(arguments); + + GenericUDF.DeferredObject valueObj = new GenericUDF.DeferredJavaObject(input); + GenericUDF.DeferredObject formatObj = new GenericUDF.DeferredJavaObject(new Text(format)); + GenericUDF.DeferredObject[] args = {valueObj, formatObj}; + + assertEquals("cast " + inputOI.getTypeName() + " to " + udf.getFuncName() + " failed ", + output, udf.evaluate(args).toString()); + + // Try with null args + GenericUDF.DeferredObject[] nullArgs = {new GenericUDF.DeferredJavaObject(null)}; + assertNull(udf.getFuncName() + " with NULL arguments failed", udf.evaluate(nullArgs)); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormat.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormat.java index 6a3cdda48a..8c7df4d966 100644 --- ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormat.java +++ 
ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormat.java @@ -44,24 +44,32 @@ public void testDateFormatStr() throws HiveException { udf.initialize(arguments); // date str - runAndVerifyStr("2015-04-05", fmtText, "Sunday", udf); - runAndVerifyStr("2015-04-06", fmtText, "Monday", udf); - runAndVerifyStr("2015-04-07", fmtText, "Tuesday", udf); - runAndVerifyStr("2015-04-08", fmtText, "Wednesday", udf); - runAndVerifyStr("2015-04-09", fmtText, "Thursday", udf); - runAndVerifyStr("2015-04-10", fmtText, "Friday", udf); - runAndVerifyStr("2015-04-11", fmtText, "Saturday", udf); - runAndVerifyStr("2015-04-12", fmtText, "Sunday", udf); + runAndVerifyStr("2015-04-05", "Sunday", udf); + runAndVerifyStr("2015-04-06", "Monday", udf); + runAndVerifyStr("2015-04-07", "Tuesday", udf); + runAndVerifyStr("2015-04-08", "Wednesday", udf); + runAndVerifyStr("2015-04-09", "Thursday", udf); + runAndVerifyStr("2015-04-10", "Friday", udf); + runAndVerifyStr("2015-04-11", "Saturday", udf); + runAndVerifyStr("2015-04-12", "Sunday", udf); // ts str - runAndVerifyStr("2015-04-05 10:30:45", fmtText, "Sunday", udf); - runAndVerifyStr("2015-04-06 10:30:45", fmtText, "Monday", udf); - runAndVerifyStr("2015-04-07 10:30:45", fmtText, "Tuesday", udf); - runAndVerifyStr("2015-04-08 10:30:45", fmtText, "Wednesday", udf); - runAndVerifyStr("2015-04-09 10:30", fmtText, "Thursday", udf); - runAndVerifyStr("2015-04-10 10:30:45.123", fmtText, "Friday", udf); - runAndVerifyStr("2015-04-11T10:30:45", fmtText, "Saturday", udf); - runAndVerifyStr("2015-04-12 10", fmtText, "Sunday", udf); + runAndVerifyStr("2015-04-05 10:30:45", "Sunday", udf); + runAndVerifyStr("2015-04-06 10:30:45", "Monday", udf); + runAndVerifyStr("2015-04-07 10:30:45", "Tuesday", udf); + runAndVerifyStr("2015-04-08 10:30:45", "Wednesday", udf); + runAndVerifyStr("2015-04-09 10:30", "Thursday", udf); + runAndVerifyStr("2015-04-10 10:30:45.123", "Friday", udf); + runAndVerifyStr("2015-04-11T10:30:45", "Saturday", udf); + runAndVerifyStr("2015-04-12 10", "Sunday", udf); + + //make sure hour is ok + fmtText = new Text("hh"); + valueOI1 = PrimitiveObjectInspectorFactory + .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtText); + arguments[1] = valueOI1; + udf.initialize(arguments); + runAndVerifyStr("2015-04-10 10:30:45.123", "10", udf); } public void testWrongDateStr() throws HiveException { @@ -73,10 +81,10 @@ public void testWrongDateStr() throws HiveException { ObjectInspector[] arguments = {valueOI0, valueOI1}; udf.initialize(arguments); - runAndVerifyStr("2016-02-30 10:30:45", fmtText, "Tuesday", udf); - runAndVerifyStr("2014-01-32", fmtText, "Saturday", udf); - runAndVerifyStr("01/14/2014", fmtText, null, udf); - runAndVerifyStr(null, fmtText, null, udf); + runAndVerifyStr("2016-02-30 10:30:45", "Tuesday", udf); + runAndVerifyStr("2014-01-32", "Saturday", udf); + runAndVerifyStr("01/14/2014", null, udf); + runAndVerifyStr(null, null, udf); } public void testDateFormatDate() throws HiveException { @@ -89,14 +97,22 @@ public void testDateFormatDate() throws HiveException { udf.initialize(arguments); - runAndVerifyDate("2015-04-05", fmtText, "Sunday", udf); - runAndVerifyDate("2015-04-06", fmtText, "Monday", udf); - runAndVerifyDate("2015-04-07", fmtText, "Tuesday", udf); - runAndVerifyDate("2015-04-08", fmtText, "Wednesday", udf); - runAndVerifyDate("2015-04-09", fmtText, "Thursday", udf); - runAndVerifyDate("2015-04-10", fmtText, "Friday", udf); - runAndVerifyDate("2015-04-11", fmtText, "Saturday", udf); - 
runAndVerifyDate("2015-04-12", fmtText, "Sunday", udf); + runAndVerifyDate("2015-04-05", "Sunday", udf); + runAndVerifyDate("2015-04-06", "Monday", udf); + runAndVerifyDate("2015-04-07", "Tuesday", udf); + runAndVerifyDate("2015-04-08", "Wednesday", udf); + runAndVerifyDate("2015-04-09", "Thursday", udf); + runAndVerifyDate("2015-04-10", "Friday", udf); + runAndVerifyDate("2015-04-11", "Saturday", udf); + runAndVerifyDate("2015-04-12", "Sunday", udf); + + // make sure year is ok + fmtText = new Text("yyyy"); + valueOI1 = PrimitiveObjectInspectorFactory + .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtText); + arguments[1] = valueOI1; + udf.initialize(arguments); + runAndVerifyDate("2015-04-08", "2015", udf); } public void testDateFormatTs() throws HiveException { @@ -109,15 +125,24 @@ public void testDateFormatTs() throws HiveException { udf.initialize(arguments); - runAndVerifyTs("2015-04-08 10:30:45", fmtText, "Wednesday", udf); - runAndVerifyTs("2015-04-05 10:30:45", fmtText, "Sunday", udf); - runAndVerifyTs("2015-04-06 10:30:45", fmtText, "Monday", udf); - runAndVerifyTs("2015-04-07 10:30:45", fmtText, "Tuesday", udf); - runAndVerifyTs("2015-04-08 10:30:45", fmtText, "Wednesday", udf); - runAndVerifyTs("2015-04-09 10:30:45", fmtText, "Thursday", udf); - runAndVerifyTs("2015-04-10 10:30:45.123", fmtText, "Friday", udf); - runAndVerifyTs("2015-04-11 10:30:45.123456789", fmtText, "Saturday", udf); - runAndVerifyTs("2015-04-12 10:30:45", fmtText, "Sunday", udf); + runAndVerifyTs("2015-04-08 10:30:45", "Wednesday", udf); + runAndVerifyTs("2015-04-05 10:30:45", "Sunday", udf); + runAndVerifyTs("2015-04-06 10:30:45", "Monday", udf); + runAndVerifyTs("2015-04-07 10:30:45", "Tuesday", udf); + runAndVerifyTs("2015-04-08 10:30:45", "Wednesday", udf); + runAndVerifyTs("2015-04-09 10:30:45", "Thursday", udf); + runAndVerifyTs("2015-04-10 10:30:45.123", "Friday", udf); + runAndVerifyTs("2015-04-11 10:30:45.123456789", "Saturday", udf); + runAndVerifyTs("2015-04-12 10:30:45", "Sunday", udf); + + // make sure hour of day is ok + fmtText = new Text("HH"); + valueOI1 = PrimitiveObjectInspectorFactory + .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtText); + arguments[1] = valueOI1; + udf.initialize(arguments); + runAndVerifyTs("2015-04-08 00:30:45", "00", udf); + } public void testNullFmt() throws HiveException { @@ -126,11 +151,11 @@ public void testNullFmt() throws HiveException { Text fmtText = null; ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtText); - ObjectInspector[] arguments = { valueOI0, valueOI1 }; + ObjectInspector[] arguments = {valueOI0, valueOI1}; udf.initialize(arguments); - runAndVerifyStr("2015-04-05", fmtText, null, udf); + runAndVerifyStr("2015-04-05", null, udf); } public void testWrongFmt() throws HiveException { @@ -139,38 +164,35 @@ public void testWrongFmt() throws HiveException { Text fmtText = new Text("Q"); ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtText); - ObjectInspector[] arguments = { valueOI0, valueOI1 }; + ObjectInspector[] arguments = {valueOI0, valueOI1}; udf.initialize(arguments); - runAndVerifyStr("2015-04-05", fmtText, null, udf); + runAndVerifyStr("2015-04-05", null, udf); } - private void runAndVerifyStr(String str, Text fmtText, String expResult, GenericUDF udf) + private void runAndVerifyStr(String str, 
String expResult, GenericUDF udf) throws HiveException { DeferredObject valueObj0 = new DeferredJavaObject(str != null ? new Text(str) : null); - DeferredObject valueObj1 = new DeferredJavaObject(fmtText); - DeferredObject[] args = { valueObj0, valueObj1 }; + DeferredObject[] args = {valueObj0}; Text output = (Text) udf.evaluate(args); assertEquals("date_format() test ", expResult, output != null ? output.toString() : null); } - private void runAndVerifyDate(String str, Text fmtText, String expResult, GenericUDF udf) + private void runAndVerifyDate(String str, String expResult, GenericUDF udf) throws HiveException { DeferredObject valueObj0 = new DeferredJavaObject(str != null ? new DateWritableV2( Date.valueOf(str)) : null); - DeferredObject valueObj1 = new DeferredJavaObject(fmtText); - DeferredObject[] args = { valueObj0, valueObj1 }; + DeferredObject[] args = {valueObj0}; Text output = (Text) udf.evaluate(args); assertEquals("date_format() test ", expResult, output != null ? output.toString() : null); } - private void runAndVerifyTs(String str, Text fmtText, String expResult, GenericUDF udf) + private void runAndVerifyTs(String str, String expResult, GenericUDF udf) throws HiveException { DeferredObject valueObj0 = new DeferredJavaObject(str != null ? new TimestampWritableV2( Timestamp.valueOf(str)) : null); - DeferredObject valueObj1 = new DeferredJavaObject(fmtText); - DeferredObject[] args = { valueObj0, valueObj1 }; + DeferredObject[] args = {valueObj0}; Text output = (Text) udf.evaluate(args); assertEquals("date_format() test ", expResult, output != null ? output.toString() : null); } diff --git ql/src/test/queries/clientpositive/cast_datetime_with_sql_2016_format.q ql/src/test/queries/clientpositive/cast_datetime_with_sql_2016_format.q new file mode 100644 index 0000000000..daeb61c975 --- /dev/null +++ ql/src/test/queries/clientpositive/cast_datetime_with_sql_2016_format.q @@ -0,0 +1,69 @@ +--non-vectorized +set hive.vectorized.execution.enabled=false; +set hive.fetch.task.conversion=more; + +drop table if exists timestamps; +drop table if exists dates; +drop table if exists strings; +drop table if exists chars; +drop table if exists varchars; + +create table timestamps (t timestamp) stored as parquet; +insert into timestamps values +("2020-02-03"), +("1969-12-31 23:59:59.999999999") +; +from timestamps select cast (t as string format "yyyy hh24...PM ff"); +from timestamps select cast (t as char(11) format "yyyy hh24...PM ff"); -- will be truncated +from timestamps select cast (t as varchar(11) format "yyyy hh24...PM ff"); -- will be truncated + +create table dates (d date) stored as parquet; +insert into dates values +("2020-02-03"), +("1969-12-31") +; +from dates select cast (d as string format "yyyy mm dd , hh24 mi ss ff9"); +from dates select cast (d as char(10) format "yyyy mm dd , hh24 mi ss ff9"); -- will be truncated +from dates select cast (d as varchar(10) format "yyyy mm dd , hh24 mi ss ff9"); -- will be truncated + +create table strings (s string) stored as parquet; +create table varchars (s varchar(11)) stored as parquet; +create table chars (s char(11)) stored as parquet; +insert into strings values +("20 / 2 / 3"), +("1969 12 31") +; +insert into varchars select * from strings; +insert into chars select * from strings; + +from strings select cast (s as timestamp format "yyyy.mm.dd"); +from strings select cast (s as date format "yyyy.mm.dd"); +from varchars select cast (s as timestamp format "yyyy.mm.dd"); +from varchars select cast (s as date format "yyyy.mm.dd"); +from chars select cast (s as timestamp format "yyyy.mm.dd"); +from chars select cast (s as date format "yyyy.mm.dd"); + + +--correct descriptions +explain 
from strings select cast (s as timestamp format "yyy.mm.dd"); +explain from strings select cast (s as date format "yyy.mm.dd"); +explain from timestamps select cast (t as string format "yyyy"); +explain from timestamps select cast (t as varchar(12) format "yyyy"); + + +--vectorized +set hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +from timestamps select cast (t as string format "yyyy"); +from dates select cast (d as string format "yyyy"); +from timestamps select cast (t as varchar(11) format "yyyy"); +from dates select cast (d as varchar(11) format "yyyy"); +from timestamps select cast (t as char(11) format "yyyy"); +from dates select cast (d as char(11) format "yyyy"); +from strings select cast (s as timestamp format "yyyy.mm.dd"); +from varchars select cast (s as timestamp format "yyyy.mm.dd"); +from chars select cast (s as timestamp format "yyyy.mm.dd"); +from strings select cast (s as date format "yyyy.mm.dd"); +from varchars select cast (s as date format "yyyy.mm.dd"); +from chars select cast (s as date format "yyyy.mm.dd"); diff --git ql/src/test/results/clientpositive/cast_datetime_with_sql_2016_format.q.out ql/src/test/results/clientpositive/cast_datetime_with_sql_2016_format.q.out new file mode 100644 index 0000000000..e5f0d4bf35 --- /dev/null +++ ql/src/test/results/clientpositive/cast_datetime_with_sql_2016_format.q.out @@ -0,0 +1,469 @@ +PREHOOK: query: drop table if exists timestamps +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists timestamps +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists dates +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists dates +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists strings +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists strings +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists chars +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists chars +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists varchars +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists varchars +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table timestamps (t timestamp) stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@timestamps +POSTHOOK: query: create table timestamps (t timestamp) stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@timestamps +PREHOOK: query: insert into timestamps values +("2020-02-03"), +("1969-12-31 23:59:59.999999999") +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@timestamps +POSTHOOK: query: insert into timestamps values +("2020-02-03"), +("1969-12-31 23:59:59.999999999") +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@timestamps +POSTHOOK: Lineage: timestamps.t SCRIPT [] +PREHOOK: query: from timestamps select cast (t as string format "yyyy hh24...PM ff") +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamps +#### A masked pattern was here #### +POSTHOOK: query: from timestamps select cast (t as string format "yyyy hh24...PM ff") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamps +#### A masked pattern was here #### +2020 00...AM 0 +1969 23...PM 999999999 +PREHOOK: query: from timestamps select cast (t as char(11) format "yyyy hh24...PM ff") +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamps +#### A masked pattern was here #### +POSTHOOK: query: from 
timestamps select cast (t as char(11) format "yyyy hh24...PM ff") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamps +#### A masked pattern was here #### +2020 00...A +1969 23...P +PREHOOK: query: -- will be truncated +from timestamps select cast (t as varchar(11) format "yyyy hh24...PM ff") +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamps +#### A masked pattern was here #### +POSTHOOK: query: -- will be truncated +from timestamps select cast (t as varchar(11) format "yyyy hh24...PM ff") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamps +#### A masked pattern was here #### +2020 00...A +1969 23...P +PREHOOK: query: -- will be truncated + +create table dates (d date) stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dates +POSTHOOK: query: -- will be truncated + +create table dates (d date) stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dates +PREHOOK: query: insert into dates values +("2020-02-03"), +("1969-12-31") +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dates +POSTHOOK: query: insert into dates values +("2020-02-03"), +("1969-12-31") +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@dates +POSTHOOK: Lineage: dates.d SCRIPT [] +PREHOOK: query: from dates select cast (d as string format "yyyy mm dd , hh24 mi ss ff9") +PREHOOK: type: QUERY +PREHOOK: Input: default@dates +#### A masked pattern was here #### +POSTHOOK: query: from dates select cast (d as string format "yyyy mm dd , hh24 mi ss ff9") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dates +#### A masked pattern was here #### +2020 02 03 , 00 00 00 000000000 +1969 12 31 , 00 00 00 000000000 +PREHOOK: query: from dates select cast (d as char(10) format "yyyy mm dd , hh24 mi ss ff9") +PREHOOK: type: QUERY +PREHOOK: Input: default@dates +#### A masked pattern was here #### +POSTHOOK: query: from dates select cast (d as char(10) format "yyyy mm dd , hh24 mi ss ff9") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dates +#### A masked pattern was here #### +2020 02 03 +1969 12 31 +PREHOOK: query: -- will be truncated +from dates select cast (d as varchar(10) format "yyyy mm dd , hh24 mi ss ff9") +PREHOOK: type: QUERY +PREHOOK: Input: default@dates +#### A masked pattern was here #### +POSTHOOK: query: -- will be truncated +from dates select cast (d as varchar(10) format "yyyy mm dd , hh24 mi ss ff9") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dates +#### A masked pattern was here #### +2020 02 03 +1969 12 31 +PREHOOK: query: -- will be truncated + +create table strings (s string) stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@strings +POSTHOOK: query: -- will be truncated + +create table strings (s string) stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@strings +PREHOOK: query: create table varchars (s varchar(11)) stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@varchars +POSTHOOK: query: create table varchars (s varchar(11)) stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@varchars +PREHOOK: query: create table chars (s char(11)) stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@chars 
+POSTHOOK: query: create table chars (s char(11)) stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@chars +PREHOOK: query: insert into strings values +("20 / 2 / 3"), +("1969 12 31") +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@strings +POSTHOOK: query: insert into strings values +("20 / 2 / 3"), +("1969 12 31") +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@strings +POSTHOOK: Lineage: strings.s SCRIPT [] +PREHOOK: query: insert into varchars select * from strings +PREHOOK: type: QUERY +PREHOOK: Input: default@strings +PREHOOK: Output: default@varchars +POSTHOOK: query: insert into varchars select * from strings +POSTHOOK: type: QUERY +POSTHOOK: Input: default@strings +POSTHOOK: Output: default@varchars +POSTHOOK: Lineage: varchars.s EXPRESSION [(strings)strings.FieldSchema(name:s, type:string, comment:null), ] +PREHOOK: query: insert into chars select * from strings +PREHOOK: type: QUERY +PREHOOK: Input: default@strings +PREHOOK: Output: default@chars +POSTHOOK: query: insert into chars select * from strings +POSTHOOK: type: QUERY +POSTHOOK: Input: default@strings +POSTHOOK: Output: default@chars +POSTHOOK: Lineage: chars.s EXPRESSION [(strings)strings.FieldSchema(name:s, type:string, comment:null), ] +PREHOOK: query: from strings select cast (s as timestamp format "yyyy.mm.dd") +PREHOOK: type: QUERY +PREHOOK: Input: default@strings +#### A masked pattern was here #### +POSTHOOK: query: from strings select cast (s as timestamp format "yyyy.mm.dd") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@strings +#### A masked pattern was here #### +2020-02-03 00:00:00 +1969-12-31 00:00:00 +PREHOOK: query: from strings select cast (s as date format "yyyy.mm.dd") +PREHOOK: type: QUERY +PREHOOK: Input: default@strings +#### A masked pattern was here #### +POSTHOOK: query: from strings select cast (s as date format "yyyy.mm.dd") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@strings +#### A masked pattern was here #### +2020-02-03 +1969-12-31 +PREHOOK: query: from varchars select cast (s as timestamp format "yyyy.mm.dd") +PREHOOK: type: QUERY +PREHOOK: Input: default@varchars +#### A masked pattern was here #### +POSTHOOK: query: from varchars select cast (s as timestamp format "yyyy.mm.dd") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchars +#### A masked pattern was here #### +2020-02-03 00:00:00 +1969-12-31 00:00:00 +PREHOOK: query: from varchars select cast (s as date format "yyyy.mm.dd") +PREHOOK: type: QUERY +PREHOOK: Input: default@varchars +#### A masked pattern was here #### +POSTHOOK: query: from varchars select cast (s as date format "yyyy.mm.dd") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchars +#### A masked pattern was here #### +2020-02-03 +1969-12-31 +PREHOOK: query: from chars select cast (s as timestamp format "yyyy.mm.dd") +PREHOOK: type: QUERY +PREHOOK: Input: default@chars +#### A masked pattern was here #### +POSTHOOK: query: from chars select cast (s as timestamp format "yyyy.mm.dd") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@chars +#### A masked pattern was here #### +2020-02-03 00:00:00 +1969-12-31 00:00:00 +PREHOOK: query: from chars select cast (s as date format "yyyy.mm.dd") +PREHOOK: type: QUERY +PREHOOK: Input: default@chars +#### A masked pattern was here #### +POSTHOOK: query: from chars select cast (s as date format "yyyy.mm.dd") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@chars 
+#### A masked pattern was here #### +2020-02-03 +1969-12-31 +PREHOOK: query: explain from strings select cast (s as timestamp format "yyy.mm.dd") +PREHOOK: type: QUERY +PREHOOK: Input: default@strings +#### A masked pattern was here #### +POSTHOOK: query: explain from strings select cast (s as timestamp format "yyy.mm.dd") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@strings +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: strings + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CAST( s AS TIMESTAMP FORMAT 'yyy.mm.dd') (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: explain from strings select cast (s as date format "yyy.mm.dd") +PREHOOK: type: QUERY +PREHOOK: Input: default@strings +#### A masked pattern was here #### +POSTHOOK: query: explain from strings select cast (s as date format "yyy.mm.dd") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@strings +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: strings + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CAST( s AS DATE FORMAT 'yyy.mm.dd') (type: date) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: explain from timestamps select cast (t as string format "yyyy") +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamps +#### A masked pattern was here #### +POSTHOOK: query: explain from timestamps select cast (t as string format "yyyy") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamps +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: timestamps + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CAST( t AS STRING FORMAT 'yyyy') (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: explain from timestamps select cast (t as varchar(12) format "yyyy") +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamps +#### A masked pattern was here #### +POSTHOOK: query: explain from timestamps select cast (t as varchar(12) format "yyyy") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamps +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: timestamps + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CAST( t AS VARCHAR(12) FORMAT 'yyyy') (type: varchar(12)) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: from timestamps select cast (t as string format "yyyy") +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamps +#### A masked pattern was here #### +POSTHOOK: query: from timestamps select cast (t as string format "yyyy")
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamps +#### A masked pattern was here #### +2020 +1969 +PREHOOK: query: from dates select cast (d as string format "yyyy") +PREHOOK: type: QUERY +PREHOOK: Input: default@dates +#### A masked pattern was here #### +POSTHOOK: query: from dates select cast (d as string format "yyyy") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dates +#### A masked pattern was here #### +2020 +1969 +PREHOOK: query: from timestamps select cast (t as varchar(11) format "yyyy") +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamps +#### A masked pattern was here #### +POSTHOOK: query: from timestamps select cast (t as varchar(11) format "yyyy") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamps +#### A masked pattern was here #### +2020 +1969 +PREHOOK: query: from dates select cast (d as varchar(11) format "yyyy") +PREHOOK: type: QUERY +PREHOOK: Input: default@dates +#### A masked pattern was here #### +POSTHOOK: query: from dates select cast (d as varchar(11) format "yyyy") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dates +#### A masked pattern was here #### +2020 +1969 +PREHOOK: query: from timestamps select cast (t as char(11) format "yyyy") +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamps +#### A masked pattern was here #### +POSTHOOK: query: from timestamps select cast (t as char(11) format "yyyy") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamps +#### A masked pattern was here #### +2020 +1969 +PREHOOK: query: from dates select cast (d as char(11) format "yyyy") +PREHOOK: type: QUERY +PREHOOK: Input: default@dates +#### A masked pattern was here #### +POSTHOOK: query: from dates select cast (d as char(11) format "yyyy") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dates +#### A masked pattern was here #### +2020 +1969 +PREHOOK: query: from strings select cast (s as timestamp format "yyyy.mm.dd") +PREHOOK: type: QUERY +PREHOOK: Input: default@strings +#### A masked pattern was here #### +POSTHOOK: query: from strings select cast (s as timestamp format "yyyy.mm.dd") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@strings +#### A masked pattern was here #### +2020-02-03 00:00:00 +1969-12-31 00:00:00 +PREHOOK: query: from varchars select cast (s as timestamp format "yyyy.mm.dd") +PREHOOK: type: QUERY +PREHOOK: Input: default@varchars +#### A masked pattern was here #### +POSTHOOK: query: from varchars select cast (s as timestamp format "yyyy.mm.dd") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchars +#### A masked pattern was here #### +2020-02-03 00:00:00 +1969-12-31 00:00:00 +PREHOOK: query: from chars select cast (s as timestamp format "yyyy.mm.dd") +PREHOOK: type: QUERY +PREHOOK: Input: default@chars +#### A masked pattern was here #### +POSTHOOK: query: from chars select cast (s as timestamp format "yyyy.mm.dd") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@chars +#### A masked pattern was here #### +2020-02-03 00:00:00 +1969-12-31 00:00:00 +PREHOOK: query: from strings select cast (s as date format "yyyy.mm.dd") +PREHOOK: type: QUERY +PREHOOK: Input: default@strings +#### A masked pattern was here #### +POSTHOOK: query: from strings select cast (s as date format "yyyy.mm.dd") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@strings +#### A masked pattern was here #### +2020-02-03 +1969-12-31 +PREHOOK: query: from varchars select cast (s as date format "yyyy.mm.dd") +PREHOOK: type: QUERY +PREHOOK: Input: default@varchars +#### A masked pattern was here #### +POSTHOOK: query: from varchars select 
cast (s as date format "yyyy.mm.dd") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchars +#### A masked pattern was here #### +2020-02-03 +1969-12-31 +PREHOOK: query: from chars select cast (s as date format "yyyy.mm.dd") +PREHOOK: type: QUERY +PREHOOK: Input: default@chars +#### A masked pattern was here #### +POSTHOOK: query: from chars select cast (s as date format "yyyy.mm.dd") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@chars +#### A masked pattern was here #### +2020-02-03 +1969-12-31 diff --git ql/src/test/results/clientpositive/udf_string.q.out ql/src/test/results/clientpositive/udf_string.q.out index aa764a9db6..6da63be8f8 100644 --- ql/src/test/results/clientpositive/udf_string.q.out +++ ql/src/test/results/clientpositive/udf_string.q.out @@ -2,12 +2,13 @@ PREHOOK: query: DESCRIBE FUNCTION string PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION string POSTHOOK: type: DESCFUNCTION -CAST(<expr> as STRING) - Converts the argument to a string value. +CAST(<expr> as STRING [FORMAT <pattern>]) - Converts the argument to a string value. PREHOOK: query: DESCRIBE FUNCTION EXTENDED string PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION EXTENDED string POSTHOOK: type: DESCFUNCTION -CAST(<expr> as STRING) - Converts the argument to a string value. +CAST(<expr> as STRING [FORMAT <pattern>]) - Converts the argument to a string value. +If format is specified with FORMAT argument then SQL:2016 datetime formats will be used. Example: > SELECT CAST(1234 AS string) FROM src LIMIT 1; '1234' diff --git serde/src/java/org/apache/hadoop/hive/serde2/io/DateWritableV2.java serde/src/java/org/apache/hadoop/hive/serde2/io/DateWritableV2.java index 4b6a3d6c10..4ff4732324 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/io/DateWritableV2.java +++ serde/src/java/org/apache/hadoop/hive/serde2/io/DateWritableV2.java @@ -21,6 +21,7 @@ import java.io.DataOutput; import java.io.IOException; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.io.WritableUtils; @@ -147,6 +148,10 @@ public String toString() { return date.toString(); } + public String toStringFormatted(HiveDateTimeFormatter formatter) { + return date.toStringFormatted(formatter); + } + @Override public int hashCode() { return date.toEpochDay(); diff --git serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritableV2.java serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritableV2.java index 9aa7f19ab2..5972bd92b5 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritableV2.java +++ serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritableV2.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.time.format.DateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.common.type.TimestampUtils; @@ -387,6 +388,16 @@ public String toString() { return timestamp.format(DATE_TIME_FORMAT); } + public String toStringFormatted(HiveDateTimeFormatter formatter) { + if (formatter == null) { + return toString(); + } + if (timestampEmpty) { + populateTimestamp(); + } + return timestamp.toStringFormatted(formatter); + } + @Override public int hashCode() { long seconds = getSeconds(); diff --git 
serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java index 84c027d51c..3da8a18c4e 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java @@ -20,6 +20,7 @@ import java.time.ZoneId; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; @@ -250,6 +251,7 @@ public Object convert(Object input) { PrimitiveObjectInspector inputOI; SettableDateObjectInspector outputOI; Object r; + private HiveDateTimeFormatter formatter = null; public DateConverter(PrimitiveObjectInspector inputOI, SettableDateObjectInspector outputOI) { @@ -263,7 +265,11 @@ public Object convert(Object input) { return null; } return outputOI.set(r, PrimitiveObjectInspectorUtils.getDate(input, - inputOI)); + inputOI, formatter)); + } + + public void setDateTimeFormatter(HiveDateTimeFormatter formatter) { + this.formatter = formatter; } } @@ -272,6 +278,7 @@ public Object convert(Object input) { SettableTimestampObjectInspector outputOI; boolean intToTimestampInSeconds = false; Object r; + private HiveDateTimeFormatter formatter = null; public TimestampConverter(PrimitiveObjectInspector inputOI, SettableTimestampObjectInspector outputOI) { @@ -289,7 +296,11 @@ public Object convert(Object input) { return null; } return outputOI.set(r, PrimitiveObjectInspectorUtils.getTimestamp(input, - inputOI, intToTimestampInSeconds)); + inputOI, intToTimestampInSeconds, formatter)); + } + + public void setDateTimeFormatter(HiveDateTimeFormatter formatter) { + this.formatter = formatter; } } @@ -416,6 +427,7 @@ public Object convert(Object input) { private static byte[] trueBytes = {'T', 'R', 'U', 'E'}; private static byte[] falseBytes = {'F', 'A', 'L', 'S', 'E'}; + private HiveDateTimeFormatter formatter = null; public TextConverter(PrimitiveObjectInspector inputOI) { // The output ObjectInspector is writableStringObjectInspector. 
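Reviewer note on the converter hunks above: each converter now carries an optional HiveDateTimeFormatter, injected through the new setDateTimeFormatter method, and keeps the legacy toString()/parse behavior whenever that field stays null. Below is a minimal usage sketch, not code from the patch: it assumes only classes visible in this diff (TextConverter, TimestampWritableV2, HiveJavaDateTimeFormatter) plus Timestamp.toStringFormatted, which this patch adds to the common Timestamp type outside this section; the pattern and values are illustrative.

    import java.time.format.DateTimeFormatter;

    import org.apache.hadoop.hive.common.format.datetime.HiveJavaDateTimeFormatter;
    import org.apache.hadoop.hive.common.type.Timestamp;
    import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.TextConverter;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
    import org.apache.hadoop.io.Text;

    public class TextConverterFormatSketch {
      public static void main(String[] args) {
        // Convert a timestamp to text through the converter, rendering only the
        // year, analogous to the "cast (t as string format \"yyyy\")" queries above.
        TextConverter converter = new TextConverter(
            PrimitiveObjectInspectorFactory.writableTimestampObjectInspector);
        // Without this call the formatter stays null and plain toString() is used.
        converter.setDateTimeFormatter(
            new HiveJavaDateTimeFormatter(DateTimeFormatter.ofPattern("yyyy")));
        Text out = converter.convert(
            new TimestampWritableV2(Timestamp.valueOf("2020-02-03 04:05:06")));
        System.out.println(out); // expected: 2020
      }
    }

The same setter-based wiring applies to DateConverter, TimestampConverter, HiveVarcharConverter, and HiveCharConverter; a null formatter everywhere preserves the pre-patch code path, which is what keeps existing casts backward compatible.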
@@ -486,11 +498,12 @@ public Text convert(Object input) { } return t; case DATE: - t.set(((DateObjectInspector) inputOI).getPrimitiveWritableObject(input).toString()); + t.set(((DateObjectInspector) inputOI) + .getPrimitiveWritableObject(input).toStringFormatted(formatter)); return t; case TIMESTAMP: t.set(((TimestampObjectInspector) inputOI) - .getPrimitiveWritableObject(input).toString()); + .getPrimitiveWritableObject(input).toStringFormatted(formatter)); return t; case TIMESTAMPLOCALTZ: t.set(((TimestampLocalTZObjectInspector) inputOI).getPrimitiveWritableObject(input).toString()); @@ -520,6 +533,10 @@ public Text convert(Object input) { throw new RuntimeException("Hive 2 Internal error: type = " + inputOI.getTypeName()); } } + + public void setDateTimeFormatter(HiveDateTimeFormatter formatter) { + this.formatter = formatter; + } } /** @@ -545,6 +562,7 @@ public Object convert(Object input) { PrimitiveObjectInspector inputOI; SettableHiveVarcharObjectInspector outputOI; Object hc; + private HiveDateTimeFormatter formatter; public HiveVarcharConverter(PrimitiveObjectInspector inputOI, SettableHiveVarcharObjectInspector outputOI) { @@ -567,21 +585,26 @@ public Object convert(Object input) { return null; } switch (inputOI.getPrimitiveCategory()) { - case BOOLEAN: - return outputOI.set(hc, - ((BooleanObjectInspector) inputOI).get(input) ? - new HiveVarchar("TRUE", -1) : new HiveVarchar("FALSE", -1)); - default: - return outputOI.set(hc, PrimitiveObjectInspectorUtils.getHiveVarchar(input, inputOI)); + case BOOLEAN: + return outputOI.set(hc, + ((BooleanObjectInspector) inputOI).get(input) ? new HiveVarchar("TRUE", + -1) : new HiveVarchar("FALSE", -1)); + default: + return outputOI + .set(hc, PrimitiveObjectInspectorUtils.getHiveVarchar(input, inputOI, formatter)); } } + public void setDateTimeFormatter(HiveDateTimeFormatter formatter) { + this.formatter = formatter; + } } public static class HiveCharConverter implements Converter { PrimitiveObjectInspector inputOI; SettableHiveCharObjectInspector outputOI; Object hc; + private HiveDateTimeFormatter formatter; public HiveCharConverter(PrimitiveObjectInspector inputOI, SettableHiveCharObjectInspector outputOI) { @@ -601,8 +624,13 @@ public Object convert(Object input) { ((BooleanObjectInspector) inputOI).get(input) ? 
new HiveChar("TRUE", -1) : new HiveChar("FALSE", -1)); default: - return outputOI.set(hc, PrimitiveObjectInspectorUtils.getHiveChar(input, inputOI)); + return outputOI.set(hc, + PrimitiveObjectInspectorUtils.getHiveChar(input, inputOI, formatter)); } } + + public void setDateTimeFormatter(HiveDateTimeFormatter formatter) { + this.formatter = formatter; + } } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java index 3886b202c7..6cf231e7ae 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hive.common.classification.InterfaceAudience; import org.apache.hadoop.hive.common.classification.InterfaceStability; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; @@ -891,12 +892,18 @@ public static float getFloat(Object o, PrimitiveObjectInspector oi) { return (float) getDouble(o, oi); } + public static String getString(Object o, PrimitiveObjectInspector oi) { + return getString(o, oi, null); + } + /** * Get the String value out of a primitive object. Note that * NullPointerException will be thrown if o is null. Note that * RuntimeException will be thrown if o is not a valid string. + * HiveDateTimeFormatter is optional. */ - public static String getString(Object o, PrimitiveObjectInspector oi) { + public static String getString(Object o, PrimitiveObjectInspector oi, + HiveDateTimeFormatter formatter) { if (o == null) { return null; @@ -951,10 +958,12 @@ public static String getString(Object o, PrimitiveObjectInspector oi) { result = hcoi.getPrimitiveJavaObject(o).toString(); break; case DATE: - result = ((DateObjectInspector) oi).getPrimitiveWritableObject(o).toString(); + result = ((DateObjectInspector) oi).getPrimitiveWritableObject(o) + .toStringFormatted(formatter); break; case TIMESTAMP: - result = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o).toString(); + result = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o) + .toStringFormatted(formatter); break; case TIMESTAMPLOCALTZ: result = ((TimestampLocalTZObjectInspector) oi).getPrimitiveWritableObject(o).toString(); @@ -978,25 +987,35 @@ public static String getString(Object o, PrimitiveObjectInspector oi) { } public static HiveChar getHiveChar(Object o, PrimitiveObjectInspector oi) { + return getHiveChar(o, oi, null); + } + + public static HiveChar getHiveChar(Object o, PrimitiveObjectInspector oi, + HiveDateTimeFormatter formatter) { if (o == null) { return null; } HiveChar result = null; switch (oi.getPrimitiveCategory()) { - case CHAR: - result = ((HiveCharObjectInspector) oi).getPrimitiveJavaObject(o); - break; - default: - // No char length available, copy whole string value here. - result = new HiveChar(); - result.setValue(getString(o, oi)); - break; + case CHAR: + result = ((HiveCharObjectInspector) oi).getPrimitiveJavaObject(o); + break; + default: + // No char length available, copy whole string value here. 
+ result = new HiveChar(); + result.setValue(getString(o, oi, formatter)); + break; } return result; } public static HiveVarchar getHiveVarchar(Object o, PrimitiveObjectInspector oi) { + return getHiveVarchar(o, oi, null); + } + + public static HiveVarchar getHiveVarchar(Object o, PrimitiveObjectInspector oi, + HiveDateTimeFormatter formatter) { if (o == null) { return null; @@ -1004,16 +1023,16 @@ public static HiveVarchar getHiveVarchar(Object o, PrimitiveObjectInspector oi) HiveVarchar result = null; switch (oi.getPrimitiveCategory()) { - case VARCHAR: - result = ((HiveVarcharObjectInspector)oi).getPrimitiveJavaObject(o); - break; - default: - // Is there a way to provide char length here? - // It might actually be ok as long as there is an object inspector (with char length) - // receiving this value. - result = new HiveVarchar(); - result.setValue(getString(o, oi)); - break; + case VARCHAR: + result = ((HiveVarcharObjectInspector) oi).getPrimitiveJavaObject(o); + break; + default: + // Is there a way to provide char length here? + // It might actually be ok as long as there is an object inspector (with char length) + // receiving this value. + result = new HiveVarchar(); + result.setValue(getString(o, oi, formatter)); + break; } return result; } @@ -1113,6 +1132,11 @@ public static HiveDecimal getHiveDecimal(Object o, PrimitiveObjectInspector oi) } public static Date getDate(Object o, PrimitiveObjectInspector oi) { + return getDate(o, oi, null); + } + + public static Date getDate( + Object o, PrimitiveObjectInspector oi, HiveDateTimeFormatter formatter) { if (o == null) { return null; } @@ -1125,13 +1149,9 @@ public static Date getDate(Object o, PrimitiveObjectInspector oi) { StringObjectInspector soi = (StringObjectInspector) oi; String s = soi.getPrimitiveJavaObject(o).trim(); try { - if (s.length() == DATE_LENGTH) { - result = Date.valueOf(s); - } else { - Timestamp ts = getTimestampFromString(s); - if (ts != null) { - result = Date.ofEpochMilli(ts.toEpochMilli()); - } + Date date = getDateFromString(s, formatter); + if (date != null) { + result = date; } } catch (IllegalArgumentException e) { // Do nothing @@ -1141,13 +1161,9 @@ public static Date getDate(Object o, PrimitiveObjectInspector oi) { case VARCHAR: { String val = getString(o, oi).trim(); try { - if (val.length() == DATE_LENGTH) { - result = Date.valueOf(val); - } else { - Timestamp ts = getTimestampFromString(val); - if (ts != null) { - result = Date.ofEpochMilli(ts.toEpochMilli()); - } + Date date = getDateFromString(val, formatter); + if (date != null) { + result = date; } } catch (IllegalArgumentException e) { // Do nothing @@ -1177,11 +1193,46 @@ public static Date getDate(Object o, PrimitiveObjectInspector oi) { return result; } + private final static int DATE_LENGTH = "YYYY-MM-DD".length(); + private static Date getDateFromString(String s, HiveDateTimeFormatter formatter) { + + // with SQL formats + if (formatter != null) { + try { + return Date.valueOf(s, formatter); + } catch (IllegalArgumentException e) { + return null; + } + } + + // without SQL formats + if (s.length() == DATE_LENGTH) { + return Date.valueOf(s); + } else { + Timestamp ts = getTimestampFromString(s); + if (ts != null) { + return Date.ofEpochMilli(ts.toEpochMilli()); + } + } + return null; + } + public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector oi) { return getTimestamp(o, oi, false); } + public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector oi, HiveDateTimeFormatter formatter) { + return getTimestamp(o, 
oi, false, formatter); + } + public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector inputOI, boolean intToTimestampInSeconds) { + return getTimestamp(o, inputOI, intToTimestampInSeconds, null); + } + + public static Timestamp getTimestamp(Object o, + PrimitiveObjectInspector inputOI, + boolean intToTimestampInSeconds, + HiveDateTimeFormatter format) { if (o == null) { return null; } @@ -1225,11 +1276,11 @@ public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector inputOI, case STRING: StringObjectInspector soi = (StringObjectInspector) inputOI; String s = soi.getPrimitiveJavaObject(o); - result = getTimestampFromString(s); + result = getTimestampFromString(s, format); break; case CHAR: case VARCHAR: - result = getTimestampFromString(getString(o, inputOI)); + result = getTimestampFromString(getString(o, inputOI), format); break; case DATE: result = Timestamp.ofEpochMilli( @@ -1254,15 +1305,17 @@ public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector inputOI, return result; } - private final static int TS_LENGTH = "yyyy-mm-dd hh:mm:ss".length(); - private final static int DATE_LENGTH = "YYYY-MM-DD".length(); - public static Timestamp getTimestampFromString(String s) { + return getTimestampFromString(s, null); + } + + public static Timestamp getTimestampFromString(String s, HiveDateTimeFormatter formatter) { + s = s.trim(); s = trimNanoTimestamp(s); try { - return TimestampUtils.stringToTimestamp(s); + return TimestampUtils.stringToTimestamp(s, formatter); } catch (IllegalArgumentException e) { return null; } @@ -1284,19 +1337,6 @@ private static String trimNanoTimestamp(String s) { return s; } - private static boolean isValidTimeStamp(final String s) { - if (s.length() == TS_LENGTH || - (s.contains(".") && - s.substring(0, s.indexOf('.')).length() == TS_LENGTH)) { - // Possible timestamp - if (s.charAt(DATE_LENGTH) == '-') { - return false; - } - return true; - } - return false; - } - public static TimestampTZ getTimestampLocalTZ(Object o, PrimitiveObjectInspector oi, ZoneId timeZone) { if (o == null) {
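The parsing half mirrors the formatting half: getDate, getTimestamp, and getTimestampFromString now take an optional formatter, route string inputs through getDateFromString/getTimestampFromString, and keep the old null-on-failure contract (an IllegalArgumentException during parsing is swallowed and null is returned). A hedged sketch follows, assuming that TimestampUtils.stringToTimestamp(String, HiveDateTimeFormatter) — called above but defined elsewhere in this patch — delegates to HiveDateTimeFormatter.parseTimestamp; the pattern and input value are illustrative only. Note the java.time pattern must cover both date and time fields, since HiveJavaDateTimeFormatter parses via LocalDateTime.

    import java.time.format.DateTimeFormatter;

    import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
    import org.apache.hadoop.hive.common.format.datetime.HiveJavaDateTimeFormatter;
    import org.apache.hadoop.hive.common.type.Timestamp;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;

    public class FormattedParseSketch {
      public static void main(String[] args) {
        HiveDateTimeFormatter fmt = new HiveJavaDateTimeFormatter(
            DateTimeFormatter.ofPattern("yyyy.MM.dd HH:mm:ss"));
        // New overload added above: trims the input, strips excess nano digits,
        // then parses with the supplied formatter instead of the built-in rules.
        Timestamp ts = PrimitiveObjectInspectorUtils.getTimestampFromString(
            "2020.02.03 00:00:00", fmt);
        System.out.println(ts); // expected: 2020-02-03 00:00:00
      }
    }

Passing null as the formatter falls through to the pre-patch behavior (the DATE_LENGTH check and the default timestamp parser), so callers that never set a format are unaffected.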