diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java index 102647e812..9443e8ec78 100644 --- common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java +++ common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java @@ -21,22 +21,32 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang.WordUtils; import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.common.type.Timestamp; import java.io.Serializable; import java.time.DateTimeException; +import java.time.DayOfWeek; import java.time.Instant; import java.time.LocalDateTime; +import java.time.Month; import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; +import java.time.format.TextStyle; import java.time.temporal.ChronoField; import java.time.temporal.ChronoUnit; +import java.time.temporal.IsoFields; import java.time.temporal.TemporalField; import java.time.temporal.TemporalUnit; +import java.time.temporal.WeekFields; import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Locale; import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; /** * Formatter using SQL:2016 datetime patterns. @@ -107,7 +117,7 @@ * * MM * Month (1-12) - * - For string to datetime conversion, conflicts with DDD. + * - For string to datetime conversion, conflicts with DDD, MONTH, MON. * * DD * Day of month (1-31) @@ -167,6 +177,82 @@ * output=2019-01-01 23:00:00 * - If FX is enabled, input length has to match the pattern's length. e.g. pattern=AM input=A.M. * is not accepted, but input=pm is. + * - Not listed as a character temporal because of special status: does not get padded with spaces + * upon formatting, and case is handled differently at datetime to string conversion. + * + * D + * Day of week (1-7) + * - 1 means Sunday, 2 means Monday, and so on. + * - Not allowed in string to datetime conversion. + * + * Q + * Quarter of year (1-4) + * - Not allowed in string to datetime conversion. + * + * WW + * Aligned week of year (1-53) + * - 1st week begins on January 1st and ends on January 7th, and so on. + * - Not allowed in string to datetime conversion. + * + * W + * Aligned week of month (1-5) + * - 1st week starts on the 1st of the month and ends on the 7th, and so on. + * - Not allowed in string to datetime conversion. + * + * A.2. Character temporals + * Temporal elements, but spelled out. + * - For datetime to string conversion, the pattern's case must match one of the listed formats + * (e.g. mOnTh is not accepted) to avoid ambiguity. Output is right padded with trailing spaces + * unless the pattern is marked with the fill mode modifier (FM). + * - For string to datetime conversion, the case of the pattern does not matter. + * + * MONTH|Month|month + * Name of month of year + * - For datetime to string conversion, will include trailing spaces up to length 9 (length of + * longest month of year name: "September"). Case is taken into account according to the + * following example (pattern => output): + * - MONTH => JANUARY + * - Month => January + * - month => january + * - For string to datetime conversion, neither the case of the pattern nor the case of the input + * are taken into account. + * - For string to datetime conversion, conflicts with MM and MON. + * + * + * MON|Mon|mon + * Abbreviated name of month of year + * - For datetime to string conversion, case is taken into account according to the following + * example (pattern => output): + * - MON => JAN + * - Mon => Jan + * - mon => jan + * - For string to datetime conversion, neither the case of the pattern nor the case of the input + * are taken into account. + * - For string to datetime conversion, conflicts with MM and MONTH. + * + * + * DAY|Day|day + * Name of day of week + * - For datetime to string conversion, will include trailing spaces until length is 9 (length of + * longest day of week name: "Wednesday"). Case is taken into account according to the following + * example (pattern => output): + * - DAY = SUNDAY + * - Day = Sunday + * - day = sunday + * - For string to datetime conversion, neither the case of the pattern nor the case of the input + * are taken into account. + * - Not allowed in string to datetime conversion. + * + * DY|Dy|dy + * Abbreviated name of day of week + * - For datetime to string conversion, case is taken into account according to the following + * example (pattern => output): + * - DY = SUN + * - Dy = Sun + * - dy = sun + * - For string to datetime conversion, neither the case of the pattern nor the case of the input + * are taken into account. + * - Not allowed in string to datetime conversion. * * B. Time zone tokens * TZH @@ -259,16 +345,18 @@ private static final int NANOS_MAX_LENGTH = 9; public static final int AM = 0; public static final int PM = 1; + private static final DateTimeFormatter MONTH_FORMATTER = DateTimeFormatter.ofPattern("MMM"); private String pattern; private List tokens = new ArrayList<>(); private boolean formatExact = false; - private static final Map TEMPORAL_TOKENS = + private static final Map NUMERIC_TEMPORAL_TOKENS = ImmutableMap.builder() .put("yyyy", ChronoField.YEAR).put("yyy", ChronoField.YEAR) .put("yy", ChronoField.YEAR).put("y", ChronoField.YEAR) .put("rrrr", ChronoField.YEAR).put("rr", ChronoField.YEAR) .put("mm", ChronoField.MONTH_OF_YEAR) + .put("d", WeekFields.SUNDAY_START.dayOfWeek()) .put("dd", ChronoField.DAY_OF_MONTH) .put("ddd", ChronoField.DAY_OF_YEAR) .put("hh", ChronoField.HOUR_OF_AMPM) @@ -284,8 +372,18 @@ .put("ff9", ChronoField.NANO_OF_SECOND).put("ff", ChronoField.NANO_OF_SECOND) .put("a.m.", ChronoField.AMPM_OF_DAY).put("am", ChronoField.AMPM_OF_DAY) .put("p.m.", ChronoField.AMPM_OF_DAY).put("pm", ChronoField.AMPM_OF_DAY) + .put("ww", ChronoField.ALIGNED_WEEK_OF_YEAR).put("w", ChronoField.ALIGNED_WEEK_OF_MONTH) + .put("q", IsoFields.QUARTER_OF_YEAR) .build(); + private static final Map CHARACTER_TEMPORAL_TOKENS = + ImmutableMap.builder() + .put("mon", ChronoField.MONTH_OF_YEAR) + .put("month", ChronoField.MONTH_OF_YEAR) + .put("day", ChronoField.DAY_OF_WEEK) + .put("dy", ChronoField.DAY_OF_WEEK) + .build(); + private static final Map TIME_ZONE_TOKENS = ImmutableMap.builder() .put("tzh", ChronoUnit.HOURS).put("tzm", ChronoUnit.MINUTES).build(); @@ -303,13 +401,15 @@ .put("hh12", 2).put("hh24", 2).put("tzm", 2).put("am", 4).put("pm", 4) .put("ff1", 1).put("ff2", 2).put("ff3", 3).put("ff4", 4).put("ff5", 5) .put("ff6", 6).put("ff7", 7).put("ff8", 8).put("ff9", 9).put("ff", 9) + .put("month", 9).put("day", 9).put("dy", 3) .build(); /** * Represents broad categories of tokens. */ public enum TokenType { - TEMPORAL, + NUMERIC_TEMPORAL, + CHARACTER_TEMPORAL, SEPARATOR, TIMEZONE, ISO_8601_DELIMITER, @@ -327,8 +427,9 @@ int length; // length (e.g. YYY: 3, FF8: 8) boolean fillMode; //FM, applies to type TEMPORAL only (later should apply to TIMEZONE as well) - public Token(TemporalField temporalField, String string, int length, boolean fillMode) { - this(TokenType.TEMPORAL, temporalField, null, string, length, fillMode); + public Token(TokenType tokenType, TemporalField temporalField, String string, int length, + boolean fillMode) { + this(tokenType, temporalField, null, string, length, fillMode); } public Token(TemporalUnit temporalUnit, String string, int length, boolean fillMode) { @@ -429,12 +530,18 @@ private void parsePatternToTokens(String pattern) { begin = end; break; } - if (isTemporalToken(candidate)) { + if (isNumericTemporalToken(candidate)) { lastAddedToken = parseTemporalToken(originalPattern, candidate, fillMode, begin); fillMode = false; begin = end; break; } + if (isCharacterTemporalToken(candidate)) { + lastAddedToken = parseCharacterTemporalToken(originalPattern, candidate, fillMode, begin); + fillMode = false; + begin = end; + break; + } if (isTimeZoneToken(candidate)) { lastAddedToken = parseTimeZoneToken(candidate, fillMode, begin); begin = end; @@ -468,8 +575,12 @@ private boolean isIso8601Delimiter(String candidate) { return candidate.length() == 1 && VALID_ISO_8601_DELIMITERS.contains(candidate); } - private boolean isTemporalToken(String candidate) { - return TEMPORAL_TOKENS.containsKey(candidate); + private boolean isNumericTemporalToken(String candidate) { + return NUMERIC_TEMPORAL_TOKENS.containsKey(candidate); + } + + private boolean isCharacterTemporalToken(String candidate) { + return CHARACTER_TEMPORAL_TOKENS.containsKey(candidate); } private boolean isTimeZoneToken(String pattern) { @@ -514,11 +625,24 @@ private Token parseIso8601DelimiterToken(String candidate, boolean fillMode, int private Token parseTemporalToken(String originalPattern, String candidate, boolean fillMode, int begin) { // for AM/PM, keep original case - if (TEMPORAL_TOKENS.get(candidate) == ChronoField.AMPM_OF_DAY) { + if (NUMERIC_TEMPORAL_TOKENS.get(candidate) == ChronoField.AMPM_OF_DAY) { int subStringEnd = begin + candidate.length(); candidate = originalPattern.substring(begin, subStringEnd); } - Token lastAddedToken = new Token(TEMPORAL_TOKENS.get(candidate.toLowerCase()), candidate, + Token lastAddedToken = new Token(TokenType.NUMERIC_TEMPORAL, + NUMERIC_TEMPORAL_TOKENS.get(candidate.toLowerCase()), candidate, + getTokenStringLength(candidate), fillMode); + tokens.add(lastAddedToken); + return lastAddedToken; + } + + private Token parseCharacterTemporalToken(String originalPattern, String candidate, + boolean fillMode, int begin) { + // keep original case + candidate = originalPattern.substring(begin, begin + candidate.length()); + + Token lastAddedToken = new Token(TokenType.CHARACTER_TEMPORAL, + CHARACTER_TEMPORAL_TOKENS.get(candidate.toLowerCase()), candidate, getTokenStringLength(candidate), fillMode); tokens.add(lastAddedToken); return lastAddedToken; @@ -587,6 +711,24 @@ private void verifyForParse() { timeZoneTemporalUnits.add(token.temporalUnit); } } + + //check for illegal temporal fields + if (temporalFields.contains(IsoFields.QUARTER_OF_YEAR)) { + throw new IllegalArgumentException("Illegal field: q (" + IsoFields.QUARTER_OF_YEAR + ")"); + } + if (temporalFields.contains(WeekFields.SUNDAY_START.dayOfWeek())) { + throw new IllegalArgumentException("Illegal field: d (" + WeekFields.SUNDAY_START.dayOfWeek() + ")"); + } + if (temporalFields.contains(ChronoField.DAY_OF_WEEK)) { + throw new IllegalArgumentException("Illegal field: dy/day (" + ChronoField.DAY_OF_WEEK + ")"); + } + if (temporalFields.contains(ChronoField.ALIGNED_WEEK_OF_MONTH)) { + throw new IllegalArgumentException("Illegal field: w (" + ChronoField.ALIGNED_WEEK_OF_MONTH + ")"); + } + if (temporalFields.contains(ChronoField.ALIGNED_WEEK_OF_YEAR)) { + throw new IllegalArgumentException("Illegal field: ww (" + ChronoField.ALIGNED_WEEK_OF_YEAR + ")"); + } + if (!(temporalFields.contains(ChronoField.YEAR))) { throw new IllegalArgumentException("Missing year token."); } @@ -648,6 +790,14 @@ private void verifyForFormat() { throw new IllegalArgumentException(token.string.toUpperCase() + " not a valid format for " + "timestamp or date."); } + if (token.type == TokenType.CHARACTER_TEMPORAL) { + String s = token.string; + if (!(s.equals(s.toUpperCase()) || s.equals(capitalize(s)) || s.equals(s.toLowerCase()))) { + throw new IllegalArgumentException( + "Ambiguous capitalization of token " + s + ". Accepted " + "forms are " + s + .toUpperCase() + ", " + capitalize(s) + ", or " + s.toLowerCase() + "."); + } + } } } @@ -659,10 +809,15 @@ public String format(Timestamp ts) { LocalDateTime.ofEpochSecond(ts.toEpochSecond(), ts.getNanos(), ZoneOffset.UTC); for (Token token : tokens) { switch (token.type) { - case TEMPORAL: + case NUMERIC_TEMPORAL: + case CHARACTER_TEMPORAL: try { value = localDateTime.get(token.temporalField); - outputString = formatTemporal(value, token); + if (token.type == TokenType.NUMERIC_TEMPORAL) { + outputString = formatNumericTemporal(value, token); + } else { + outputString = formatCharacterTemporal(value, token); + } } catch (DateTimeException e) { throw new IllegalArgumentException(token.temporalField + " couldn't be obtained from " + "LocalDateTime " + localDateTime, e); @@ -690,7 +845,7 @@ public String format(Date date) { return format(Timestamp.ofEpochSecond(date.toEpochSecond())); } - private String formatTemporal(int value, Token token) { + private String formatNumericTemporal(int value, Token token) { String output; if (token.temporalField == ChronoField.AMPM_OF_DAY) { output = value == 0 ? "a" : "p"; @@ -713,6 +868,34 @@ private String formatTemporal(int value, Token token) { return output; } + private String formatCharacterTemporal(int value, Token token) { + String output = null; + if (token.temporalField == ChronoField.MONTH_OF_YEAR) { + output = Month.of(value).getDisplayName(TextStyle.FULL, Locale.US); + } else if (token.temporalField == ChronoField.DAY_OF_WEEK) { + output = DayOfWeek.of(value).getDisplayName(TextStyle.FULL, Locale.US); + } + if (output == null) { + throw new IllegalStateException("TemporalField: " + token.temporalField + " not valid for " + + "character formatting."); + } + + // set length + if (output.length() > token.length) { + output = output.substring(0, token.length); // truncate to length + } else if (!token.fillMode && output.length() < token.length) { + output = StringUtils.rightPad(output, token.length); //pad to size + } + + // set case + if (Character.isUpperCase(token.string.charAt(1))) { + output = output.toUpperCase(); + } else if (Character.isLowerCase(token.string.charAt(0))) { + output = output.toLowerCase(); + } + return output; + } + /** * To match token.length, pad left with zeroes or truncate. * Omit padding if fill mode (FM) modifier on. @@ -754,9 +937,15 @@ public Timestamp parseTimestamp(String fullInput){ for (Token token : tokens) { switch (token.type) { - case TEMPORAL: - substring = getNextNumericSubstring(fullInput, index, token); // e.g. yy-m -> yy - value = parseTemporal(substring, token); // e.g. 18->2018, July->07 + case NUMERIC_TEMPORAL: + case CHARACTER_TEMPORAL: + if (token.type == TokenType.NUMERIC_TEMPORAL) { + substring = getNextNumericSubstring(fullInput, index, token); // e.g. yy-m -> yy + value = parseNumericTemporal(substring, token); // e.g. 18->2018 + } else { + substring = getNextCharacterSubstring(fullInput, index, token); //e.g. Marcharch -> March + value = parseCharacterTemporal(substring, token); // e.g. July->07 + } try { ldt = ldt.with(token.temporalField, value); } catch (DateTimeException e){ @@ -844,9 +1033,13 @@ private String getNextNumericSubstring(String s, int begin, int end, Token token return s; } } - // next non-numeric character is a delimiter. Don't worry about AM/PM since we've already - // handled that case. - if ((token.type == TokenType.TEMPORAL || token.type == TokenType.TIMEZONE) + // if it's a character temporal, the first non-letter character is a delimiter + if (token.type == TokenType.CHARACTER_TEMPORAL && s.matches(".*[^A-Za-z].*")) { + s = s.split("[^A-Za-z]", 2)[0]; + + // if it's a numeric element, next non-numeric character is a delimiter. Don't worry about + // AM/PM since we've already handled that case. + } else if ((token.type == TokenType.NUMERIC_TEMPORAL || token.type == TokenType.TIMEZONE) && s.matches(".*\\D.*")) { s = s.split("\\D", 2)[0]; } @@ -857,7 +1050,7 @@ private String getNextNumericSubstring(String s, int begin, int end, Token token /** * Get the integer value of a temporal substring. */ - private int parseTemporal(String substring, Token token){ + private int parseNumericTemporal(String substring, Token token){ checkFormatExact(substring, token); // exceptions to the rule @@ -898,6 +1091,53 @@ private int parseTemporal(String substring, Token token){ } } + private static final String MONTH_REGEX; + static { + StringBuilder sb = new StringBuilder(); + String or = ""; + for (Month month : Month.values()) { + sb.append(or).append(month); + or = "|"; + } + MONTH_REGEX = sb.toString(); + } + + private String getNextCharacterSubstring(String fullInput, int index, Token token) { + int end = index + token.length; + if (end > fullInput.length()) { + end = fullInput.length(); + } + String substring = fullInput.substring(index, end); + if (token.length == 3) { //dy, mon + return substring; + } + + Matcher matcher = Pattern.compile(MONTH_REGEX, Pattern.CASE_INSENSITIVE).matcher(substring); + if (matcher.find()) { + return substring.substring(0, matcher.end()); + } + throw new IllegalArgumentException( + "Couldn't find " + token.string + " in substring " + substring + " at index " + index); + } + + private int parseCharacterTemporal(String substring, Token token) { + try { + if (token.temporalField == ChronoField.MONTH_OF_YEAR) { + if (token.length == 3) { + return Month.from(MONTH_FORMATTER.parse(capitalize(substring))).getValue(); + } else { + return Month.valueOf(substring.toUpperCase()).getValue(); + } + } + } catch (Exception e) { + throw new IllegalArgumentException( + "Couldn't parse substring \"" + substring + "\" with token " + token + " to integer." + + "Pattern is " + pattern, e); + } + throw new IllegalArgumentException( + "token: (" + token + ") isn't a valid character temporal. Pattern is " + pattern); + } + /** * @throws IllegalArgumentException if input length doesn't match expected (token) length */ @@ -986,7 +1226,8 @@ private boolean nextTokenIs(String pattern, Token currentToken) { Token nextToken = tokens.get(tokens.indexOf(currentToken) + 1); pattern = pattern.toLowerCase(); return (isTimeZoneToken(pattern) && TIME_ZONE_TOKENS.get(pattern) == nextToken.temporalUnit - || isTemporalToken(pattern) && TEMPORAL_TOKENS.get(pattern) == nextToken.temporalField); + || isNumericTemporalToken(pattern) && NUMERIC_TEMPORAL_TOKENS.get(pattern) == nextToken.temporalField + || isCharacterTemporalToken(pattern) && CHARACTER_TEMPORAL_TOKENS.get(pattern) == nextToken.temporalField); } public String getPattern() { @@ -999,4 +1240,8 @@ public String getPattern() { protected List getTokens() { return new ArrayList<>(tokens); } + + private static String capitalize(String substring) { + return WordUtils.capitalize(substring.toLowerCase()); + } } diff --git common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java index c0f9f7b2b5..ff41534fce 100644 --- common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java +++ common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java @@ -91,9 +91,26 @@ public void testSetPatternWithBadPatterns() { verifyBadPattern("yyyy-mm-dd SSSSS AM", true); verifyBadPattern("yyyy-mm-dd MI SSSSS", true); verifyBadPattern("yyyy-mm-dd SS SSSSS", true); + verifyBadPattern("yyyy mm-MON dd", true); + verifyBadPattern("yyyy mm-MONTH dd", true); + verifyBadPattern("yyyy MON, month dd", true); verifyBadPattern("tzm", false); verifyBadPattern("tzh", false); + + //ambiguous case for formatting + verifyBadPattern("MOnth", false); + verifyBadPattern("DaY", false); + verifyBadPattern("dAy", false); + verifyBadPattern("dY", false); + + //illegal for parsing + verifyBadPattern("yyyy-mm-dd q", true); + verifyBadPattern("yyyy-mm-dd d", true); + verifyBadPattern("yyyy-mm-dd dy", true); + verifyBadPattern("yyyy-mm-dd day", true); + verifyBadPattern("yyyy-mm-dd w", true); + verifyBadPattern("yyyy-mm-dd ww", true); } @Test @@ -106,6 +123,28 @@ public void testFormatTimestamp() { checkFormatTs("HH12 P.M.", "2019-01-01 00:15:10", "12 A.M."); checkFormatTs("HH12 AM", "2019-01-01 12:15:10", "12 PM"); checkFormatTs("YYYY-MM-DD HH12PM", "2017-05-05 00:00:00", "2017-05-05 12AM"); + + checkFormatTs("YYYY-MONTH-DD", "2019-01-01 00:00:00", "2019-JANUARY -01"); //fill to length 9 + checkFormatTs("YYYY-Month-DD", "2019-01-01 00:00:00", "2019-January -01"); + checkFormatTs("YYYY-month-DD", "2019-01-01 00:00:00", "2019-january -01"); + checkFormatTs("YYYY-MON-DD", "2019-01-01 00:00:00", "2019-JAN-01"); + checkFormatTs("YYYY-Mon-DD", "2019-01-01 00:00:00", "2019-Jan-01"); + checkFormatTs("YYYY-mon-DD", "2019-01-01 00:00:00", "2019-jan-01"); + + checkFormatTs("D: DAY", "2019-01-01 00:00:00", "3: TUESDAY "); //fill to length 9 + checkFormatTs("D: Day", "2019-01-02 00:00:00", "4: Wednesday"); + checkFormatTs("D: day", "2019-01-03 00:00:00", "5: thursday "); + checkFormatTs("D: DY", "2019-01-04 00:00:00", "6: FRI"); + checkFormatTs("D: Dy", "2019-01-05 00:00:00", "7: Sat"); + checkFormatTs("D: dy", "2019-01-06 00:00:00", "1: sun"); + checkFormatTs("D: DAY", "2019-01-07 00:00:00", "2: MONDAY "); + + checkFormatTs("YYYY-mm-dd: Q WW W", "2019-01-01 00:00:00", "2019-01-01: 1 01 1"); + checkFormatTs("YYYY-mm-dd: Q WW W", "2019-01-07 00:00:00", "2019-01-07: 1 01 1"); + checkFormatTs("YYYY-mm-dd: Q WW W", "2019-01-08 00:00:00", "2019-01-08: 1 02 2"); + checkFormatTs("YYYY-mm-dd: Q WW W", "2019-03-31 00:00:00", "2019-03-31: 1 13 5"); + checkFormatTs("YYYY-mm-dd: Q WW W", "2019-04-01 00:00:00", "2019-04-01: 2 13 1"); + checkFormatTs("YYYY-mm-dd: Q WW W", "2019-12-31 00:00:00", "2019-12-31: 4 53 5"); } private void checkFormatTs(String pattern, String input, String expectedOutput) { @@ -188,6 +227,18 @@ public void testParseTimestamp() { checkParseTimestamp("YYYYMMDDHH12MIA.M.TZHTZM", "201812310800AM+0515", "2018-12-31 08:00:00"); checkParseTimestamp("YYYYMMDDHH12MIA.M.TZHTZM", "201812310800AM0515", "2018-12-31 08:00:00"); checkParseTimestamp("YYYYMMDDHH12MIA.M.TZHTZM", "201812310800AM-0515", "2018-12-31 08:00:00"); + + //MONTH, MON : case really doesn't matter + checkParseTimestamp("yyyy-MONTH-dd", "2018-FEBRUARY-28", "2018-02-28 00:00:00"); + checkParseTimestamp("yyyy-Month-dd", "2018-february-28", "2018-02-28 00:00:00"); + checkParseTimestamp("yyyy-month-dd", "2018-FEBRUARY-28", "2018-02-28 00:00:00"); + checkParseTimestamp("yyyy-montH-dd", "2018-febRuary-28", "2018-02-28 00:00:00"); + checkParseTimestamp("yyyy-MON-dd", "2018-FEB-28", "2018-02-28 00:00:00"); + checkParseTimestamp("yyyy-moN-dd", "2018-FeB-28", "2018-02-28 00:00:00"); + checkParseTimestamp("yyyy-mon-dd", "2018-FEB-28", "2018-02-28 00:00:00"); + //letters and numbers are delimiters to each other, respectively + checkParseDate("yyyy-ddMONTH", "2018-4March", "2018-03-04"); + checkParseDate("yyyy-MONTHdd", "2018-March4", "2018-03-04"); } private int getFirstTwoDigits() { @@ -229,6 +280,9 @@ public void testParseDate() { checkParseDate("rrrr-mm-dd", "99-02-03", firstTwoDigits + "99-02-03"); checkParseDate("yyyy-mm-dd hh mi ss.ff7", "2018/01/01 2.2.2.55", "2018-01-01"); + + checkParseDate("dd/MonthT/yyyy", "31/AugustT/2020", "2020-08-31"); + checkParseDate("dd/MonthT/yyyy", "31/MarchT/2020", "2020-03-31"); } private void checkParseDate(String pattern, String input, String expectedOutput) { @@ -247,6 +301,11 @@ public void testParseTimestampError() { verifyBadParseString("yyyy-mm-dd tzh:tzm", "2019-01-01 +16:00"); //tzh out of range verifyBadParseString("yyyy-mm-dd tzh:tzm", "2019-01-01 +14:60"); //tzm out of range verifyBadParseString("YYYY DDD", "2000 367"); //ddd out of range + verifyBadParseString("yyyy-month-dd", "2019-merch-23"); //invalid month of year + verifyBadParseString("yyyy-mon-dd", "2019-mer-23"); //invalid month of year + verifyBadParseString("yyyy-MON-dd", "2018-FEBRUARY-28"); // can't mix and match mon and month + verifyBadParseString("yyyy-MON-dd", "2018-FEBR-28"); + verifyBadParseString("yyyy-MONTH-dd", "2018-FEB-28"); } private void verifyBadPattern(String string, boolean forParsing) { @@ -261,12 +320,16 @@ private void verifyBadPattern(String string, boolean forParsing) { @Test public void testFm() { - //fm //year (019) becomes 19 even if pattern is yyy checkFormatTs("FMyyy-FMmm-dd FMHH12:MI:FMSS", "2019-01-01 01:01:01", "19-1-01 1:01:1"); //ff[1-9] shouldn't be affected, because leading zeroes hold information checkFormatTs("FF5/FMFF5", "2019-01-01 01:01:01.0333", "03330/03330"); checkFormatTs("FF/FMFF", "2019-01-01 01:01:01.0333", "0333/0333"); + //omit trailing spaces from character temporal elements + checkFormatTs("YYYY-fmMonth-DD", "2019-01-01 00:00:00", "2019-January-01"); + checkFormatTs("D: fmDAY", "2019-01-01 00:00:00", "3: TUESDAY"); + checkFormatTs("D: fmDay", "2019-01-02 00:00:00", "4: Wednesday"); + //only affects temporals that immediately follow verifyBadPattern("yyy-mm-dd FM,HH12", false); verifyBadPattern("yyy-mm-dd FM,HH12", true); @@ -301,6 +364,8 @@ public void testFx() { checkParseTimestamp("FXDD-MM-YYYY hh12 A.M.", "01-01-1998 12 p.m.", "1998-01-01 12:00:00"); verifyBadParseString("FXDD-MM-YYYY hh12 am", "01-01-1998 12 p.m."); verifyBadParseString("FXDD-MM-YYYY hh12 a.m.", "01-01-1998 12 pm"); + //character temporals shouldn't have trailing spaces + checkParseTimestamp("FXDD-month-YYYY", "15-March-1998", "1998-03-15 00:00:00"); } @Test @@ -330,6 +395,9 @@ public void testText() { // non-numeric characters in text counts as a delimiter checkParseDate("yyyy\"m\"mm\"d\"dd", "19m1d1", LocalDate.now().getYear() / 100 + "19-01-01"); checkParseDate("yyyy\"[\"mm\"]\"dd", "19[1]1", LocalDate.now().getYear() / 100 + "19-01-01"); + // parse character temporals correctly + checkParseDate("dd/Month\"arch\"/yyyy", "31/Marcharch/2020", "2020-03-31"); + checkParseDate("dd/Month\"ember\"/yyyy", "31/Decemberember/2020", "2020-12-31"); // single quotes are separators and not text delimiters checkParseTimestamp("\"Y\'ear \"YYYY \' \"month\" MM \"day\" DD.\"!\"",