diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveDateTimeFormatter.java common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveDateTimeFormatter.java new file mode 100644 index 0000000000..4243d80a40 --- /dev/null +++ common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveDateTimeFormatter.java @@ -0,0 +1,52 @@ +package org.apache.hadoop.hive.common.format.datetime; + +import org.apache.hadoop.hive.common.type.Timestamp; + +import java.text.SimpleDateFormat; +import java.time.format.DateTimeFormatter; +import java.util.TimeZone; + +public interface HiveDateTimeFormatter { + + /** + * Only used for HiveSimpleDateFormatter, which is a wrapper for the given SimpleDateFormat object. + */ + void setFormatter(SimpleDateFormat simpleDateFormat) throws WrongFormatterException; + + /** + * Only used for HiveJavaDateTimeFormatter, which is a wrapper for the given DateTimeFormatter object. + */ + void setFormatter(DateTimeFormatter dateTimeFormatter) throws WrongFormatterException; + + /** + * Format the given timestamp into a string. + */ + String format(Timestamp ts); + + /** + * Parse the given string into a timestamp. + * + * @throws ParseException if string cannot be parsed. + */ + Timestamp parse(String string) throws ParseException; + + /** + * Set the format pattern to be used for formatting timestamps or parsing strings. + * Different HiveDateTimeFormatter implementations interpret some patterns differently. For example, + * HiveSimpleDateFormatter interprets the string "mm" as minute, while HiveSqlDateTimeFormatter + * interprets it as month. + */ + void setPattern(String pattern); + + /** + * Set the time zone of the formatter. Only HiveSimpleDateFormatter uses this. + */ + void setTimeZone(TimeZone timeZone); + + public enum FormatterType { + SIMPLE_DATE_FORMAT, + JAVA_TIME_DATETIMEFORMATTER, + SQL_2016 + } + +} diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveJavaDateTimeFormatter.java common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveJavaDateTimeFormatter.java new file mode 100644 index 0000000000..374a06e72b --- /dev/null +++ common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveJavaDateTimeFormatter.java @@ -0,0 +1,42 @@ +package org.apache.hadoop.hive.common.format.datetime; + +import org.apache.hadoop.hive.common.type.Timestamp; + +import java.text.SimpleDateFormat; +import java.time.Instant; +import java.time.LocalDateTime; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; +import java.util.TimeZone; + +/** + * Wrapper for DateTimeFormatter in the java.time package. 
+ */
+public class HiveJavaDateTimeFormatter implements HiveDateTimeFormatter {
+
+  private DateTimeFormatter formatter;
+
+  @Override public void setFormatter(DateTimeFormatter dateTimeFormatter) {
+    this.formatter = dateTimeFormatter;
+  }
+
+  @Override public String format(Timestamp ts) {
+    return formatter.format(LocalDateTime.ofInstant(
+        Instant.ofEpochSecond(ts.toEpochSecond(), ts.getNanos()), ZoneId.of("UTC")));
+  }
+
+  @Override public Timestamp parse(String string) {
+    // Delegates to the default Timestamp parser; the wrapped DateTimeFormatter is not used here.
+    return Timestamp.valueOf(string);
+  }
+
+  // unused methods
+  @Override public void setPattern(String pattern) {}
+  @Override public void setTimeZone(TimeZone timeZone) {}
+  @Override public void setFormatter(SimpleDateFormat simpleDateFormat)
+      throws WrongFormatterException {
+    throw new WrongFormatterException("HiveJavaDateTimeFormatter wraps an object of type "
+        + "java.time.format.DateTimeFormatter, so the formatter cannot be of type "
+        + "java.text.SimpleDateFormat");
+  }
+}
diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSimpleDateFormatter.java common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSimpleDateFormatter.java
new file mode 100644
index 0000000000..bf620c65fd
--- /dev/null
+++ common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSimpleDateFormatter.java
@@ -0,0 +1,54 @@
+package org.apache.hadoop.hive.common.format.datetime;
+
+import org.apache.hadoop.hive.common.type.Timestamp;
+
+import java.text.SimpleDateFormat;
+import java.time.format.DateTimeFormatter;
+import java.util.Date;
+import java.util.TimeZone;
+
+/**
+ * Wrapper for java.text.SimpleDateFormat.
+ */
+public class HiveSimpleDateFormatter implements HiveDateTimeFormatter {
+
+  private SimpleDateFormat format = new SimpleDateFormat();
+
+  public HiveSimpleDateFormatter() {}
+
+  @Override public void setFormatter(SimpleDateFormat simpleDateFormat) {
+    this.format = simpleDateFormat;
+  }
+
+  @Override public String format(Timestamp ts) {
+    Date date = new Date(ts.toEpochMilli());
+    return format.format(date);
+  }
+
+  @Override public Timestamp parse(String string) throws ParseException {
+    try {
+      Date date = format.parse(string);
+      return Timestamp.ofEpochMilli(date.getTime());
+    } catch (java.text.ParseException e) {
+      throw new ParseException(
+          "String " + string + " could not be parsed by java.text.SimpleDateFormat: " + format);
+    }
+  }
+
+  @Override public void setPattern(String pattern) {
+    format.applyPattern(pattern);
+  }
+
+  @Override public void setTimeZone(TimeZone timeZone) {
+    format.setTimeZone(timeZone);
+  }
+
+  // unused methods
+  @Override public void setFormatter(DateTimeFormatter dateTimeFormatter)
+      throws WrongFormatterException {
+    throw new WrongFormatterException(
+        "HiveSimpleDateFormatter wraps an object of type java.text.SimpleDateFormat, "
+        + "so the formatter cannot be of type java.time.format.DateTimeFormatter");
+  }
+
+}
diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java
new file mode 100644
index 0000000000..b5ae4c5140
--- /dev/null
+++ common/src/java/org/apache/hadoop/hive/common/format/datetime/HiveSqlDateTimeFormatter.java
@@ -0,0 +1,45 @@
+package org.apache.hadoop.hive.common.format.datetime;
+
+import org.apache.hadoop.hive.common.type.Timestamp;
+
+import java.text.SimpleDateFormat;
+import java.time.format.DateTimeFormatter;
+import java.util.TimeZone;
+
+/**
+ * Formatter using SQL:2016
datetime patterns. + */ + +public class HiveSqlDateTimeFormatter implements HiveDateTimeFormatter { + + private String pattern; + + public HiveSqlDateTimeFormatter() {} + + @Override public void setPattern(String pattern) { + this.pattern = pattern; + } + + @Override public String format(Timestamp ts) { + //TODO + return null; + } + + @Override public Timestamp parse(String string) throws ParseException { + //TODO + return null; + } + + // unused methods + @Override public void setTimeZone(TimeZone timeZone) {} + @Override public void setFormatter(DateTimeFormatter dateTimeFormatter) + throws WrongFormatterException { + throw new WrongFormatterException("HiveSqlDateTimeFormatter is not a wrapper for " + + "java.time.format.DateTimeFormatter, use HiveJavaDateTimeFormatter instead."); + } + @Override public void setFormatter(SimpleDateFormat simpleDateFormat) + throws WrongFormatterException { + throw new WrongFormatterException("HiveSqlDateTimeFormatter is not a wrapper for " + + "java.text.SimpleDateFormat, use HiveSimpleDateFormatter instead."); + } +} diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/ParseException.java common/src/java/org/apache/hadoop/hive/common/format/datetime/ParseException.java new file mode 100644 index 0000000000..2a90e7b04e --- /dev/null +++ common/src/java/org/apache/hadoop/hive/common/format/datetime/ParseException.java @@ -0,0 +1,8 @@ +package org.apache.hadoop.hive.common.format.datetime; + +public class ParseException extends Exception { + + public ParseException(String message) { + super(message); + } +} diff --git common/src/java/org/apache/hadoop/hive/common/format/datetime/WrongFormatterException.java common/src/java/org/apache/hadoop/hive/common/format/datetime/WrongFormatterException.java new file mode 100644 index 0000000000..ccb33cc643 --- /dev/null +++ common/src/java/org/apache/hadoop/hive/common/format/datetime/WrongFormatterException.java @@ -0,0 +1,8 @@ +package org.apache.hadoop.hive.common.format.datetime; + +public class WrongFormatterException extends Exception { + + public WrongFormatterException(String message) { + super(message); + } +} diff --git common/src/java/org/apache/hadoop/hive/common/type/Date.java common/src/java/org/apache/hadoop/hive/common/type/Date.java index 6ecfcf65c9..9f348c4ace 100644 --- common/src/java/org/apache/hadoop/hive/common/type/Date.java +++ common/src/java/org/apache/hadoop/hive/common/type/Date.java @@ -17,6 +17,11 @@ */ package org.apache.hadoop.hive.common.type; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.HiveJavaDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.ParseException; +import org.apache.hadoop.hive.common.format.datetime.WrongFormatterException; + import java.time.Instant; import java.time.LocalDate; import java.time.LocalDateTime; @@ -54,9 +59,12 @@ } private LocalDate localDate; + private HiveDateTimeFormatter printFormatter; + private HiveDateTimeFormatter parseFormatter; private Date(LocalDate localDate) { this.localDate = localDate != null ? 
localDate : EPOCH; + initFormatters(); } public Date() { @@ -67,9 +75,21 @@ public Date(Date d) { this(d.localDate); } + + private void initFormatters() { + try { + printFormatter = new HiveJavaDateTimeFormatter(); + printFormatter.setFormatter(PRINT_FORMATTER); + parseFormatter = new HiveJavaDateTimeFormatter(); + parseFormatter.setFormatter(PARSE_FORMATTER); + } catch (WrongFormatterException e) { + throw new RuntimeException("Wrong formatter", e); + } + } + @Override public String toString() { - return localDate.format(PRINT_FORMATTER); + return printFormatter.format(Timestamp.ofEpochMilli(toEpochMilli())); } public int hashCode() { @@ -137,6 +157,16 @@ public static Date valueOf(String s) { return new Date(localDate); } + public static Date valueOf(String s, HiveDateTimeFormatter formatter) { + s = s.trim(); + try { + return Date.ofEpochMilli(formatter.parse(s).toEpochMilli()); + } catch (ParseException e) { + // Fall back to original + return valueOf(s); + } + } + public static Date ofEpochDay(int epochDay) { return new Date(LocalDate.ofEpochDay(epochDay)); } diff --git common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java index a8b7b6d186..a162437510 100644 --- common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java +++ common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java @@ -17,6 +17,11 @@ */ package org.apache.hadoop.hive.common.type; +import org.apache.hadoop.hive.common.format.datetime.ParseException; +import org.apache.hadoop.hive.common.format.datetime.WrongFormatterException; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.HiveJavaDateTimeFormatter; + import java.time.Instant; import java.time.LocalDateTime; import java.time.ZoneOffset; @@ -43,8 +48,11 @@ public class Timestamp implements Comparable { private static final LocalDateTime EPOCH = LocalDateTime.of(1970, 1, 1, 0, 0, 0); - private static final DateTimeFormatter PARSE_FORMATTER; - private static final DateTimeFormatter PRINT_FORMATTER; + + private HiveDateTimeFormatter printFormatter; + private HiveDateTimeFormatter parseFormatter; + private static final DateTimeFormatter PARSE_DATETIME_FORMATTER; + private static final DateTimeFormatter PRINT_DATETIME_FORMATTER; static { DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder(); @@ -64,13 +72,13 @@ .appendValue(SECOND_OF_MINUTE, 1, 2, SignStyle.NORMAL) .optionalStart().appendFraction(ChronoField.NANO_OF_SECOND, 1, 9, true).optionalEnd() .optionalEnd(); - PARSE_FORMATTER = builder.toFormatter().withResolverStyle(ResolverStyle.LENIENT); + PARSE_DATETIME_FORMATTER = builder.toFormatter().withResolverStyle(ResolverStyle.LENIENT); builder = new DateTimeFormatterBuilder(); // Date and time parts builder.append(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")); // Fractional part builder.optionalStart().appendFraction(ChronoField.NANO_OF_SECOND, 0, 9, true).optionalEnd(); - PRINT_FORMATTER = builder.toFormatter(); + PRINT_DATETIME_FORMATTER = builder.toFormatter(); } private LocalDateTime localDateTime; @@ -78,6 +86,7 @@ /* Private constructor */ private Timestamp(LocalDateTime localDateTime) { this.localDateTime = localDateTime != null ? 
localDateTime : EPOCH; + initFormatters(); } public Timestamp() { @@ -88,6 +97,17 @@ public Timestamp(Timestamp t) { this(t.localDateTime); } + private void initFormatters() { + try { + printFormatter = new HiveJavaDateTimeFormatter(); + printFormatter.setFormatter(PRINT_DATETIME_FORMATTER); + parseFormatter = new HiveJavaDateTimeFormatter(); + parseFormatter.setFormatter(PARSE_DATETIME_FORMATTER); + } catch (WrongFormatterException e) { + throw new RuntimeException("Wrong formatter", e); + } + } + public void set(Timestamp t) { this.localDateTime = t != null ? t.localDateTime : EPOCH; } @@ -98,7 +118,8 @@ public String format(DateTimeFormatter formatter) { @Override public String toString() { - return localDateTime.format(PRINT_FORMATTER); + return printFormatter.format( + Timestamp.ofEpochMilli(localDateTime.toInstant(ZoneOffset.UTC).toEpochMilli(), getNanos())); } public int hashCode() { @@ -154,7 +175,7 @@ public static Timestamp valueOf(String s) { s = s.trim(); LocalDateTime localDateTime; try { - localDateTime = LocalDateTime.parse(s, PARSE_FORMATTER); + localDateTime = LocalDateTime.parse(s, PARSE_DATETIME_FORMATTER); } catch (DateTimeParseException e) { // Try ISO-8601 format try { @@ -166,6 +187,16 @@ public static Timestamp valueOf(String s) { return new Timestamp(localDateTime); } + public static Timestamp valueOf(String s, HiveDateTimeFormatter formatter) { + s = s.trim(); + try { + return formatter.parse(s); + } catch (ParseException e) { + // Fall back to original + return valueOf(s); + } + } + public static Timestamp ofEpochSecond(long epochSecond) { return ofEpochSecond(epochSecond, 0); } diff --git common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveJavaDateTimeFormatter.java common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveJavaDateTimeFormatter.java new file mode 100644 index 0000000000..a439cf3c57 --- /dev/null +++ common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveJavaDateTimeFormatter.java @@ -0,0 +1,41 @@ +package org.apache.hadoop.hive.common.format.datetime; + +import org.apache.hadoop.hive.common.type.Timestamp; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeFormatterBuilder; +import java.time.temporal.ChronoField; + +public class TestHiveJavaDateTimeFormatter { + + private static DateTimeFormatter DATE_TIME_FORMATTER; + static { + DateTimeFormatterBuilder + builder = new DateTimeFormatterBuilder(); + builder.append(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")); + builder.optionalStart().appendFraction(ChronoField.NANO_OF_SECOND, 0, 9, true).optionalEnd(); + DATE_TIME_FORMATTER = builder.toFormatter(); + } + private HiveDateTimeFormatter formatter = new HiveJavaDateTimeFormatter(); + + @Before + public void setUp() throws WrongFormatterException { + formatter.setFormatter(DATE_TIME_FORMATTER); + } + + @Test + public void testFormat() { + Timestamp ts = Timestamp.valueOf("2019-01-01 00:00:00.99999"); + Assert.assertEquals("2019-01-01 00:00:00.99999", formatter.format(ts)); + } + + @Test + public void testParse() throws ParseException { + String s = "2019-01-01 00:00:00.99999"; + Assert.assertEquals(Timestamp.valueOf("2019-01-01 00:00:00.99999"), formatter.parse(s)); + } + +} diff --git common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSimpleDateFormatter.java common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSimpleDateFormatter.java new file mode 100644 
index 0000000000..5c96acad49 --- /dev/null +++ common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSimpleDateFormatter.java @@ -0,0 +1,47 @@ +package org.apache.hadoop.hive.common.format.datetime; + +import org.apache.hadoop.hive.common.type.Timestamp; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.text.SimpleDateFormat; +import java.time.ZoneOffset; +import java.util.TimeZone; + + +public class TestHiveSimpleDateFormatter { + + private HiveDateTimeFormatter formatter = new HiveSimpleDateFormatter(); + + @Before + public void setUp() throws WrongFormatterException { + formatter.setFormatter(new SimpleDateFormat()); + formatter.setPattern("yyyy-MM-dd HH:mm:ss"); + formatter.setTimeZone(TimeZone.getTimeZone(ZoneOffset.UTC)); + } + + @Test + public void testFormat() { + verifyFormat("2019-01-01 01:01:01"); + verifyFormat("2019-01-01 00:00:00"); + verifyFormat("1960-01-01 23:00:00"); + } + + private void verifyFormat(String s) { + Timestamp ts = Timestamp.valueOf(s); + Assert.assertEquals(s, formatter.format(ts)); + } + + @Test + public void testParse() throws ParseException { + verifyParse("2019-01-01 01:10:10"); + verifyParse("1960-01-01 23:00:00"); + + } + + private void verifyParse(String s) throws ParseException { + Timestamp ts = Timestamp.valueOf(s); + Assert.assertEquals(ts, formatter.parse(s)); + } +} diff --git common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java new file mode 100644 index 0000000000..580f2598cf --- /dev/null +++ common/src/test/org/apache/hadoop/hive/common/format/datetime/TestHiveSqlDateTimeFormatter.java @@ -0,0 +1,16 @@ +package org.apache.hadoop.hive.common.format.datetime; + +import org.junit.Test; + +public class TestHiveSqlDateTimeFormatter { + + private HiveDateTimeFormatter formatter = new HiveSqlDateTimeFormatter(); + + @Test + public void testFormat() { + } + + @Test + public void testParse() throws ParseException { + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 5ff338660f..50d463f877 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -35,6 +35,7 @@ import java.util.regex.Pattern; import org.apache.commons.lang.ArrayUtils; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.ql.exec.vector.expressions.BucketNumExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastBooleanToCharViaLongToChar; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java index dfa9f8a00d..9f4ac5f3c2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java @@ -18,27 +18,32 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.HiveSimpleDateFormatter; +import org.apache.hadoop.hive.common.type.Timestamp; import 
org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.serde2.io.DateWritableV2;
 
 import java.sql.Date;
-import java.text.SimpleDateFormat;
 import java.util.TimeZone;
 
 public class CastDateToString extends LongToStringUnaryUDF {
   private static final long serialVersionUID = 1L;
   protected transient Date dt = new Date(0);
-  private transient SimpleDateFormat formatter;
+  private transient HiveDateTimeFormatter formatter;
 
   public CastDateToString() {
     super();
-    formatter = new SimpleDateFormat("yyyy-MM-dd");
-    formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+    initFormatter();
   }
 
   public CastDateToString(int inputColumn, int outputColumnNum) {
     super(inputColumn, outputColumnNum);
-    formatter = new SimpleDateFormat("yyyy-MM-dd");
+    initFormatter();
+  }
+
+  private void initFormatter() {
+    formatter = new HiveSimpleDateFormatter();
+    formatter.setPattern("yyyy-MM-dd");
     formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
   }
 
@@ -49,8 +54,10 @@ protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) {
   @Override
   protected void func(BytesColumnVector outV, long[] vector, int i) {
-    dt.setTime(DateWritableV2.daysToMillis((int) vector[i]));
-    byte[] temp = formatter.format(dt).getBytes();
+    byte[] temp = formatter.format(
+        Timestamp.ofEpochMilli(
+            org.apache.hadoop.hive.common.type.Date.ofEpochDay((int) vector[i]).toEpochMilli()))
+        .getBytes();
     assign(outV, i, temp, temp.length);
   }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToString.java
index adc3a9d7b9..582d5d306d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToString.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToString.java
@@ -18,10 +18,13 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveJavaDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.WrongFormatterException;
+import org.apache.hadoop.hive.common.type.Timestamp;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
 
-import java.sql.Timestamp;
 import java.time.Instant;
 import java.time.LocalDateTime;
 import java.time.ZoneOffset;
@@ -42,12 +45,25 @@
     PRINT_FORMATTER = builder.toFormatter();
   }
 
+  private transient HiveDateTimeFormatter format; // not Serializable, so transient; rebuilt in the constructors
+
   public CastTimestampToString() {
     super();
+    initFormatter();
   }
 
   public CastTimestampToString(int inputColumn, int outputColumnNum) {
     super(inputColumn, outputColumnNum);
+    initFormatter();
+  }
+
+  private void initFormatter() {
+    try {
+      format = new HiveJavaDateTimeFormatter();
+      format.setFormatter(PRINT_FORMATTER);
+    } catch (WrongFormatterException e) {
+      throw new RuntimeException(e);
+    }
   }
 
   // The assign method will be overridden for CHAR and VARCHAR.
@@ -57,16 +73,24 @@ protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) { @Override protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i) { - byte[] temp = LocalDateTime.ofInstant(Instant.ofEpochMilli(inV.time[i]), ZoneOffset.UTC) - .withNano(inV.nanos[i]) - .format(PRINT_FORMATTER).getBytes(); + String formattedLocalDateTime = format.format( + org.apache.hadoop.hive.common.type.Timestamp.ofEpochMilli(inV.time[i], inV.nanos[i])); + + byte[] temp = formattedLocalDateTime.getBytes(); assign(outV, i, temp, temp.length); } - public static String getTimestampString(Timestamp ts) { + public static String getTimestampString(java.sql.Timestamp ts) { return LocalDateTime.ofInstant(Instant.ofEpochMilli(ts.getTime()), ZoneOffset.UTC) .withNano(ts.getNanos()) .format(PRINT_FORMATTER); } + + public static String getTimestampString(java.sql.Timestamp ts, HiveDateTimeFormatter formatter) { + if (formatter == null) { + return getTimestampString(ts); + } + return formatter.format(Timestamp.ofEpochMilli(ts.getTime(), ts.getNanos())); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFromUnixTime.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFromUnixTime.java index 3cee0c1d1c..221d4eee71 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFromUnixTime.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFromUnixTime.java @@ -18,12 +18,16 @@ package org.apache.hadoop.hive.ql.udf; -import java.text.SimpleDateFormat; import java.util.Date; import java.util.TimeZone; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.HiveSimpleDateFormatter; +import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter; +import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; @@ -38,7 +42,9 @@ + " > SELECT _FUNC_(0, 'yyyy-MM-dd HH:mm:ss') FROM src LIMIT 1;\n" + " '1970-01-01 00:00:00'") public class UDFFromUnixTime extends UDF { - private SimpleDateFormat formatter; + private HiveDateTimeFormatter formatter; + private boolean useLegacyFormats = true; + private boolean lastUsedLegacyFormats = true; private Text result = new Text(); private Text lastFormat = new Text(); @@ -118,15 +124,32 @@ public Text evaluate(IntWritable unixtime, Text format) { * @return elapsed time in the given format. 
*/
   private Text eval(long unixtime, Text format) {
+    initFormatter();
+
     if (!format.equals(lastFormat)) {
-      formatter = new SimpleDateFormat(format.toString());
+      formatter.setPattern(format.toString());
       formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
       lastFormat.set(format);
     }
 
     // convert seconds to milliseconds
-    Date date = new Date(unixtime * 1000L);
-    result.set(formatter.format(date));
+    Timestamp ts = Timestamp.ofEpochMilli(unixtime * 1000L);
+    result.set(formatter.format(ts));
     return result;
   }
+
+  private void initFormatter() {
+    SessionState ss = SessionState.get();
+    if (ss != null) {
+      useLegacyFormats = true; // TODO: read the future HiveConf legacy-vs-SQL:2016 flag here; getBoolVar(null) would throw NPE
+    }
+    if (formatter == null || useLegacyFormats != lastUsedLegacyFormats) {
+      if (useLegacyFormats) {
+        formatter = new HiveSimpleDateFormatter();
+      } else {
+        formatter = new HiveSqlDateTimeFormatter();
+      }
+      lastUsedLegacyFormats = useLegacyFormats;
+    }
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java
index 6597f4b34b..b4699c0359 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java
@@ -23,16 +23,21 @@
 import org.apache.hadoop.hive.common.classification.InterfaceAudience;
 import org.apache.hadoop.hive.common.classification.InterfaceStability;
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSimpleDateFormatter;
+import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;
 import org.apache.hadoop.hive.common.type.Date;
 import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
 import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
 import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.exec.MapredContext;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.ql.udf.UDFType;
 import org.apache.hadoop.hive.serde2.io.ByteWritable;
 import org.apache.hadoop.hive.serde2.io.DateWritableV2;
@@ -638,4 +643,21 @@ protected String getArgOrder(int i) {
       return i + ORDINAL_SUFFIXES[i % 10];
     }
   }
+
+  protected String getConfValue(HiveConf.ConfVars confVars) {
+    SessionState ss = SessionState.get();
+    if (ss != null) {
+      return ss.getConf().getVar(confVars);
+    }
+    return null;
+  }
+
+  protected HiveDateTimeFormatter getDateTimeFormat() {
+    boolean useLegacy = true; // TODO: use getConfValue() to choose between legacy and SQL:2016 datetime formats
+    if (useLegacy) {
+      return new HiveSimpleDateFormatter();
+    } else {
+      return new HiveSqlDateTimeFormatter();
+    }
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java
index 6df0913de6..ac289f7b9b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java
@@ -22,10 +22,10 @@ import static
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.STRING_GROUP;
 import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.VOID_GROUP;
 
-import java.text.SimpleDateFormat;
 import java.util.Calendar;
 import java.util.TimeZone;
 
+import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
 import org.apache.hadoop.hive.common.type.Date;
 import org.apache.hadoop.hive.common.type.Timestamp;
 import org.apache.hadoop.hive.ql.exec.Description;
@@ -38,7 +38,6 @@
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.io.Text;
-import org.apache.hive.common.util.DateUtils;
 
 /**
  * GenericUDFAddMonths.
@@ -52,9 +51,9 @@
     + "Returns the date that is num_months after start_date.",
     extended = "start_date is a string or timestamp indicating a valid date. "
     + "num_months is a number. output_date_format is an optional String which specifies the format for output.\n"
-    + "The default output format is 'YYYY-MM-dd'.\n"
+    + "The default output format is 'yyyy-MM-dd'.\n"
     + "Example:\n > SELECT _FUNC_('2009-08-31', 1) FROM src LIMIT 1;\n" + " '2009-09-30'."
-    + "\n > SELECT _FUNC_('2017-12-31 14:15:16', 2, 'YYYY-MM-dd HH:mm:ss') LIMIT 1;\n"
+    + "\n > SELECT _FUNC_('2017-12-31 14:15:16', 2, 'yyyy-MM-dd HH:mm:ss') LIMIT 1;\n"
     + "'2018-02-28 14:15:16'.\n")
@NDV(maxNdv = 250) // 250 seems to be reasonable upper limit for this
public class GenericUDFAddMonths extends GenericUDF {
@@ -63,7 +62,7 @@
   private transient Converter[] dtConverters = new Converter[3];
   private transient PrimitiveCategory[] dtInputTypes = new PrimitiveCategory[3];
   private final Text output = new Text();
-  private transient SimpleDateFormat formatter = null;
+  private transient HiveDateTimeFormatter formatter = null;
   private final Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
   private transient Integer numMonthsConst;
   private transient boolean isNumMonthsConst;
@@ -81,7 +80,8 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen
       checkArgGroups(arguments, 2, tsInputTypes, STRING_GROUP);
       String fmtStr = getConstantStringValue(arguments, 2);
       if (fmtStr != null) {
-        formatter = new SimpleDateFormat(fmtStr);
+        formatter = getDateTimeFormat();
+        formatter.setPattern(fmtStr);
         formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
       }
     } else {
@@ -90,8 +90,11 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen
       }
     }
     if (formatter == null) {
-      //If the DateFormat is not provided by the user or is invalid, use the default format YYYY-MM-dd
-      formatter = DateUtils.getDateFormat();
+      // If the date format is not provided by the user or is invalid, use the default format yyyy-MM-dd.
+      // This replaces the previous thread-local DateUtils.getDateFormat().
+      formatter = getDateTimeFormat();
+      formatter.setPattern("yyyy-MM-dd");
+      formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
     }
 
     // the function should support both short date and full timestamp format
@@ -143,7 +146,7 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException {
       }
     }
 
-    String res = formatter.format(calendar.getTime());
+    String res = formatter.format(Timestamp.ofEpochMilli(calendar.getTimeInMillis()));
     output.set(res);
 
     return output;
diff --git
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateFormat.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateFormat.java index 6d3e86f921..13abdbcc54 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateFormat.java @@ -20,9 +20,9 @@ import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.DATE_GROUP; import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.STRING_GROUP; -import java.text.SimpleDateFormat; import java.util.TimeZone; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.ql.exec.Description; @@ -56,7 +56,7 @@ private transient PrimitiveCategory[] dtInputTypes = new PrimitiveCategory[2]; private final java.util.Date date = new java.util.Date(); private final Text output = new Text(); - private transient SimpleDateFormat formatter; + private transient HiveDateTimeFormatter formatter; @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { @@ -79,10 +79,12 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen String fmtStr = getConstantStringValue(arguments, 1); if (fmtStr != null) { try { - formatter = new SimpleDateFormat(fmtStr); + formatter = getDateTimeFormat(); + formatter.setPattern(fmtStr); formatter.setTimeZone(TimeZone.getTimeZone("UTC")); } catch (IllegalArgumentException e) { - // ignore + //reset formatter if something went wrong + formatter = null; } } } else { @@ -110,8 +112,7 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { ts = Timestamp.ofEpochMilli(d.toEpochMilli()); } - date.setTime(ts.toEpochMilli()); - String res = formatter.format(date); + String res = formatter.format(ts); if (res == null) { return null; } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java index bcc4114099..4bff196d2f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java @@ -46,7 +46,6 @@ + " '2009-07-29'") @VectorizedExpressions({VectorUDFDateSubColScalar.class, VectorUDFDateSubScalarCol.class, VectorUDFDateSubColCol.class}) public class GenericUDFDateSub extends GenericUDFDateAdd { - private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); public GenericUDFDateSub() { this.signModifier = -1; diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java index 3c3796e8a6..0bac41cc29 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java @@ -18,11 +18,11 @@ package org.apache.hadoop.hive.ql.udf.generic; -import java.text.ParseException; -import java.text.SimpleDateFormat; import java.util.TimeZone; import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter; +import org.apache.hadoop.hive.common.format.datetime.ParseException; import 
org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.common.type.TimestampTZ; import org.apache.hadoop.hive.ql.exec.Description; @@ -62,7 +62,7 @@ private transient Converter patternConverter; private transient String lasPattern = "yyyy-MM-dd HH:mm:ss"; - private transient final SimpleDateFormat formatter = new SimpleDateFormat(lasPattern); + private transient HiveDateTimeFormatter formatter = null; @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { @@ -82,6 +82,8 @@ protected void initializeInput(ObjectInspector[] arguments) throws UDFArgumentEx } } + formatter = getDateTimeFormat(); + formatter.setPattern(lasPattern); formatter.setTimeZone(TimeZone.getTimeZone("UTC")); PrimitiveObjectInspector arg1OI = (PrimitiveObjectInspector) arguments[0]; @@ -145,12 +147,12 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { return null; } if (!patternVal.equals(lasPattern)) { - formatter.applyPattern(patternVal); + formatter.setPattern(patternVal); lasPattern = patternVal; } } try { - retValue.set(formatter.parse(textVal).getTime() / 1000); + retValue.set(formatter.parse(textVal).toEpochMilli() / 1000); return retValue; } catch (ParseException e) { return null; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java index 663237739e..092f97fb97 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java @@ -116,17 +116,18 @@ public void testRoundToDecimalPlaces() throws HiveException { } static int DAYS_LIMIT = 365 * 9999; + static int SMALLEST_EPOCH_DAY = -1 * 365 * 1969; // approximate... 
so we get some negative values
 
   public static VectorizedRowBatch getVectorizedRowBatchDateInTimestampOut(int[] intValues) {
     Random r = new Random(12099);
     VectorizedRowBatch batch = new VectorizedRowBatch(2);
     LongColumnVector inV;
     TimestampColumnVector outV;
-    inV = new LongColumnVector();
-    outV = new TimestampColumnVector();
+    inV = new LongColumnVector(intValues.length);
+    outV = new TimestampColumnVector(intValues.length);
 
     for (int i = 0; i < intValues.length; i++) {
-      intValues[i] = r.nextInt() % DAYS_LIMIT;
+      intValues[i] = SMALLEST_EPOCH_DAY + r.nextInt() % DAYS_LIMIT;
       inV.vector[i] = intValues[i];
     }
@@ -137,6 +138,14 @@ public static VectorizedRowBatch getVectorizedRowBatchDateInTimestampOut(int[] i
     return batch;
   }
 
+  public static VectorizedRowBatch getVectorizedRowBatchDateInStringOut(int[] intValues) {
+    // Reuse the date-in/timestamp-out batch, then swap the output column for a string column.
+    VectorizedRowBatch batch = getVectorizedRowBatchDateInTimestampOut(intValues);
+    BytesColumnVector outV = new BytesColumnVector(intValues.length);
+    batch.cols[1] = outV;
+    return batch;
+  }
+
   public static VectorizedRowBatch getVectorizedRowBatchDoubleInLongOut() {
     VectorizedRowBatch batch = new VectorizedRowBatch(2);
     LongColumnVector lcv;
@@ -297,6 +306,30 @@ public static VectorizedRowBatch getVectorizedRowBatchTimestampInLongOut(long[]
     return batch;
   }
 
+
+  public static VectorizedRowBatch getVectorizedRowBatchTimestampInStringOut(long[] epochSecondValues, int[] nanoValues) {
+    Random r = new Random(345);
+    VectorizedRowBatch batch = new VectorizedRowBatch(2);
+    batch.size = epochSecondValues.length;
+
+    TimestampColumnVector inV;
+    BytesColumnVector outV;
+    inV = new TimestampColumnVector(batch.size);
+    outV = new BytesColumnVector(batch.size);
+
+    for (int i = 0; i < batch.size; i++) {
+      Timestamp randTimestamp = RandomTypeUtil.getRandTimestamp(r);
+      epochSecondValues[i] = randTimestamp.toEpochSecond();
+      nanoValues[i] = randTimestamp.getNanos();
+      inV.set(i, randTimestamp.toSqlTimestamp());
+    }
+
+    batch.cols[0] = inV;
+    batch.cols[1] = outV;
+
+    return batch;
+  }
+
   static long SECONDS_LIMIT = 60L * 24L * 365L * 9999L;
 
   public static VectorizedRowBatch getVectorizedRowBatchLongInTimestampOut(long[] longValues) {
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java
index 58fd7b030e..6cf7192922 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java
@@ -23,7 +23,9 @@
 import static org.junit.Assert.assertTrue;
 
 import java.math.BigDecimal;
+import java.nio.charset.StandardCharsets;
 import java.sql.Timestamp;
+import java.util.Arrays;
 import java.util.Random;
 import java.util.concurrent.TimeUnit;
 
@@ -72,6 +74,31 @@ public void testVectorCastDoubleToLong() throws HiveException {
     Assert.assertEquals(1, resultV.vector[6]);
   }
 
+  // Add 8 hours (PST to GMT): java.sql.Date renders in the JVM's default time zone, and this
+  // test assumes the test VM runs in PST (UTC-8), which shifts the expected value by 8 hours.
+ private static long TIME_DIFFERENCE = 28800000L; + @Test + public void testCastDateToString() throws HiveException { + int[] intValues = new int[100]; + VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchDateInStringOut(intValues); + BytesColumnVector resultV = (BytesColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + VectorExpression expr = new CastDateToString(0, 1); + expr.evaluate(b); + + String expected, result; + for (int i = 0; i < intValues.length; i++) { + expected = + new java.sql.Date(DateWritableV2.daysToMillis(intValues[i]) + TIME_DIFFERENCE).toString(); + byte[] subbyte = Arrays.copyOfRange(resultV.vector[i], resultV.start[i], + resultV.start[i] + resultV.length[i]); + result = new String(subbyte, StandardCharsets.UTF_8); + + Assert.assertEquals("Index: " + i + " Epoch day value: " + intValues[i], expected, result); + } + } + + @Test public void testCastDateToTimestamp() throws HiveException { int[] intValues = new int[500]; @@ -192,6 +219,31 @@ public void testCastTimestampToDouble() throws HiveException { } } + @Test + public void testCastTimestampToString() throws HiveException { + int numberToTest = 100; + long[] epochSecondValues = new long[numberToTest]; + int[] nanoValues = new int[numberToTest]; + VectorizedRowBatch b = + TestVectorMathFunctions.getVectorizedRowBatchTimestampInStringOut(epochSecondValues, nanoValues); + BytesColumnVector resultV = (BytesColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + VectorExpression expr = new CastTimestampToString(0, 1); + expr.evaluate(b); + + String expected, result; + for (int i = 0; i < numberToTest; i++) { + expected = org.apache.hadoop.hive.common.type.Timestamp + .ofEpochSecond(epochSecondValues[i], nanoValues[i]).toString(); + byte[] subbyte = Arrays.copyOfRange(resultV.vector[i], resultV.start[i], + resultV.start[i] + resultV.length[i]); + result = new String(subbyte, StandardCharsets.UTF_8); + Assert.assertEquals("Index: " + i + " Seconds since epoch: " + epochSecondValues[i] + + " nanoseconds: " + nanoValues[i], + expected, result); + } + } + public byte[] toBytes(String s) { byte[] b = null; try { diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFFromUnixTime.java ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFFromUnixTime.java new file mode 100644 index 0000000000..de19508a7d --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFFromUnixTime.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.udf; + +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.junit.Assert; +import org.junit.Test; + +public class TestUDFFromUnixTime { + + @Test + public void testFromUnixTime() throws HiveException { + UDFFromUnixTime udf = new UDFFromUnixTime(); + + //int, no format + verifyInt(0, "1970-01-01 00:00:00", null, udf); + verifyInt(1296705906, "2011-02-03 04:05:06", null, udf); + verifyInt(1514818800, "2018-01-01 15:00:00", null, udf); + + //long, no format + verifyLong(0L, "1970-01-01 00:00:00", null, udf); + verifyLong(1296705906L, "2011-02-03 04:05:06", null, udf); + verifyLong(1514818800L, "2018-01-01 15:00:00", null, udf); + // proleptic Gregorian input: -30767590800L + verifyLong(-30767158800L, "0995-01-05 15:00:00", null, udf); + // proleptic Gregorian input: -62009366400 + verifyLong(-62009539200L, "0005-01-01 00:00:00", null, udf); + verifyLong(253402300799L, "9999-12-31 23:59:59", null, udf); + + //int with format + String format = "HH:mm:ss"; + verifyInt(0, "00:00:00", format, udf); + verifyInt(1296705906, "04:05:06", format, udf); + verifyInt(1514818800, "15:00:00", format, udf); + + //long with format + verifyLong(0L, "00:00:00", format, udf); + verifyLong(1296705906L, "04:05:06", format, udf); + verifyLong(1514818800L, "15:00:00", format, udf); + // proleptic Gregorian input: -30767590800L + verifyLong(-30767158800L, "15:00:00", format, udf); + // proleptic Gregorian input: -62009366400 + verifyLong(-62009539200L, "00:00:00", format, udf); + verifyLong(253402300799L, "23:59:59", format, udf); + + } + + private void verifyInt(int value, String expected, String format, UDFFromUnixTime udf) { + IntWritable input = new IntWritable(value); + Text res; + if (format == null) { + res = udf.evaluate(input); + } else { + res = udf.evaluate(input, new Text(format)); + } + Assert.assertEquals(expected, res.toString()); + } + + private void verifyLong(long value, String expected, String format, UDFFromUnixTime udf) { + LongWritable input = new LongWritable(value); + Text res; + if (format == null) { + res = udf.evaluate(input); + } else { + res = udf.evaluate(input, new Text(format)); + } + Assert.assertEquals(expected, res.toString()); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java index 7c2ee15646..a6bab75d38 100644 --- ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java +++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java @@ -35,9 +35,9 @@ public class TestGenericUDFAddMonths extends TestCase { - private final Text fmtTextWithTime = new Text("YYYY-MM-dd HH:mm:ss"); - private final Text fmtTextWithTimeAndms = new Text("YYYY-MM-dd HH:mm:ss.SSS"); - private final Text fmtTextWithoutTime = new Text("YYYY-MM-dd"); + private final Text fmtTextWithTime = new Text("yyyy-MM-dd HH:mm:ss"); + private final Text fmtTextWithTimeAndms = new Text("yyyy-MM-dd HH:mm:ss.SSS"); + private final Text fmtTextWithoutTime = new Text("yyyy-MM-dd"); private final Text fmtTextInvalid = new Text("YYYY-abcdz"); public void testAddMonthsInt() throws HiveException { diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormat.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormat.java index 
6a3cdda48a..97b83c94c1 100644 --- ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormat.java +++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormat.java @@ -44,24 +44,32 @@ public void testDateFormatStr() throws HiveException { udf.initialize(arguments); // date str - runAndVerifyStr("2015-04-05", fmtText, "Sunday", udf); - runAndVerifyStr("2015-04-06", fmtText, "Monday", udf); - runAndVerifyStr("2015-04-07", fmtText, "Tuesday", udf); - runAndVerifyStr("2015-04-08", fmtText, "Wednesday", udf); - runAndVerifyStr("2015-04-09", fmtText, "Thursday", udf); - runAndVerifyStr("2015-04-10", fmtText, "Friday", udf); - runAndVerifyStr("2015-04-11", fmtText, "Saturday", udf); - runAndVerifyStr("2015-04-12", fmtText, "Sunday", udf); + runAndVerifyStr("2015-04-05", "Sunday", udf); + runAndVerifyStr("2015-04-06", "Monday", udf); + runAndVerifyStr("2015-04-07", "Tuesday", udf); + runAndVerifyStr("2015-04-08", "Wednesday", udf); + runAndVerifyStr("2015-04-09", "Thursday", udf); + runAndVerifyStr("2015-04-10", "Friday", udf); + runAndVerifyStr("2015-04-11", "Saturday", udf); + runAndVerifyStr("2015-04-12", "Sunday", udf); // ts str - runAndVerifyStr("2015-04-05 10:30:45", fmtText, "Sunday", udf); - runAndVerifyStr("2015-04-06 10:30:45", fmtText, "Monday", udf); - runAndVerifyStr("2015-04-07 10:30:45", fmtText, "Tuesday", udf); - runAndVerifyStr("2015-04-08 10:30:45", fmtText, "Wednesday", udf); - runAndVerifyStr("2015-04-09 10:30", fmtText, "Thursday", udf); - runAndVerifyStr("2015-04-10 10:30:45.123", fmtText, "Friday", udf); - runAndVerifyStr("2015-04-11T10:30:45", fmtText, "Saturday", udf); - runAndVerifyStr("2015-04-12 10", fmtText, "Sunday", udf); + runAndVerifyStr("2015-04-05 10:30:45", "Sunday", udf); + runAndVerifyStr("2015-04-06 10:30:45", "Monday", udf); + runAndVerifyStr("2015-04-07 10:30:45", "Tuesday", udf); + runAndVerifyStr("2015-04-08 10:30:45", "Wednesday", udf); + runAndVerifyStr("2015-04-09 10:30", "Thursday", udf); + runAndVerifyStr("2015-04-10 10:30:45.123", "Friday", udf); + runAndVerifyStr("2015-04-11T10:30:45", "Saturday", udf); + runAndVerifyStr("2015-04-12 10", "Sunday", udf); + + //make sure hour is ok + fmtText = new Text("hh"); + valueOI1 = PrimitiveObjectInspectorFactory + .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtText); + arguments[1] = valueOI1; + udf.initialize(arguments); + runAndVerifyStr("2015-04-10 10:30:45.123", "10", udf); } public void testWrongDateStr() throws HiveException { @@ -73,10 +81,10 @@ public void testWrongDateStr() throws HiveException { ObjectInspector[] arguments = {valueOI0, valueOI1}; udf.initialize(arguments); - runAndVerifyStr("2016-02-30 10:30:45", fmtText, "Tuesday", udf); - runAndVerifyStr("2014-01-32", fmtText, "Saturday", udf); - runAndVerifyStr("01/14/2014", fmtText, null, udf); - runAndVerifyStr(null, fmtText, null, udf); + runAndVerifyStr("2016-02-30 10:30:45", "Tuesday", udf); + runAndVerifyStr("2014-01-32", "Saturday", udf); + runAndVerifyStr("01/14/2014", null, udf); + runAndVerifyStr(null, null, udf); } public void testDateFormatDate() throws HiveException { @@ -89,14 +97,22 @@ public void testDateFormatDate() throws HiveException { udf.initialize(arguments); - runAndVerifyDate("2015-04-05", fmtText, "Sunday", udf); - runAndVerifyDate("2015-04-06", fmtText, "Monday", udf); - runAndVerifyDate("2015-04-07", fmtText, "Tuesday", udf); - runAndVerifyDate("2015-04-08", fmtText, "Wednesday", udf); - runAndVerifyDate("2015-04-09", fmtText, "Thursday", udf); - 
runAndVerifyDate("2015-04-10", fmtText, "Friday", udf); - runAndVerifyDate("2015-04-11", fmtText, "Saturday", udf); - runAndVerifyDate("2015-04-12", fmtText, "Sunday", udf); + runAndVerifyDate("2015-04-05", "Sunday", udf); + runAndVerifyDate("2015-04-06", "Monday", udf); + runAndVerifyDate("2015-04-07", "Tuesday", udf); + runAndVerifyDate("2015-04-08", "Wednesday", udf); + runAndVerifyDate("2015-04-09", "Thursday", udf); + runAndVerifyDate("2015-04-10", "Friday", udf); + runAndVerifyDate("2015-04-11", "Saturday", udf); + runAndVerifyDate("2015-04-12", "Sunday", udf); + + // make sure year is ok + fmtText = new Text("yyyy"); + valueOI1 = PrimitiveObjectInspectorFactory + .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtText); + arguments[1] = valueOI1; + udf.initialize(arguments); + runAndVerifyDate("2015-04-08", "2015", udf); } public void testDateFormatTs() throws HiveException { @@ -109,15 +125,24 @@ public void testDateFormatTs() throws HiveException { udf.initialize(arguments); - runAndVerifyTs("2015-04-08 10:30:45", fmtText, "Wednesday", udf); - runAndVerifyTs("2015-04-05 10:30:45", fmtText, "Sunday", udf); - runAndVerifyTs("2015-04-06 10:30:45", fmtText, "Monday", udf); - runAndVerifyTs("2015-04-07 10:30:45", fmtText, "Tuesday", udf); - runAndVerifyTs("2015-04-08 10:30:45", fmtText, "Wednesday", udf); - runAndVerifyTs("2015-04-09 10:30:45", fmtText, "Thursday", udf); - runAndVerifyTs("2015-04-10 10:30:45.123", fmtText, "Friday", udf); - runAndVerifyTs("2015-04-11 10:30:45.123456789", fmtText, "Saturday", udf); - runAndVerifyTs("2015-04-12 10:30:45", fmtText, "Sunday", udf); + runAndVerifyTs("2015-04-08 10:30:45", "Wednesday", udf); + runAndVerifyTs("2015-04-05 10:30:45", "Sunday", udf); + runAndVerifyTs("2015-04-06 10:30:45", "Monday", udf); + runAndVerifyTs("2015-04-07 10:30:45", "Tuesday", udf); + runAndVerifyTs("2015-04-08 10:30:45", "Wednesday", udf); + runAndVerifyTs("2015-04-09 10:30:45", "Thursday", udf); + runAndVerifyTs("2015-04-10 10:30:45.123", "Friday", udf); + runAndVerifyTs("2015-04-11 10:30:45.123456789", "Saturday", udf); + runAndVerifyTs("2015-04-12 10:30:45", "Sunday", udf); + + // make sure hour of day is ok + fmtText = new Text("HH"); + valueOI1 = PrimitiveObjectInspectorFactory + .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtText); + arguments[1] = valueOI1; + udf.initialize(arguments); + runAndVerifyTs("2015-04-08 00:30:45", "00", udf); + } public void testNullFmt() throws HiveException { @@ -130,7 +155,7 @@ public void testNullFmt() throws HiveException { udf.initialize(arguments); - runAndVerifyStr("2015-04-05", fmtText, null, udf); + runAndVerifyStr("2015-04-05", null, udf); } public void testWrongFmt() throws HiveException { @@ -143,34 +168,31 @@ public void testWrongFmt() throws HiveException { udf.initialize(arguments); - runAndVerifyStr("2015-04-05", fmtText, null, udf); + runAndVerifyStr("2015-04-05", null, udf); } - private void runAndVerifyStr(String str, Text fmtText, String expResult, GenericUDF udf) + private void runAndVerifyStr(String str, String expResult, GenericUDF udf) throws HiveException { DeferredObject valueObj0 = new DeferredJavaObject(str != null ? new Text(str) : null); - DeferredObject valueObj1 = new DeferredJavaObject(fmtText); - DeferredObject[] args = { valueObj0, valueObj1 }; + DeferredObject[] args = { valueObj0 }; Text output = (Text) udf.evaluate(args); assertEquals("date_format() test ", expResult, output != null ? 
output.toString() : null);
   }
 
diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFToUnixTimestamp.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFToUnixTimestamp.java
index 61623d54c9..f9b53f4edc 100644
--- ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFToUnixTimestamp.java
+++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFToUnixTimestamp.java
@@ -74,6 +74,11 @@ public void testTimestamp() throws HiveException {
     // test null values
     runAndVerify(udf, null, null);
+
+    ts = Timestamp.valueOf("1111-02-03 01:02:03");
+    runAndVerify(udf,
+        new TimestampWritableV2(ts),
+        new LongWritable(ts.toEpochSecond()));
   }
 
   public void testDate() throws HiveException {
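
Reviewer notes (not part of the patch). A minimal round trip through the new HiveDateTimeFormatter API via the HiveSimpleDateFormatter wrapper, mirroring TestHiveSimpleDateFormatter above; only the classes this patch introduces are assumed, and the class name RoundTripSketch is hypothetical.

import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.format.datetime.HiveSimpleDateFormatter;
import org.apache.hadoop.hive.common.type.Timestamp;

import java.text.SimpleDateFormat;
import java.util.TimeZone;

public class RoundTripSketch {
  public static void main(String[] args) throws Exception {
    HiveDateTimeFormatter formatter = new HiveSimpleDateFormatter();
    formatter.setFormatter(new SimpleDateFormat()); // wrap the legacy formatter object
    formatter.setPattern("yyyy-MM-dd HH:mm:ss");    // interpreted by SimpleDateFormat rules
    formatter.setTimeZone(TimeZone.getTimeZone("UTC")); // only this wrapper consults the zone

    Timestamp ts = Timestamp.valueOf("2019-01-01 01:01:01");
    String formatted = formatter.format(ts);       // "2019-01-01 01:01:01"
    Timestamp parsed = formatter.parse(formatted); // round-trips to an equal Timestamp
    System.out.println(formatted + " -> " + parsed);
  }
}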
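A companion sketch for the java.time wrapper, built with the same DateTimeFormatter shape the patched Timestamp print path uses (optional nanosecond fraction); JavaTimeSketch is a hypothetical name.

import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.format.datetime.HiveJavaDateTimeFormatter;
import org.apache.hadoop.hive.common.type.Timestamp;

import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.temporal.ChronoField;

public class JavaTimeSketch {
  public static void main(String[] args) throws Exception {
    // Same builder recipe as the PRINT formatter in Timestamp and TestHiveJavaDateTimeFormatter.
    DateTimeFormatter print = new DateTimeFormatterBuilder()
        .append(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"))
        .optionalStart().appendFraction(ChronoField.NANO_OF_SECOND, 0, 9, true).optionalEnd()
        .toFormatter();

    HiveDateTimeFormatter formatter = new HiveJavaDateTimeFormatter();
    formatter.setFormatter(print); // the SimpleDateFormat overload would throw WrongFormatterException

    Timestamp ts = Timestamp.valueOf("2019-01-01 00:00:00.99999");
    System.out.println(formatter.format(ts)); // 2019-01-01 00:00:00.99999
  }
}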
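The new Timestamp.valueOf(String, HiveDateTimeFormatter) overload falls back to the default parser when the supplied formatter rejects the input. A sketch of that contract, under the same assumptions as above (FallbackSketch is a hypothetical name):

import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.format.datetime.HiveSimpleDateFormatter;
import org.apache.hadoop.hive.common.type.Timestamp;

import java.text.SimpleDateFormat;
import java.util.TimeZone;

public class FallbackSketch {
  public static void main(String[] args) throws Exception {
    HiveDateTimeFormatter dayFirst = new HiveSimpleDateFormatter();
    dayFirst.setFormatter(new SimpleDateFormat());
    dayFirst.setPattern("dd/MM/yyyy HH:mm:ss");
    dayFirst.setTimeZone(TimeZone.getTimeZone("UTC"));

    // Parsed by the supplied formatter:
    System.out.println(Timestamp.valueOf("03/02/2011 04:05:06", dayFirst)); // 2011-02-03 04:05:06
    // Rejected by the formatter (ParseException), so valueOf falls back to the default parser:
    System.out.println(Timestamp.valueOf("2011-02-03 04:05:06", dayFirst)); // 2011-02-03 04:05:06
  }
}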
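Finally, the formatter-selection logic that GenericUDF.getDateTimeFormat() and UDFFromUnixTime.initFormatter() both sketch out, with the still-undefined HiveConf switch modeled as a plain boolean parameter (hypothetical until the TODOs above are resolved):

import org.apache.hadoop.hive.common.format.datetime.HiveDateTimeFormatter;
import org.apache.hadoop.hive.common.format.datetime.HiveSimpleDateFormatter;
import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter;

public final class FormatterSelectionSketch {
  private FormatterSelectionSketch() {}

  // useLegacyFormats stands in for the HiveConf variable the patch leaves as a TODO.
  public static HiveDateTimeFormatter getDateTimeFormatter(boolean useLegacyFormats) {
    return useLegacyFormats
        ? new HiveSimpleDateFormatter()   // legacy SimpleDateFormat patterns, e.g. "mm" = minute
        : new HiveSqlDateTimeFormatter(); // SQL:2016 patterns, e.g. "mm" = month (format/parse still TODO)
  }

  public static void main(String[] args) {
    System.out.println(getDateTimeFormatter(true).getClass().getSimpleName());  // HiveSimpleDateFormatter
    System.out.println(getDateTimeFormatter(false).getClass().getSimpleName()); // HiveSqlDateTimeFormatter
  }
}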