diff --git common/src/java/org/apache/hadoop/hive/common/type/Date.java common/src/java/org/apache/hadoop/hive/common/type/Date.java index 6ecfcf65c9..eac232cba1 100644 --- common/src/java/org/apache/hadoop/hive/common/type/Date.java +++ common/src/java/org/apache/hadoop/hive/common/type/Date.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.common.type; +import java.time.DateTimeException; import java.time.Instant; import java.time.LocalDate; import java.time.LocalDateTime; import java.time.ZoneOffset; import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatterBuilder; import java.time.format.DateTimeParseException; @@ -53,12 +54,32 @@ PRINT_FORMATTER = builder.toFormatter(); } + private static final LocalDate MIN_LOCALDATE = LocalDate.parse("0000-01-01", PARSE_FORMATTER); + private static final LocalDate MAX_LOCALDATE = LocalDate.parse("9999-12-31", PARSE_FORMATTER); + private LocalDate localDate; + /* Private constructor. ASSUME DATE IS VALID (use method valid() to check)! */ private Date(LocalDate localDate) { this.localDate = localDate != null ? localDate : EPOCH; } + /** + * Valid range is years 0000-9999, and 0000 converts to 0001. + */ + private static boolean valid(LocalDate localDate) { + return localDate.compareTo(MIN_LOCALDATE) >= 0 && + localDate.compareTo(MAX_LOCALDATE) <= 0; + } + + private void setLocalDate(LocalDate localDate) throws DateTimeException { + if (valid(localDate)) { + this.localDate = localDate; + } else { + throw new DateTimeException("Date value " + localDate + " out of range"); + } + } + public Date() { this(EPOCH); } @@ -69,7 +90,7 @@ public Date(Date d) { @Override public String toString() { - return localDate.format(PRINT_FORMATTER); + return localDate != null ? localDate.format(PRINT_FORMATTER) : null; } public int hashCode() { @@ -105,21 +126,21 @@ public void setYear(int year) { localDate = localDate.withYear(year); } - public void setMonth(int month) { - localDate = localDate.withMonth(month); + public void setMonth(int month) throws DateTimeException { + setLocalDate(localDate.withMonth(month)); } - public void setDayOfMonth(int dayOfMonth) { - localDate = localDate.withDayOfMonth(dayOfMonth); + public void setDayOfMonth(int dayOfMonth) throws DateTimeException { + setLocalDate(localDate.withDayOfMonth(dayOfMonth)); } - public void setTimeInDays(int epochDay) { - localDate = LocalDate.ofEpochDay(epochDay); + public void setTimeInDays(int epochDay) throws DateTimeException { + setLocalDate(LocalDate.ofEpochDay(epochDay)); } - public void setTimeInMillis(long epochMilli) { - localDate = LocalDateTime.ofInstant( - Instant.ofEpochMilli(epochMilli), ZoneOffset.UTC).toLocalDate(); + public void setTimeInMillis(long epochMilli) throws DateTimeException { + setLocalDate( + LocalDateTime.ofInstant(Instant.ofEpochMilli(epochMilli), ZoneOffset.UTC).toLocalDate()); } public static Date valueOf(String s) { @@ -134,20 +155,39 @@ public static Date valueOf(String s) { } catch (DateTimeParseException e) { throw new IllegalArgumentException("Cannot create date, parsing error"); } - return new Date(localDate); + if (valid(localDate)) { + return new Date(localDate); + } else { + return null; + } } public static Date ofEpochDay(int epochDay) { - return new Date(LocalDate.ofEpochDay(epochDay)); + LocalDate localDate = LocalDate.ofEpochDay(epochDay); + if (valid(localDate)) { + return new Date(localDate); + } else { + return null; + } } public static Date ofEpochMilli(long epochMilli) { - return new Date(LocalDateTime.ofInstant( -
Instant.ofEpochMilli(epochMilli), ZoneOffset.UTC).toLocalDate()); + LocalDate localDate = LocalDateTime.ofInstant( + Instant.ofEpochMilli(epochMilli), ZoneOffset.UTC).toLocalDate(); + if (valid(localDate)) { + return new Date(localDate); + } else { + return null; + } } public static Date of(int year, int month, int dayOfMonth) { - return new Date(LocalDate.of(year, month, dayOfMonth)); + LocalDate localDate = LocalDate.of(year, month, dayOfMonth); + if (valid(localDate)) { + return new Date(localDate); + } else { + return null; + } } public int getYear() { diff --git common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java index a8b7b6d186..ffa33505b0 100644 --- common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java +++ common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.common.type; +import java.time.DateTimeException; import java.time.Instant; import java.time.LocalDateTime; import java.time.ZoneOffset; @@ -75,11 +76,32 @@ private LocalDateTime localDateTime; - /* Private constructor */ - private Timestamp(LocalDateTime localDateTime) { + private static final LocalDateTime MIN_LOCALDATETIME = + LocalDateTime.parse("0000-01-01 00:00:00", PARSE_FORMATTER); + private static final LocalDateTime MAX_LOCALDATETIME = + LocalDateTime.parse("9999-12-31 23:59:59.999999999", PARSE_FORMATTER); + + /* Private constructor. ASSUME TIMESTAMP IS VALID (use method valid() to check)! */ + private Timestamp(LocalDateTime localDateTime) throws DateTimeException { this.localDateTime = localDateTime != null ? localDateTime : EPOCH; } + private void setLocalDateTime(LocalDateTime localDateTime) { + if (valid(localDateTime)) { + this.localDateTime = localDateTime; + } else { + throw new DateTimeException("Timestamp value " + localDateTime + " out of range"); + } + } + + /** + * Valid range is years 0000-9999, and 0000 converts to 0001. + */ + private static boolean valid(LocalDateTime localDateTime) { + return localDateTime.compareTo(MIN_LOCALDATETIME) >= 0 && + localDateTime.compareTo(MAX_LOCALDATETIME) <= 0; + } + public Timestamp() { this(EPOCH); } @@ -89,7 +111,7 @@ public Timestamp(Timestamp t) { } public void set(Timestamp t) { - this.localDateTime = t != null ? t.localDateTime : EPOCH; + setLocalDateTime(t != null ? t.localDateTime : EPOCH); } public String format(DateTimeFormatter formatter) { @@ -98,7 +120,7 @@ public String format(DateTimeFormatter formatter) { @Override public String toString() { - return localDateTime.format(PRINT_FORMATTER); + return localDateTime != null ? 
localDateTime.format(PRINT_FORMATTER) : null; } public int hashCode() { @@ -122,28 +144,26 @@ public long toEpochSecond() { return localDateTime.toEpochSecond(ZoneOffset.UTC); } - public void setTimeInSeconds(long epochSecond) { + public void setTimeInSeconds(long epochSecond) throws DateTimeException { setTimeInSeconds(epochSecond, 0); } - public void setTimeInSeconds(long epochSecond, int nanos) { - localDateTime = LocalDateTime.ofEpochSecond( - epochSecond, nanos, ZoneOffset.UTC); + public void setTimeInSeconds(long epochSecond, int nanos) throws DateTimeException { + setLocalDateTime(LocalDateTime.ofEpochSecond(epochSecond, nanos, ZoneOffset.UTC)); } public long toEpochMilli() { return localDateTime.toInstant(ZoneOffset.UTC).toEpochMilli(); } - public void setTimeInMillis(long epochMilli) { - localDateTime = LocalDateTime.ofInstant( - Instant.ofEpochMilli(epochMilli), ZoneOffset.UTC); + public void setTimeInMillis(long epochMilli) throws DateTimeException { + setLocalDateTime(LocalDateTime.ofInstant(Instant.ofEpochMilli(epochMilli), ZoneOffset.UTC)); } - public void setTimeInMillis(long epochMilli, int nanos) { - localDateTime = LocalDateTime + public void setTimeInMillis(long epochMilli, int nanos) throws DateTimeException { + setLocalDateTime(LocalDateTime .ofInstant(Instant.ofEpochMilli(epochMilli), ZoneOffset.UTC) - .withNano(nanos); + .withNano(nanos)); } public int getNanos() { @@ -163,7 +183,10 @@ public static Timestamp valueOf(String s) { throw new IllegalArgumentException("Cannot create timestamp, parsing error"); } } - return new Timestamp(localDateTime); + if (valid(localDateTime)) { + return new Timestamp(localDateTime); + } + return null; } public static Timestamp ofEpochSecond(long epochSecond) { @@ -171,23 +194,33 @@ public static Timestamp ofEpochSecond(long epochSecond) { } public static Timestamp ofEpochSecond(long epochSecond, int nanos) { - return new Timestamp( - LocalDateTime.ofEpochSecond(epochSecond, nanos, ZoneOffset.UTC)); + LocalDateTime localDateTime = LocalDateTime.ofEpochSecond(epochSecond, nanos, ZoneOffset.UTC); + if (valid(localDateTime)) { + return new Timestamp(localDateTime); + } + return null; } public static Timestamp ofEpochMilli(long epochMilli) { - return new Timestamp(LocalDateTime - .ofInstant(Instant.ofEpochMilli(epochMilli), ZoneOffset.UTC)); + LocalDateTime localDateTime = + LocalDateTime.ofInstant(Instant.ofEpochMilli(epochMilli), ZoneOffset.UTC); + if (valid(localDateTime)) { + return new Timestamp(localDateTime); + } + return null; } public static Timestamp ofEpochMilli(long epochMilli, int nanos) { - return new Timestamp(LocalDateTime - .ofInstant(Instant.ofEpochMilli(epochMilli), ZoneOffset.UTC) - .withNano(nanos)); + LocalDateTime localDateTime = + LocalDateTime.ofInstant(Instant.ofEpochMilli(epochMilli), ZoneOffset.UTC).withNano(nanos); + if (valid(localDateTime)) { + return new Timestamp(localDateTime); + } + return null; } public void setNanos(int nanos) { - localDateTime = localDateTime.withNano(nanos); + setLocalDateTime(localDateTime.withNano(nanos)); } public int getYear() { diff --git common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java index f26f8ae01e..d2ae1b1603 100644 --- common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java +++ common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java @@ -157,6 +157,23 @@ public static Timestamp decimalToTimestamp(HiveDecimalV1 dec) { } } + /** + * Converts the time in 
seconds or milliseconds to a timestamp. + * @param time time in seconds or in milliseconds + * @return the timestamp, or null if the value is out of range + */ + public static Timestamp longToTimestamp(long time, boolean intToTimestampInSeconds) { + try { + // Interpret the value as epoch seconds or epoch milliseconds, depending on the flag. + if (intToTimestampInSeconds) { + return Timestamp.ofEpochSecond(time); + } + return Timestamp.ofEpochMilli(time); + } catch (DateTimeException e) { + return null; + } + } + /** * Rounds the number of milliseconds relative to the epoch down to the nearest whole number of * seconds. 500 would round to 0, -500 would round to -1. @@ -186,9 +203,27 @@ public static Timestamp stringToTimestamp(String s) { return Timestamp.valueOf( TimestampTZUtil.parse(s).getZonedDateTime().toLocalDateTime().toString()); } catch (IllegalArgumentException | DateTimeParseException eTZ) { + try { + // Last attempt + return Timestamp.ofEpochMilli(Date.valueOf(s).toEpochMilli()); + } catch (IllegalArgumentException | DateTimeParseException | NullPointerException eTZ2) { + return null; + } - // Last attempt - return Timestamp.ofEpochMilli(Date.valueOf(s).toEpochMilli()); } } } + + public static Timestamp timestampLocalTzToTimestamp(String tstz) { + // Keep everything before the second space, i.e. the date and time without the zone id. + int index = tstz.indexOf(" "); + index = tstz.indexOf(" ", index + 1); + if (index == -1) { + return null; + } + try { + return Timestamp.valueOf(tstz.substring(0, index)); + } catch (DateTimeException e) { + return null; + } + } } diff --git common/src/java/org/apache/hive/common/util/DateParser.java common/src/java/org/apache/hive/common/util/DateParser.java index 5db14f1906..defa861098 100644 --- common/src/java/org/apache/hive/common/util/DateParser.java +++ common/src/java/org/apache/hive/common/util/DateParser.java @@ -45,6 +45,9 @@ public boolean parseDate(String strValue, Date result) { if (parsedVal == null) { return false; } + if (result == null) { + result = new Date(); + } result.setTimeInMillis(parsedVal.toEpochMilli()); return true; } diff --git common/src/test/org/apache/hive/common/util/TestTimestampParser.java common/src/test/org/apache/hive/common/util/TestTimestampParser.java index 00a7904ecf..504ddc8842 100644 --- common/src/test/org/apache/hive/common/util/TestTimestampParser.java +++ common/src/test/org/apache/hive/common/util/TestTimestampParser.java @@ -19,6 +19,7 @@ package org.apache.hive.common.util; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; import static org.junit.Assert.fail; import org.apache.hadoop.hive.common.type.Timestamp; @@ -53,6 +54,13 @@ static void testInvalidCases(TimestampParser tp, String[] invalidCases) { } } + static void testOutOfRangeCases(TimestampParser tp, String[] outOfRangeCases) { + for (String outOfRangeString : outOfRangeCases) { + Timestamp ts = tp.parseTimestamp(outOfRangeString); + assertNull(ts); + } + } + @Test public void testDefault() { // No timestamp patterns, should default to normal timestamp format @@ -72,8 +80,14 @@ public void testDefault() { "12345", }; + String[] outOfRangeCases = { + "-0001-02-03 04:05:06.7890", + "10000-01-01 00:00:00.000", + }; + testValidCases(tp, validCases); testInvalidCases(tp, invalidCases); + testOutOfRangeCases(tp, outOfRangeCases); } @Test @@ -120,8 +134,14 @@ public void testPattern1() { "12345", }; + String[] outOfRangeCases = { + "-0001-12-31 23:59:59.999999999", + "10000-01-01 00:00:00.000", + }; + testValidCases(tp, validCases); testInvalidCases(tp, invalidCases); + testOutOfRangeCases(tp, outOfRangeCases); } @Test @@ -151,8 +171,14 @@ public
void testMillisParser() { "1420509274123-", }; + String[] outOfRangeCases = { + "-0001-12-31 23:59:59.999999999", + "10000-01-01 00:00:00.000", + }; + testValidCases(tp, validCases); testInvalidCases(tp, invalidCases); + testOutOfRangeCases(tp, outOfRangeCases); } @Test @@ -178,7 +204,13 @@ public void testPattern2() { "12345", }; + String[] outOfRangeCases = { + "-0001-12-31 23:59:59.999999999", + "10000-01-01 00:00:00.000", + }; + testValidCases(tp, validCases); testInvalidCases(tp, invalidCases); + testOutOfRangeCases(tp, outOfRangeCases); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java index cf00d7b820..6b88b2cde4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.math.BigDecimal; import java.nio.ByteBuffer; +import java.time.DateTimeException; import java.util.Collections; import java.util.Map; import java.util.Map.Entry; @@ -347,6 +348,8 @@ private Date readDateValue(String dateStr) { // Fallback to integer parsing LOG.debug("Reading date value as days since epoch: {}", dateStr); return new Date(Long.parseLong(dateStr)); + } catch (DateTimeException e) { + return null; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java index 19a3eedcb5..8bdeb3ad40 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector; +import java.time.DateTimeException; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -416,18 +417,23 @@ private void assignRowColumn( if (object instanceof Timestamp) { ((TimestampColumnVector) columnVector).set( batchIndex, ((Timestamp) object).toSqlTimestamp()); - } else { + } else if (object instanceof TimestampWritableV2) { ((TimestampColumnVector) columnVector).set( batchIndex, ((TimestampWritableV2) object).getTimestamp().toSqlTimestamp()); + } else { + // null value: mark the column entry as null + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); } break; case DATE: if (object instanceof Date) { ((LongColumnVector) columnVector).vector[batchIndex] = DateWritableV2.dateToDays((Date) object); - } else { + } else if (((DateWritableV2) object).get() != null) { ((LongColumnVector) columnVector).vector[batchIndex] = ((DateWritableV2) object).getDays(); + } else { + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); } break; case FLOAT: @@ -976,6 +982,11 @@ private void assignConvertRowColumn(ColumnVector columnVector, int batchIndex, // Some of the conversion methods throw this exception on numeric parsing errors. VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; + } catch (DateTimeException e) { + // Out-of-range date/time values are treated as null, like the numeric parsing errors above. + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); + return; + } // We always set the null flag to false when there is a value.
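The pattern above repeats throughout this patch: the Date and Timestamp factory methods (valueOf, ofEpochDay, ofEpochSecond, ofEpochMilli) now return null for years outside 0000-9999, while the setters throw DateTimeException, so each caller either null-checks or catches. A minimal caller-side sketch (standalone illustration, not part of the patch; the epoch-day values are examples):

    import java.time.DateTimeException;
    import org.apache.hadoop.hive.common.type.Date;

    Date ok = Date.ofEpochDay(12784);              // 2005-01-01, in range: non-null
    Date bad = Date.ofEpochDay(Integer.MAX_VALUE); // out of range: factory returns null
    if (bad == null) {
      // downstream code treats the value as SQL NULL
    }
    try {
      ok.setTimeInDays(Integer.MAX_VALUE);         // setters throw instead of returning null
    } catch (DateTimeException e) {
      // propagate as NULL
    }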
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java index e1482e077d..f3d618d862 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java @@ -248,11 +248,16 @@ public Object extractRowColumn( java.sql.Timestamp ts = ((TimestampColumnVector) colVector).asScratchTimestamp(adjustedIndex); Timestamp serializableTS = Timestamp.ofEpochMilli(ts.getTime(), ts.getNanos()); + if (serializableTS == null) { + return null; + } ((TimestampWritableV2) primitiveWritable).set(serializableTS); return primitiveWritable; case DATE: - ((DateWritableV2) primitiveWritable).set( - (int) ((LongColumnVector) colVector).vector[adjustedIndex]); + if (!((DateWritableV2) primitiveWritable).set( + (int) ((LongColumnVector) colVector).vector[adjustedIndex])) { + return null; // null Date -> null DateWritable + } return primitiveWritable; case FLOAT: ((FloatWritable) primitiveWritable).set( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java index 66585af577..1270d7be8f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java @@ -434,9 +434,14 @@ private void serializePrimitiveWrite( break; case TIMESTAMP: // From java.sql.Timestamp used by vectorization to serializable org.apache.hadoop.hive.common.type.Timestamp java.sql.Timestamp ts = ((TimestampColumnVector) colVector).asScratchTimestamp(adjustedBatchIndex); Timestamp serializableTS = Timestamp.ofEpochMilli(ts.getTime(), ts.getNanos()); - serializeWrite.writeTimestamp(serializableTS); + // ofEpochMilli returns null for out-of-range values; write a null instead of failing. + if (serializableTS != null) { + serializeWrite.writeTimestamp(serializableTS); + } else { + serializeWrite.writeNull(); + } break; case FLOAT: serializeWrite.writeFloat((float) ((DoubleColumnVector) colVector).vector[adjustedBatchIndex]); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 5ff338660f..e1c96d2546 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -21,6 +21,7 @@ import java.lang.reflect.Constructor; import java.nio.charset.StandardCharsets; import java.sql.Timestamp; +import java.time.DateTimeException; import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; @@ -4033,9 +4034,14 @@ private Object getVectorTypeScalarValue(ExprNodeConstantDesc constDesc) throws H Object scalarValue = getScalarValue(constDesc); switch (primitiveCategory) { case DATE: + try { + return (long) DateWritableV2.dateToDays((Date) scalarValue); + } catch (DateTimeException e) { + return null; + } - return (long) DateWritableV2.dateToDays((Date) scalarValue); case TIMESTAMP: - return ((org.apache.hadoop.hive.common.type.Timestamp) scalarValue).toSqlTimestamp(); + org.apache.hadoop.hive.common.type.Timestamp timestamp = (org.apache.hadoop.hive.common.type.Timestamp) scalarValue; + return timestamp == null ?
null : timestamp.toSqlTimestamp(); case INTERVAL_YEAR_MONTH: return ((HiveIntervalYearMonth) scalarValue).getTotalMonths(); default: diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java index 617cbf15a8..cd9f103ba0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java @@ -366,7 +366,7 @@ private static void setVector(Object row, break; case DATE: { LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex]; - if (writableCol != null) { + if (writableCol != null && ((DateWritableV2) writableCol).get() != null) { lcv.vector[rowIndex] = ((DateWritableV2) writableCol).getDays(); lcv.isNull[rowIndex] = false; } else { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java index a6dff12e1a..b2ced147d9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java @@ -28,6 +28,7 @@ import org.apache.hive.common.util.DateParser; import java.nio.charset.StandardCharsets; +import java.time.DateTimeException; import java.util.Arrays; /** @@ -154,14 +155,18 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException { private void evaluate(LongColumnVector outputColVector, BytesColumnVector inV, int i) { String dateString = new String(inV.vector[i], inV.start[i], inV.length[i], StandardCharsets.UTF_8); Date hDate = new Date(); - if (dateParser.parseDate(dateString, hDate)) { - outputColVector.vector[i] = DateWritableV2.dateToDays(hDate); - return; + try { + if (dateParser.parseDate(dateString, hDate)) { + outputColVector.vector[i] = DateWritableV2.dateToDays(hDate); + return; + } + } catch (DateTimeException e) { + // out-of-range date: fall through and mark the value null, the same as a failed parse } - outputColVector.vector[i] = 1; outputColVector.isNull[i] = true; outputColVector.noNulls = false; } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DynamicValueVectorExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DynamicValueVectorExpression.java index eff20c948c..a6dd9b5506 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DynamicValueVectorExpression.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DynamicValueVectorExpression.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.sql.Timestamp; +import java.time.DateTimeException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.type.HiveDecimal; @@ -181,9 +182,20 @@ private void initValue() { decimalValue = PrimitiveObjectInspectorUtils.getHiveDecimal(val, poi); break; case DATE: + try { + longValue = DateWritableV2.dateToDays(PrimitiveObjectInspectorUtils.getDate(val, poi)); + } catch (DateTimeException e) { + // out-of-range date: leave longValue unset + } - longValue = DateWritableV2.dateToDays(PrimitiveObjectInspectorUtils.getDate(val, poi)); + break; case TIMESTAMP: - timestampValue = PrimitiveObjectInspectorUtils.getTimestamp(val, poi).toSqlTimestamp(); + org.apache.hadoop.hive.common.type.Timestamp tempTimestamp = + PrimitiveObjectInspectorUtils.getTimestamp(val, poi); + if (tempTimestamp != null) { +
timestampValue = tempTimestamp.toSqlTimestamp(); + } else { + timestampValue = null; + } break; case INTERVAL_YEAR_MONTH: longValue = PrimitiveObjectInspectorUtils.getHiveIntervalYearMonth(val, poi).getTotalMonths(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java index 669ca5556a..aae642d881 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java @@ -30,6 +30,7 @@ import java.nio.charset.StandardCharsets; import java.sql.Timestamp; +import java.time.DateTimeException; import java.util.Arrays; @@ -46,7 +47,7 @@ protected boolean isPositive = true; private transient final DateParser dateParser = new DateParser(); - private transient final Date baseDate = new Date(); + private transient Date baseDate = new Date(); // Transient members initialized by transientInit method. private transient PrimitiveCategory primitiveCategory; @@ -99,7 +100,11 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException { switch (primitiveCategory) { case DATE: - baseDate.setTimeInMillis(DateWritableV2.daysToMillis((int) longValue)); + try { + baseDate.setTimeInMillis(DateWritableV2.daysToMillis((int) longValue)); + } catch (DateTimeException e) { + baseDate = null; + } break; case TIMESTAMP: @@ -246,8 +251,12 @@ public String vectorExpressionParameters() { String value; if (object instanceof Long) { Date tempDate = new Date(); - tempDate.setTimeInMillis(DateWritableV2.daysToMillis((int) longValue)); - value = tempDate.toString(); + try { + tempDate.setTimeInMillis(DateWritableV2.daysToMillis((int) longValue)); + value = tempDate.toString(); + } catch (DateTimeException e) { + value = "NULL"; + } } else if (object instanceof Timestamp) { value = org.apache.hadoop.hive.common.type.Timestamp.ofEpochMilli( timestampValue.getTime(), timestampValue.getNanos()).toString(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthDate.java index 4ecd09f1fc..f789521511 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthDate.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthDate.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.time.DateTimeException; import java.util.Calendar; /** @@ -37,7 +38,7 @@ public VectorUDFMonthDate() { } @Override - protected long getDateField(long days) { + protected long getDateField(long days) throws DateTimeException { /* january is 0 */ return 1 + super.getDateField(days); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampDate.java index 1f83eadcbb..d521d150e4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampDate.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampDate.java @@ -20,6 +20,8 @@ import org.apache.hadoop.hive.serde2.io.DateWritableV2; +import java.time.DateTimeException; + /** * Return Unix Timestamp. 
* Extends {@link VectorUDFTimestampFieldDate} @@ -31,7 +33,7 @@ private DateWritableV2 dateWritable; @Override - protected long getDateField(long days) { + protected long getDateField(long days) throws DateTimeException { dateWritable.set((int) days); return dateWritable.getTimeInSeconds(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java index 3d61c33afd..caf6014a37 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java @@ -49,6 +49,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.time.DateTimeException; import java.util.List; import java.util.Map; diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/ColumnStatisticsObjTranslator.java ql/src/java/org/apache/hadoop/hive/ql/stats/ColumnStatisticsObjTranslator.java index 31c96826b0..92a7317170 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/ColumnStatisticsObjTranslator.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/ColumnStatisticsObjTranslator.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.stats; import java.nio.ByteBuffer; +import java.time.DateTimeException; import java.util.List; import org.apache.hadoop.hive.common.type.HiveDecimal; @@ -198,10 +199,18 @@ private static void unpackDateStats(ObjectInspector oi, Object o, String fName, statsObj.getStatsData().getDateStats().setNumDVs(v); } else if (fName.equals("max")) { DateWritableV2 v = ((DateObjectInspector) oi).getPrimitiveWritableObject(o); - statsObj.getStatsData().getDateStats().setHighValue(new Date(v.getDays())); + try { + statsObj.getStatsData().getDateStats().setHighValue(new Date(v.getDays())); + } catch (DateTimeException e) { + //do nothing + } } else if (fName.equals("min")) { DateWritableV2 v = ((DateObjectInspector) oi).getPrimitiveWritableObject(o); - statsObj.getStatsData().getDateStats().setLowValue(new Date(v.getDays())); + try { + statsObj.getStatsData().getDateStats().setLowValue(new Date(v.getDays())); + } catch (DateTimeException e) { + //do nothing + } } else if (fName.equals("ndvbitvector")) { PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java index 18ca9a7e9b..ca66a0c455 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java @@ -84,7 +84,7 @@ public IntWritable evaluate(Text dateString) { } public IntWritable evaluate(DateWritableV2 d) { - if (d == null) { + if (d == null || d.get() == null) { return null; } Date date = d.get(); @@ -99,8 +99,12 @@ public IntWritable evaluate(TimestampWritableV2 t) { } Timestamp ts = t.getTimestamp(); - calendar.setTimeInMillis(ts.toEpochMilli()); - result.set(calendar.get(Calendar.WEEK_OF_YEAR)); + if (ts != null) { + calendar.setTimeInMillis(ts.toEpochMilli()); + result.set(calendar.get(Calendar.WEEK_OF_YEAR)); + } else { + return null; + } return result; } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java index 0d8d659ff6..7c4a187c3e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java @@ -512,7 +512,8 @@ protected Date getDateValue(DeferredObject[] arguments, int i, PrimitiveCategory case DATE: case TIMESTAMPLOCALTZ: Object writableValue = converters[i].convert(obj); - date = ((DateWritableV2) writableValue).get(); + DateWritableV2 dateWritableV2 = (DateWritableV2) writableValue; + date = dateWritableV2 == null ? null : dateWritableV2.get(); break; default: throw new UDFArgumentTypeException(0, getFuncName() diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateAdd.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateAdd.java index be7bd1704f..6068ca3a6a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateAdd.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateAdd.java @@ -173,13 +173,18 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { } break; case TIMESTAMP: - Timestamp ts = ((TimestampWritableV2) dateConverter.convert(arguments[0].get())) - .getTimestamp(); - output.set(DateWritableV2.millisToDays(ts.toEpochMilli())); + TimestampWritableV2 tsw = (TimestampWritableV2) dateConverter.convert(arguments[0].get()); + Timestamp ts = tsw == null ? null : tsw.getTimestamp(); + if (ts == null) { + return null; + } + output.set(DateWritableV2.millisToDays(ts.toEpochMilli())); break; case DATE: DateWritableV2 dw = (DateWritableV2) dateConverter.convert(arguments[0].get()); - output.set(dw.getDays()); + if (dw == null || dw.get() == null || !output.set(dw.getDays())) { + return null; + } break; default: throw new UDFArgumentException( diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFNextDay.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFNextDay.java index c700797ac2..071748817a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFNextDay.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFNextDay.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.io.Text; +import java.time.DateTimeException; import java.util.Calendar; /** @@ -130,9 +131,13 @@ protected Date nextDay(Date d, int dayOfWeek) { daysToAdd = 7 - currDayOfWeek + dayOfWeek; } - date.setTimeInDays(date.toEpochDay() + daysToAdd); + try { + date.setTimeInDays(date.toEpochDay() + daysToAdd); - return date; + return date; + } catch (DateTimeException e) { + return null; + } } protected int getIntDayOfWeek(String dayOfWeek) throws UDFArgumentException { diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java index 3c3796e8a6..09ea40bf0c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java @@ -20,6 +20,7 @@ import java.text.ParseException; import java.text.SimpleDateFormat; +import java.time.DateTimeException; import java.util.TimeZone; import org.apache.commons.lang.StringUtils; @@ -156,9 +157,12 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { return null; } } else if (inputDateOI != null) { - retValue.set(inputDateOI.getPrimitiveWritableObject(arguments[0].get()) - .getTimeInSeconds()); - return retValue; + try { + retValue.set(inputDateOI.getPrimitiveWritableObject(arguments[0].get()).getTimeInSeconds()); + return retValue; + } catch
(DateTimeException | NullPointerException e) { + return null; + } } else if (inputTimestampLocalTzOI != null) { TimestampTZ timestampTZ = inputTimestampLocalTzOI.getPrimitiveJavaObject(arguments[0].get()); @@ -171,7 +175,10 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { } protected static void setValueFromTs(LongWritable value, Timestamp timestamp) { - value.set(timestamp.toEpochSecond()); + // A null (out-of-range) timestamp must never reach LongWritable.set: unboxing a null Long would throw. + if (timestamp != null) { + value.set(timestamp.toEpochSecond()); + } } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTrunc.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTrunc.java index 7a7d13ef41..f19f90b7b8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTrunc.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTrunc.java @@ -431,10 +431,12 @@ private Date evalDate(Date d) throws UDFArgumentException { int month = date.getMonth() - 1; int quarter = month / 3; int monthToSet = quarter * 3 + 1; + // Assume dates are valid date.setMonth(monthToSet); date.setDayOfMonth(1); return date; } else if ("YEAR".equals(fmtInput) || "YYYY".equals(fmtInput) || "YY".equals(fmtInput)) { + // Assume dates are valid date.setMonth(1); date.setDayOfMonth(1); return date; diff --git ql/src/java/org/apache/hadoop/hive/ql/util/DateTimeMath.java ql/src/java/org/apache/hadoop/hive/ql/util/DateTimeMath.java index 20acfa253c..4db30d1160 100644 --- ql/src/java/org/apache/hadoop/hive/ql/util/DateTimeMath.java +++ ql/src/java/org/apache/hadoop/hive/ql/util/DateTimeMath.java @@ -24,6 +24,7 @@ import org.apache.hadoop.hive.serde2.io.DateWritableV2; import org.apache.hive.common.util.DateUtils; +import java.time.DateTimeException; import java.util.Calendar; import java.util.TimeZone; import java.util.concurrent.TimeUnit; @@ -85,8 +86,9 @@ public Timestamp add(Timestamp ts, HiveIntervalYearMonth interval) { } Timestamp tsResult = new Timestamp(); - add(ts, interval, tsResult); - + if (!add(ts, interval, tsResult)) { + return null; + } return tsResult; } @@ -108,7 +110,11 @@ public boolean add(Timestamp ts, HiveIntervalYearMonth interval, Timestamp resul } long resultMillis = addMonthsToMillis(ts.toEpochMilli(), interval.getTotalMonths()); - result.setTimeInMillis(resultMillis, ts.getNanos()); + try { + result.setTimeInMillis(resultMillis, ts.getNanos()); + } catch (DateTimeException e) { + return false; + } return true; } @@ -134,7 +140,9 @@ public Timestamp add(HiveIntervalYearMonth interval, Timestamp ts) { } Timestamp tsResult = new Timestamp(); - add(interval, ts, tsResult); + if (!add(interval, ts, tsResult)) { + return null; + } return tsResult; } @@ -157,7 +165,11 @@ public boolean add(HiveIntervalYearMonth interval, Timestamp ts, Timestamp resul } long resultMillis = addMonthsToMillis(ts.toEpochMilli(), interval.getTotalMonths()); - result.setTimeInMillis(resultMillis, ts.getNanos()); + try { + result.setTimeInMillis(resultMillis, ts.getNanos()); + } catch (DateTimeException e) { + return false; + } return true; } @@ -181,7 +193,9 @@ public Date add(Date dt, HiveIntervalYearMonth interval) { } Date dtResult = new Date(); - add(dt, interval, dtResult); + if (!add(dt, interval, dtResult)) { + return null; + } return dtResult; } @@ -199,12 +213,16 @@ public Date add(Date dt, HiveIntervalYearMonth interval) { } public boolean add(Date dt, HiveIntervalYearMonth interval, Date result) { - if (dt == null || interval == null) { + if (dt == null || interval == null || result == null) { return false; } long resultMillis
= addMonthsToMillis(dt.toEpochMilli(), interval.getTotalMonths()); - result.setTimeInMillis(resultMillis); + try { + result.setTimeInMillis(resultMillis); + } catch (DateTimeException e) { + return false; + } return true; } @@ -225,8 +243,9 @@ public Date add(HiveIntervalYearMonth interval, Date dt) { } Date dtResult = new Date(); - add(interval, dt, dtResult); - + if (!add(interval, dt, dtResult)) { + return null; + } return dtResult; } @@ -242,7 +261,11 @@ public Date add(Date dt, int interval) { } Date dtResult = new Date(); - dtResult.setTimeInDays(dt.toEpochDay() + interval); + try { + dtResult.setTimeInDays(dt.toEpochDay() + interval); + } catch (DateTimeException e) { + return null; + } return dtResult; } @@ -264,8 +288,12 @@ public boolean add(HiveIntervalYearMonth interval, Date dt, Date result) { } long resultMillis = addMonthsToMillis(dt.toEpochMilli(), interval.getTotalMonths()); - result.setTimeInMillis(resultMillis); - return true; + try { + result.setTimeInMillis(resultMillis); + return true; + } catch (DateTimeException e) { + return false; + } } @Deprecated @@ -295,7 +323,9 @@ public Timestamp subtract(Timestamp left, HiveIntervalYearMonth right) { } Timestamp tsResult = new Timestamp(); - subtract(left, right, tsResult); + if (!subtract(left, right, tsResult)) { + return null; + } return tsResult; } @@ -333,7 +363,9 @@ public Date subtract(Date left, HiveIntervalYearMonth right) { } Date dtResult = new Date(); - subtract(left, right, dtResult); + if (!subtract(left, right, dtResult)) { + return null; + } return dtResult; } @@ -382,7 +414,9 @@ public Timestamp add(Timestamp ts, HiveIntervalDayTime interval) { } Timestamp tsResult = new Timestamp(); - add(ts, interval, tsResult); + if (!add(ts, interval, tsResult)) { + return null; + } return tsResult; } @@ -409,7 +443,11 @@ public boolean add(Timestamp ts, HiveIntervalDayTime interval, long newMillis = ts.toEpochMilli() + TimeUnit.SECONDS.toMillis(interval.getTotalSeconds() + nanosResult.seconds); - result.setTimeInMillis(newMillis, nanosResult.nanos); + try { + result.setTimeInMillis(newMillis, nanosResult.nanos); + } catch (DateTimeException e) { + return false; + } return true; } @@ -435,7 +473,9 @@ public Timestamp add(HiveIntervalDayTime interval, Timestamp ts) { } Timestamp tsResult = new Timestamp(); - add(interval, ts, tsResult); + if (!add(interval, ts, tsResult)) { + return null; + } return tsResult; } @@ -457,10 +497,14 @@ public boolean add(HiveIntervalDayTime interval, Timestamp ts, } nanosResult.addNanos(ts.getNanos(), interval.getNanos()); - long newMillis = ts.toEpochMilli() + TimeUnit.SECONDS.toMillis(interval.getTotalSeconds() + nanosResult.seconds); + + try { + result.setTimeInMillis(newMillis, nanosResult.nanos); + } catch (DateTimeException e) { + return false; + } return true; } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorVerifyFast.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorVerifyFast.java index a0ba0e1346..ab9f1d38cb 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorVerifyFast.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorVerifyFast.java @@ -391,7 +391,11 @@ public static void serializeWrite(SerializeWrite serializeWrite, case DATE: { Date value = ((DateWritableV2) object).get(); - serializeWrite.writeDate(value); + if (value != null) { + serializeWrite.writeDate(value); + } else { + serializeWrite.writeNull(); + } } break; case TIMESTAMP: diff --git
ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCastStatement.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCastStatement.java index 8a68506208..b9cf4e0f66 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCastStatement.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCastStatement.java @@ -46,7 +46,9 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIf; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen; +import org.apache.hadoop.hive.serde2.io.DateWritableV2; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java index b5ad22c6a2..75f325a59c 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java @@ -116,7 +116,11 @@ private void compareToUDFYearDate(long t, int y) throws HiveException { TimestampWritableV2 tsw = toTimestampWritable(t); IntWritable res = (IntWritable) udf.evaluate( new GenericUDF.DeferredObject[]{new GenericUDF.DeferredJavaObject(tsw)}); - Assert.assertEquals(res.get(), y); + if (tsw.getTimestamp() == null) { + Assert.assertNull(res); + } else { + Assert.assertEquals(res.get(), y); + } } private void verifyUDFYear(VectorizedRowBatch batch) throws HiveException { @@ -180,7 +184,11 @@ private void compareToUDFDayOfMonthDate(long t, int y) throws HiveException { TimestampWritableV2 tsw = toTimestampWritable(t); IntWritable res = (IntWritable) udf.evaluate( new GenericUDF.DeferredObject[]{new GenericUDF.DeferredJavaObject(tsw)}); - Assert.assertEquals(res.get(), y); + if (tsw.getTimestamp() == null) { + Assert.assertNull(res); + } else { + Assert.assertEquals(res.get(), y); + } } private void verifyUDFDayOfMonth(VectorizedRowBatch batch) throws HiveException { @@ -244,7 +252,11 @@ private void compareToUDFMonthDate(long t, int y) throws HiveException { TimestampWritableV2 tsw = toTimestampWritable(t); IntWritable res = (IntWritable) udf.evaluate( new GenericUDF.DeferredObject[]{new GenericUDF.DeferredJavaObject(tsw)}); - Assert.assertEquals(res.get(), y); + if (tsw.getTimestamp() == null) { + Assert.assertNull(res); + } else { + Assert.assertEquals(res.get(), y); + } } private void verifyUDFMonth(VectorizedRowBatch batch) throws HiveException { @@ -304,7 +316,7 @@ public void testVectorUDFMonth() throws HiveException { private LongWritable getLongWritable(TimestampWritableV2 i) { LongWritable result = new LongWritable(); - if (i == null) { + if (i == null || i.getTimestamp() == null && i.getBytes() == null) { return null; } else { result.set(i.getSeconds()); @@ -382,7 +394,11 @@ private void compareToUDFWeekOfYearDate(long t, int y) { UDFWeekOfYear udf = new UDFWeekOfYear(); TimestampWritableV2 tsw = toTimestampWritable(t); IntWritable res = udf.evaluate(tsw); - Assert.assertEquals(res.get(), y); + if (tsw.getTimestamp() != null) { + 
Assert.assertEquals(res.get(), y); + } else { + Assert.assertNull(res); + } } private void verifyUDFWeekOfYear(VectorizedRowBatch batch) throws HiveException { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java index 9615bf30ae..4965d1f3a5 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java @@ -395,13 +395,21 @@ public static void serializeWrite(SerializeWrite serializeWrite, case DATE: { Date value = ((DateWritableV2) object).get(); - serializeWrite.writeDate(value); + if (value != null) { + serializeWrite.writeDate(value); + } else { + serializeWrite.writeNull(); + } } break; case TIMESTAMP: { Timestamp value = ((TimestampWritableV2) object).getTimestamp(); - serializeWrite.writeTimestamp(value); + if (value != null) { + serializeWrite.writeTimestamp(value); + } else { + serializeWrite.writeNull(); + } } break; case INTERVAL_YEAR_MONTH: diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorColumnGroupGenerator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorColumnGroupGenerator.java index f200aa26e6..f20c0c464d 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorColumnGroupGenerator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorColumnGroupGenerator.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.util.batchgen; import java.sql.Timestamp; +import java.time.DateTimeException; import java.util.Arrays; import java.util.Random; @@ -628,8 +629,11 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde for (int i = 0; i < size; i++) { if (!isNull[i]) { Date date = dateArray[i]; - longColVec.vector[i] = - DateWritableV2.dateToDays(date); + try { + longColVec.vector[i] = DateWritableV2.dateToDays(date); + } catch (DateTimeException e) { + // out-of-range date: leave the default value in longColVec.vector[i] + } } } } diff --git ql/src/test/org/apache/hadoop/hive/ql/io/arrow/TestArrowColumnarBatchSerDe.java ql/src/test/org/apache/hadoop/hive/ql/io/arrow/TestArrowColumnarBatchSerDe.java index 2e011b58cb..6e21b19213 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/arrow/TestArrowColumnarBatchSerDe.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/arrow/TestArrowColumnarBatchSerDe.java @@ -59,6 +59,7 @@ import org.junit.Before; import org.junit.Test; +import java.time.DateTimeException; import java.util.Collections; import java.util.List; import java.util.Map; @@ -510,7 +511,13 @@ public void testPrimitiveRandomTimestamp() throws SerDeException { for (int i = 0; i < size; i++) { long millis = ((long) rand.nextInt(Integer.MAX_VALUE)) * 1000; Timestamp timestamp = Timestamp.ofEpochMilli(rand.nextBoolean() ?
millis : -millis); - timestamp.setNanos(rand.nextInt(1000) * 1000); + if (timestamp != null) { + try { + timestamp.setNanos(rand.nextInt(1000) * 1000); + } catch (DateTimeException e) { + timestamp = null; + } + } rows[i] = new Object[] {new TimestampWritableV2(timestamp)}; } diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java index 477825e3f4..0a79905a53 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java @@ -76,10 +76,9 @@ public void testJulianDay() { Assert.assertEquals(ts2Fetched, ts2); Assert.assertEquals(nt2.getJulianDay() - nt1.getJulianDay(), 30); - //check if 1464305 Julian Days between Jan 1, 2005 BC and Jan 31, 2005. + //check if 730517 Julian Days between Jan 1, 0005 and Jan 31, 2005. cal1 = Calendar.getInstance(); - cal1.set(Calendar.ERA, GregorianCalendar.BC); - cal1.set(Calendar.YEAR, 2005); + cal1.set(Calendar.YEAR, 5); cal1.set(Calendar.MONTH, Calendar.JANUARY); cal1.set(Calendar.DAY_OF_MONTH, 1); cal1.set(Calendar.HOUR_OF_DAY, 0); @@ -103,7 +102,7 @@ ts2Fetched = NanoTimeUtils.getTimestamp(nt2, false); Assert.assertEquals(ts2Fetched, ts2); - Assert.assertEquals(nt2.getJulianDay() - nt1.getJulianDay(), 1464305); + Assert.assertEquals(nt2.getJulianDay() - nt1.getJulianDay(), 730517); } public void testNanos() { diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out index ac1952ae8c..7d3ad0710d 100644 --- ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out @@ -969,7 +969,7 @@ ORDER BY c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_wrong #### A masked
pattern was here #### -NULL 2 12 2 NULL 49 4 40 39 +NULL NULL NULL NULL NULL NULL NULL NULL NULL PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT diff --git ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out index 1e650f96d4..58e4eab6ab 100644 --- ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out @@ -961,7 +961,7 @@ ORDER BY c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_wrong #### A masked pattern was here #### -NULL 2 12 2 NULL 49 4 40 39 +NULL NULL NULL NULL NULL NULL NULL NULL NULL PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT diff --git ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out index bc26b17249..b200b69a35 100644 --- ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out @@ -901,7 +901,7 @@ ORDER BY c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_wrong #### A masked pattern was here #### -NULL 2 12 2 NULL 49 4 40 39 +NULL NULL NULL NULL NULL NULL NULL NULL NULL PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT diff --git serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java index 99a0b9a487..1aa22aaecb 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java +++ serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java @@ -207,7 +207,10 @@ private Object serializePrimitive(TypeInfo typeInfo, PrimitiveObjectInspector fi return vc.getValue(); case DATE: Date date = ((DateObjectInspector)fieldOI).getPrimitiveJavaObject(structFieldData); - return DateWritableV2.dateToDays(date); + if (date != null) { + return DateWritableV2.dateToDays(date); + } + return null; case TIMESTAMP: Timestamp timestamp = ((TimestampObjectInspector) fieldOI).getPrimitiveJavaObject(structFieldData); diff --git serde/src/java/org/apache/hadoop/hive/serde2/io/DateWritableV2.java serde/src/java/org/apache/hadoop/hive/serde2/io/DateWritableV2.java index ba7760808d..0dbe041369 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/io/DateWritableV2.java +++ serde/src/java/org/apache/hadoop/hive/serde2/io/DateWritableV2.java @@ -20,6 +20,11 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; +import java.time.DateTimeException; +import java.time.Instant; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.ZoneOffset; import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.io.WritableComparable; @@ -58,8 +63,10 @@ public DateWritableV2(int d) { * Set the DateWritableV2 based on the days since epoch date.
* @param d integer value representing days since epoch date + * @return true if the value maps to a valid in-range date, false otherwise */ - public void set(int d) { + public boolean set(int d) { date = Date.ofEpochDay(d); + return date != null; } /** @@ -68,7 +75,7 @@ public void set(int d) { */ public void set(Date d) { if (d == null) { - date = new Date(); + date = null; return; } @@ -76,7 +83,11 @@ public void set(int d) { } public void set(DateWritableV2 d) { - set(d.getDays()); + if (d.date != null) { + set(d.getDays()); + } else { + date = null; + } } /** @@ -86,7 +97,7 @@ public Date get() { return date; } - public int getDays() { + public int getDays() throws DateTimeException { return date.toEpochDay(); } @@ -94,7 +105,10 @@ public int getDays() { * * @return time in seconds corresponding to this DateWritableV2 */ - public long getTimeInSeconds() { + public long getTimeInSeconds() throws DateTimeException { + if (date == null) { + throw new DateTimeException("Null date, likely out of bounds"); + } return date.toEpochSecond(); } @@ -102,26 +116,38 @@ public static Date timeToDate(long seconds) { return Date.ofEpochMilli(seconds * 1000); } + /** + * Doesn't check for out-of-bounds dates + */ public static long daysToMillis(int days) { - return Date.ofEpochDay(days).toEpochMilli(); + return LocalDate.ofEpochDay(days).atStartOfDay().toInstant(ZoneOffset.UTC).toEpochMilli(); } + /** + * Doesn't check for out-of-bounds dates + */ public static int millisToDays(long millis) { - return Date.ofEpochMilli(millis).toEpochDay(); + return (int) LocalDateTime.ofInstant(Instant.ofEpochMilli(millis), ZoneOffset.UTC).toLocalDate().toEpochDay(); } - public static int dateToDays(Date d) { + public static int dateToDays(Date d) throws DateTimeException { + if (d == null) { + throw new DateTimeException("Null date, likely out of bounds"); + } return d.toEpochDay(); } @Deprecated public static int dateToDays(java.sql.Date d) { - return Date.ofEpochMilli(d.getTime()).toEpochDay(); + return (int) LocalDateTime.ofInstant(Instant.ofEpochMilli(d.getTime()), ZoneOffset.UTC) .toLocalDate() .toEpochDay(); } @Override public void readFields(DataInput in) throws IOException { - date.setTimeInDays(WritableUtils.readVInt(in)); + // Date.ofEpochDay returns null for out-of-range values and avoids an NPE when date was already null. + date = Date.ofEpochDay(WritableUtils.readVInt(in)); } @Override @@ -139,12 +165,13 @@ public boolean equals(Object o) { if (!(o instanceof DateWritableV2)) { return false; } - return compareTo((DateWritableV2) o) == 0; + DateWritableV2 other = (DateWritableV2) o; + return date == null ? other.date == null : date.equals(other.date); } @Override public String toString() { - return date.toString(); + return date != null ?
date.toString() : null; } @Override diff --git serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritableV2.java serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritableV2.java index 9aa7f19ab2..bb8b4e37f8 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritableV2.java +++ serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritableV2.java @@ -20,6 +20,7 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; +import java.time.DateTimeException; import java.time.format.DateTimeFormatter; import org.apache.hadoop.hive.common.type.HiveDecimal; @@ -113,10 +114,13 @@ public void set(byte[] bytes, int offset) { public void set(Timestamp t) { if (t == null) { - timestamp.set(null); - return; + timestamp = null; + } else if (timestamp == null) { + // a previous set(null) may have cleared the field; recreate it before copying + timestamp = new Timestamp(t); + } else { + timestamp.set(t); } - timestamp.set(t); bytesEmpty = true; timestampEmpty = false; } @@ -134,7 +135,11 @@ public void set(TimestampWritableV2 t) { } public static void updateTimestamp(Timestamp timestamp, long secondsAsMillis, int nanos) { - timestamp.setTimeInMillis(secondsAsMillis, nanos); + try { + timestamp.setTimeInMillis(secondsAsMillis, nanos); + } catch (DateTimeException e) { + // out of range: leave the timestamp unchanged; reassigning the parameter is not visible to the caller + } } public void setInternal(long secondsAsMillis, int nanos) { @@ -161,7 +166,11 @@ public void writeToByteStream(RandomAccessOutput byteStream) { */ public long getSeconds() { if (!timestampEmpty) { + if (timestamp != null) { + return timestamp.toEpochSecond(); + } else { + return 0; + } - return timestamp.toEpochSecond(); } else if (!bytesEmpty) { return TimestampWritableV2.getSeconds(currentBytes, offset); } else { @@ -175,7 +184,11 @@ public long getSeconds() { */ public int getNanos() { if (!timestampEmpty) { - return timestamp.getNanos(); + if (timestamp != null) { + return timestamp.getNanos(); + } else { + return 0; + } } else if (!bytesEmpty) { return hasDecimalOrSecondVInt() ? TimestampWritableV2.getNanos(currentBytes, offset + 4) : 0; @@ -366,6 +379,12 @@ public boolean equals(Object o) { @Override public String toString() { + // null internals + if (!timestampEmpty && timestamp == null || + !bytesEmpty && internalBytes == nullBytes) { + return null; + } + if (timestampEmpty) { populateTimestamp(); } @@ -398,7 +417,11 @@ public int hashCode() { private void populateTimestamp() { long seconds = getSeconds(); int nanos = getNanos(); - timestamp.setTimeInSeconds(seconds, nanos); + try { + timestamp.setTimeInSeconds(seconds, nanos); + } catch (DateTimeException e) { + timestamp = null; + } } /** Static methods **/ @@ -458,6 +481,10 @@ public static int getNanos(byte[] bytes, int offset) { */ public static void convertTimestampToBytes(Timestamp t, byte[] b, int offset) { + if (t == null) { + // nothing to serialize: leave the destination buffer untouched; reassigning the parameter would not reach the caller + return; + } long seconds = t.toEpochSecond(); int nanos = t.getNanos(); @@ -523,19 +550,6 @@ public static HiveDecimal getHiveDecimal(Timestamp timestamp) { return result; } - /** - * Converts the time in seconds or milliseconds to a timestamp. - * @param time time in seconds or in milliseconds - * @return the timestamp - */ - public static Timestamp longToTimestamp(long time, boolean intToTimestampInSeconds) { - // If the time is in seconds, converts it to milliseconds first.
@@ -544,7 +558,11 @@ public static void setTimestamp(Timestamp t, byte[] bytes, int offset) {
     } else {
       nanos = 0;
     }
-    t.setTimeInSeconds(seconds, nanos);
+    try {
+      t.setTimeInSeconds(seconds, nanos);
+    } catch (DateTimeException e) {
+      t = null;
+    }
   }
 
   public static Timestamp createTimestamp(byte[] bytes, int offset) {
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe2.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe2.java
index 3e0689297a..9ecdce8255 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe2.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe2.java
@@ -338,7 +338,9 @@ void serialize(RandomAccessOutput byteStream, Object obj, ObjectInspector objIns
     void serialize(RandomAccessOutput byteStream, Object obj, ObjectInspector objInspector,
         boolean skipLengthPrefix, BooleanRef warnedOnceNullMapKey) {
       DateWritableV2 d = ((DateObjectInspector) objInspector).getPrimitiveWritableObject(obj);
-      LazyBinarySerDe.writeDateToByteStream(byteStream, d);
+      if (d != null && d.get() != null) {
+        LazyBinarySerDe.writeDateToByteStream(byteStream, d);
+      }
     }
   }
 
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinarySerializeWrite.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinarySerializeWrite.java
index ec56b82bee..cf6aa4224e 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinarySerializeWrite.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinarySerializeWrite.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.serde2.lazybinary.fast;
 
 import java.io.IOException;
+import java.time.DateTimeException;
 import java.util.ArrayDeque;
 import java.util.Deque;
 import java.util.List;
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
index 044fd16720..3a3985d2a1 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
@@ -852,12 +852,19 @@ public static int hashCodeMurmur(Object o, ObjectInspector objIns, ByteBuffer by
       return Murmur3.hash32(
           ((BinaryObjectInspector) poi).getPrimitiveWritableObject(o).getBytes());
     case DATE:
-      byteBuffer.putInt(((DateObjectInspector) poi).getPrimitiveWritableObject(o).getDays());
-      return Murmur3.hash32(byteBuffer.array(), 4);
+      DateWritableV2 dw = ((DateObjectInspector) poi).getPrimitiveWritableObject(o);
+      if (dw != null && dw.get() != null) {
+        byteBuffer.putInt(dw.getDays());
+        return Murmur3.hash32(byteBuffer.array(), 4);
+      }
+      return 0;
     case TIMESTAMP: {
       TimestampWritableV2 t = ((TimestampObjectInspector) poi)
          .getPrimitiveWritableObject(o);
-      return Murmur3.hash32(t.getBytes());
+      if (t != null && t.getTimestamp() != null) {
+        return Murmur3.hash32(t.getBytes());
+      }
+      return 0;
     }
     case TIMESTAMPLOCALTZ:
       return Murmur3.hash32((((TimestampLocalTZObjectInspector) poi).getPrimitiveWritableObject(o)).getBytes());
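hashCodeMurmur now maps a null or out-of-range DATE/TIMESTAMP to a constant hash of 0, so such rows can still be routed deterministically instead of raising an NPE mid-shuffle. The convention is easy to see in isolation (a hypothetical helper, not part of the patch):

    // Hypothetical illustration of the convention used above.
    public class NullHashDemo {
      static final int NULL_HASH = 0; // mirrors the `return 0;` branches in hashCodeMurmur

      static int bucket(Integer hash, int numBuckets) {
        // Every null value shares NULL_HASH, so all of them map to one stable bucket.
        int h = (hash == null) ? NULL_HASH : hash;
        return (h & Integer.MAX_VALUE) % numBuckets;
      }

      public static void main(String[] args) {
        System.out.println(bucket(null, 8));      // always the same bucket
        System.out.println(bucket(123456789, 8)); // normal values hash as before
      }
    }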
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaDateObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaDateObjectInspector.java
index 4cf0a60b48..afa8a7c328 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaDateObjectInspector.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaDateObjectInspector.java
@@ -21,6 +21,8 @@
 import org.apache.hadoop.hive.serde2.io.DateWritableV2;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
+import java.time.DateTimeException;
+
 /**
  * A JavaDateObjectInspector inspects a Java Date Object.
  */
@@ -49,8 +51,12 @@ public Object set(Object o, Date value) {
     if (value == null) {
       return null;
     }
-    ((Date) o).setTimeInDays(value.toEpochDay());
-    return o;
+    try {
+      ((Date) o).setTimeInDays(value.toEpochDay());
+      return o;
+    } catch (DateTimeException e) {
+      return null;
+    }
   }
 
   @Deprecated
@@ -58,6 +64,9 @@ public Object set(Object o, java.sql.Date value) {
     if (value == null) {
       return null;
     }
+    if (o == null) {
+      o = new Date();
+    }
     ((Date) o).setTimeInMillis(value.getTime());
     return o;
   }
@@ -66,8 +75,12 @@ public Object set(Object o, DateWritableV2 d) {
     if (d == null) {
       return null;
     }
-    ((Date) o).setTimeInDays(d.get().toEpochDay());
-    return o;
+    try {
+      ((Date) o).setTimeInDays(d.get().toEpochDay());
+      return o;
+    } catch (DateTimeException e) {
+      return null;
+    }
   }
 
   @Deprecated
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaTimestampObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaTimestampObjectInspector.java
index 47719c8564..dbbb5137b1 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaTimestampObjectInspector.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaTimestampObjectInspector.java
@@ -21,6 +21,8 @@
 import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
+import java.time.DateTimeException;
+
 public class JavaTimestampObjectInspector
     extends AbstractPrimitiveJavaObjectInspector
     implements SettableTimestampObjectInspector {
@@ -56,7 +58,11 @@ public Object set(Object o, java.sql.Timestamp value) {
     if (value == null) {
       return null;
     }
-    ((Timestamp) o).setTimeInMillis(value.getTime(), value.getNanos());
+    try {
+      ((Timestamp) o).setTimeInMillis(value.getTime(), value.getNanos());
+    } catch (DateTimeException e) {
+      o = null;
+    }
     return o;
   }
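The settable inspectors consequently gain a new failure mode: set(...) can return null even for a non-null target object when the incoming value does not survive validation, so callers should use the returned reference rather than the argument they passed in. A hedged sketch (the factory field name is the standard Hive one, but treat it as an assumption):

    import org.apache.hadoop.hive.common.type.Date;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaDateObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

    public class SettableOiDemo {
      public static void main(String[] args) {
        JavaDateObjectInspector oi = PrimitiveObjectInspectorFactory.javaDateObjectInspector;
        Object target = new Date();
        // In-range values come back as the same (mutated) target object;
        // if setTimeInDays threw DateTimeException, set(...) would return null instead.
        Object result = oi.set(target, Date.of(2020, 1, 1));
        System.out.println(result == target); // true for a valid date
      }
    }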
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
index 3886b202c7..d9cfb751fa 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
@@ -1155,11 +1155,16 @@ public static Date getDate(Object o, PrimitiveObjectInspector oi) {
       break;
     }
     case DATE:
-      result = ((DateObjectInspector) oi).getPrimitiveWritableObject(o).get();
+      DateWritableV2 dateWritableV2 = ((DateObjectInspector) oi).getPrimitiveWritableObject(o);
+      if (dateWritableV2 != null) {
+        result = dateWritableV2.get();
+      }
       break;
     case TIMESTAMP:
-      result = DateWritableV2.timeToDate(
-          ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o).getSeconds());
+      TimestampWritableV2 timestampWritableV2 = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o);
+      if (timestampWritableV2 != null) {
+        result = DateWritableV2.timeToDate(timestampWritableV2.getSeconds());
+      }
       break;
     case TIMESTAMPLOCALTZ:
       String tstz = oi.getPrimitiveWritableObject(o).toString();
@@ -1194,23 +1199,23 @@ public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector inputOI,
       break;
     case BOOLEAN:
       longValue = ((BooleanObjectInspector) inputOI).get(o) ? 1 : 0;
-      result = TimestampWritableV2.longToTimestamp(longValue, intToTimestampInSeconds);
+      result = TimestampUtils.longToTimestamp(longValue, intToTimestampInSeconds);
       break;
     case BYTE:
       longValue = ((ByteObjectInspector) inputOI).get(o);
-      result = TimestampWritableV2.longToTimestamp(longValue, intToTimestampInSeconds);
+      result = TimestampUtils.longToTimestamp(longValue, intToTimestampInSeconds);
       break;
     case SHORT:
       longValue = ((ShortObjectInspector) inputOI).get(o);
-      result = TimestampWritableV2.longToTimestamp(longValue, intToTimestampInSeconds);
+      result = TimestampUtils.longToTimestamp(longValue, intToTimestampInSeconds);
       break;
     case INT:
       longValue = ((IntObjectInspector) inputOI).get(o);
-      result = TimestampWritableV2.longToTimestamp(longValue, intToTimestampInSeconds);
+      result = TimestampUtils.longToTimestamp(longValue, intToTimestampInSeconds);
       break;
     case LONG:
       longValue = ((LongObjectInspector) inputOI).get(o);
-      result = TimestampWritableV2.longToTimestamp(longValue, intToTimestampInSeconds);
+      result = TimestampUtils.longToTimestamp(longValue, intToTimestampInSeconds);
       break;
     case FLOAT:
       result = TimestampUtils.doubleToTimestamp(((FloatObjectInspector) inputOI).get(o));
@@ -1239,13 +1244,7 @@ public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector inputOI,
       result = ((TimestampObjectInspector) inputOI).getPrimitiveWritableObject(o).getTimestamp();
       break;
     case TIMESTAMPLOCALTZ:
-      String tstz = inputOI.getPrimitiveWritableObject(o).toString();
-      int index = tstz.indexOf(" ");
-      index = tstz.indexOf(" ", index + 1);
-      if (index == -1) {
-        return null;
-      }
-      result = Timestamp.valueOf(tstz.substring(0, index));
+      result = TimestampUtils.timestampLocalTzToTimestamp(inputOI.getPrimitiveWritableObject(o).toString());
       break;
     default:
       throw new RuntimeException("Hive 2 Internal error: unknown type: "
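The longToTimestamp helper deleted from TimestampWritableV2 earlier in this patch is now expected on TimestampUtils; its body presumably mirrors the removed code, along the lines of the sketch below (the real TimestampUtils version may differ in details):

    import org.apache.hadoop.hive.common.type.Timestamp;

    public final class TimestampUtilsSketch {
      /**
       * Converts the time in seconds or milliseconds to a timestamp,
       * as the method removed from TimestampWritableV2 did.
       * @param time time in seconds or in milliseconds
       */
      public static Timestamp longToTimestamp(long time, boolean intToTimestampInSeconds) {
        // If the time is in seconds, ofEpochSecond interprets it directly;
        // otherwise the value is treated as milliseconds.
        if (intToTimestampInSeconds) {
          return Timestamp.ofEpochSecond(time);
        }
        return Timestamp.ofEpochMilli(time);
      }
    }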
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableDateObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableDateObjectInspector.java
index 6a96dddca9..853442da7b 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableDateObjectInspector.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableDateObjectInspector.java
@@ -34,7 +34,10 @@ public WritableDateObjectInspector() {
 
   @Override
   public DateWritableV2 getPrimitiveWritableObject(Object o) {
-    return o == null ? null : (DateWritableV2) o;
+    if (o instanceof DateWritableV2 && ((DateWritableV2) o).get() != null) {
+      return (DateWritableV2) o;
+    }
+    return null;
   }
 
   public Date getPrimitiveJavaObject(Object o) {
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableTimestampObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableTimestampObjectInspector.java
index e0ab191b73..7b455990f0 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableTimestampObjectInspector.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableTimestampObjectInspector.java
@@ -31,7 +31,10 @@ public WritableTimestampObjectInspector() {
 
   @Override
   public TimestampWritableV2 getPrimitiveWritableObject(Object o) {
-    return o == null ? null : (TimestampWritableV2) o;
+    if (o instanceof TimestampWritableV2 && ((TimestampWritableV2) o).getTimestamp() != null) {
+      return (TimestampWritableV2) o;
+    }
+    return null;
   }
 
   public Timestamp getPrimitiveJavaObject(Object o) {
diff --git serde/src/java/org/apache/hadoop/hive/serde2/teradata/TeradataBinaryDataInputStream.java serde/src/java/org/apache/hadoop/hive/serde2/teradata/TeradataBinaryDataInputStream.java
index b26d3422f9..a6a5d7d919 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/teradata/TeradataBinaryDataInputStream.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/teradata/TeradataBinaryDataInputStream.java
@@ -30,6 +30,7 @@
 import java.io.InputStream;
 import java.math.BigInteger;
 import java.text.ParseException;
+import java.time.DateTimeException;
 
 import static java.lang.String.format;
 
@@ -126,10 +127,14 @@ public Date readDate() throws IOException, ParseException {
       dateString = StringUtils.leftPad(dateString, DATE_STRING_LENGTH, '0');
     }
     Date date = new Date();
-    date.setYear(Integer.parseInt(dateString.substring(0, 4)));
-    date.setMonth(Integer.parseInt(dateString.substring(4, 6)));
-    date.setDayOfMonth(Integer.parseInt(dateString.substring(6, 8)));
-    return date;
+    try {
+      date.setYear(Integer.parseInt(dateString.substring(0, 4)));
+      date.setMonth(Integer.parseInt(dateString.substring(4, 6)));
+      date.setDayOfMonth(Integer.parseInt(dateString.substring(6, 8)));
+      return date;
+    } catch (DateTimeException e) {
+      return null;
+    }
   }
 
   /**
diff --git serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java
index 7e10af7177..4ff23d26d9 100644
--- serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java
+++ serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java
@@ -395,7 +395,11 @@ public static void serializeWrite(SerializeWrite serializeWrite,
     case DATE:
       {
         Date value = ((DateWritableV2) object).get();
-        serializeWrite.writeDate(value);
+        if (value != null) {
+          serializeWrite.writeDate(value);
+        } else {
+          serializeWrite.writeNull();
+        }
       }
       break;
     case TIMESTAMP:
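In the Teradata reader the whole parse is wrapped so that a malformed encoded date, say a month of 13, becomes a SQL NULL rather than an exception escaping the record reader. The setter behavior driving this can be seen directly (values illustrative):

    import java.time.DateTimeException;
    import org.apache.hadoop.hive.common.type.Date;

    public class TeradataDateDemo {
      public static void main(String[] args) {
        Date date = new Date();
        date.setYear(2021);
        try {
          date.setMonth(13); // java.time rejects the impossible month
        } catch (DateTimeException e) {
          // readDate catches exactly this and returns null for the column
          System.out.println("null date: " + e.getMessage());
        }
      }
    }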
diff --git serde/src/test/org/apache/hadoop/hive/serde2/io/TestTimestampWritableV2.java serde/src/test/org/apache/hadoop/hive/serde2/io/TestTimestampWritableV2.java
index 155dc1f58c..3378fc6f03 100644
--- serde/src/test/org/apache/hadoop/hive/serde2/io/TestTimestampWritableV2.java
+++ serde/src/test/org/apache/hadoop/hive/serde2/io/TestTimestampWritableV2.java
@@ -247,6 +247,9 @@ private static int randomNanos(Random rand) {
 
   private static void checkTimestampWithAndWithoutNanos(Timestamp ts, int nanos)
       throws IOException {
+    if (ts == null) {
+      return;
+    }
     serializeDeserializeAndCheckTimestamp(ts);
 
     ts.setNanos(nanos);
@@ -475,7 +478,9 @@ public void testBinarySortable() {
     List<TimestampWritableV2> tswList = new ArrayList<TimestampWritableV2>();
     for (int i = 0; i < 50; ++i) {
       Timestamp ts = Timestamp.ofEpochMilli(rand.nextLong(), randomNanos(rand));
-      tswList.add(new TimestampWritableV2(ts));
+      if (ts != null) {
+        tswList.add(new TimestampWritableV2(ts));
+      }
     }
     for (TimestampWritableV2 tsw1 : tswList) {
       byte[] bs1 = tsw1.getBinarySortable();
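Because Timestamp.ofEpochMilli can now return null whenever a random long falls outside years 0000-9999, the tests skip such values instead of dereferencing them. The same guard belongs anywhere random inputs feed the factories; a sketch:

    import java.util.Random;
    import org.apache.hadoop.hive.common.type.Timestamp;

    public class RandomTimestampDemo {
      public static void main(String[] args) {
        Random rand = new Random(42);
        int kept = 0;
        for (int i = 0; i < 50; ++i) {
          // Most random longs land outside the valid range and yield null.
          Timestamp ts = Timestamp.ofEpochMilli(rand.nextLong(), 0);
          if (ts != null) {
            kept++;
          }
        }
        System.out.println("usable timestamps: " + kept);
      }
    }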