diff --git common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java index 213650c2a5..4708d35a78 100644 --- common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java +++ common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java @@ -17,12 +17,13 @@ */ package org.apache.hadoop.hive.common.type; -import java.text.DateFormat; -import java.text.SimpleDateFormat; import java.time.DateTimeException; +import java.time.Instant; import java.time.LocalDate; +import java.time.LocalDateTime; import java.time.LocalTime; import java.time.ZoneId; +import java.time.ZoneOffset; import java.time.ZonedDateTime; import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatterBuilder; @@ -145,4 +146,19 @@ public static ZoneId parseTimeZone(String timeZoneStr) { } } + /** + * Timestamps are technically time zone agnostic: they are defined as nanoseconds since [UTC epoch]. + * This method deliberately bends that definition: the input timestamp is interpreted as + * nanoseconds since [epoch at fromZone], and the returned Timestamp represents + * nanoseconds since [epoch at toZone]. + */ + public static Timestamp convertTimestampToZone(Timestamp ts, ZoneId fromZone, ZoneId toZone) { + // get nanos since [epoch at fromZone] + Instant instant = convert(ts, fromZone).getZonedDateTime().toInstant(); + // get [local time at toZone] + LocalDateTime localDateTimeAtToZone = LocalDateTime.ofInstant(instant, toZone); + // get nanos between [epoch at toZone] and [local time at toZone] + return Timestamp.ofEpochSecond(localDateTimeAtToZone.toEpochSecond(ZoneOffset.UTC), + localDateTimeAtToZone.getNano()); + } } diff --git data/files/parquet_historical_timestamp_legacy.parq data/files/parquet_historical_timestamp_legacy.parq new file mode 100644 index 0000000000000000000000000000000000000000..3dd855140b943108855deaa9d1e1e507a6206558 GIT binary patch literal 386 zcmah_%}T>S5T0y92q;ny6LxXITv{rmVe{8U@X}im(O|txeij>OT1}Jo0el-@#Fy}4 z+;}npi z+CIp7U+%WH=Xz?$o@8ygDQppV>1-B-LR`xri+$k>;rXc-he4P}v7Z(5FrNqM72{la KVJedo<^DHlaZcg@ literal 0 HcmV?d00001 diff --git data/files/parquet_historical_timestamp_new.parq data/files/parquet_historical_timestamp_new.parq new file mode 100644 index 0000000000000000000000000000000000000000..aab0707aa226d0d3457b55eb7a57415d00f167df GIT binary patch literal 418 zcmah`%}T>S5T0y92vnpVrtCrvxwKSB metadata = parent.getMetadata(); - //Current Hive parquet timestamp implementation stores it in UTC, but other components do not do that. - //If this file written by current Hive implementation itself, we need to do the reverse conversion, else skip the conversion. + // The current Hive Parquet timestamp implementation stores timestamps in UTC, but other + // components do not; for files written by such components we skip timestamp conversion. + // If the file was written by a version of Hive before HIVE-21290, the file metadata will + // not contain the writer time zone, so we convert the timestamp to the system (reader) + // time zone. + // If the file was written by the current Hive implementation, we convert timestamps to the + // writer time zone in order to emulate time zone agnostic behavior. 
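Illustrative aside, not part of the patch: the wall-clock shift that convertTimestampToZone above performs can be sketched with plain java.time types. The class name and zone choices are examples; the expected value mirrors testConvertTimestampToZone in TestParquetTimestampUtils further down.

import java.time.LocalDateTime;
import java.time.ZoneId;

public class ConvertTimestampToZoneSketch {
  public static void main(String[] args) {
    // Interpret the wall-clock value in the "from" zone, then re-read the same instant in the "to" zone.
    LocalDateTime local = LocalDateTime.parse("2018-01-01T00:00:00");
    LocalDateTime shifted = local.atZone(ZoneId.of("America/New_York"))
        .withZoneSameInstant(ZoneId.of("US/Pacific"))
        .toLocalDateTime();
    System.out.println(shifted); // 2017-12-31T21:00, matching testConvertTimestampToZone
  }
}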
boolean skipConversion = Boolean.parseBoolean( metadata.get(HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION.varname)); - Timestamp ts = NanoTimeUtils.getTimestamp(nt, skipConversion); + Timestamp ts = NanoTimeUtils.getTimestamp(nt, skipConversion, + DataWritableReadSupport.getWriterTimeZoneId(metadata)); return new TimestampWritableV2(ts); } }; diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java index 7f2a684d28..30f3d1737d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java @@ -13,6 +13,8 @@ */ package org.apache.hadoop.hive.ql.io.parquet.read; +import java.time.DateTimeException; +import java.time.ZoneId; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -26,6 +28,7 @@ import org.apache.hadoop.hive.ql.io.IOConstants; import org.apache.hadoop.hive.ql.io.parquet.convert.DataWritableRecordConverter; import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; +import org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriteSupport; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.optimizer.FieldNode; import org.apache.hadoop.hive.ql.optimizer.NestedColumnFieldPruningUtils; @@ -262,6 +265,25 @@ public static MessageType getProjectedSchema( return new MessageType(schema.getName(), schemaTypes); } + /** + * Returns the writer time zone stored in the given file metadata as a ZoneId, or null if no + * writer time zone is present. + */ + public static ZoneId getWriterTimeZoneId(Map<String, String> metadata) { + if (metadata == null) { + return null; + } + String value = metadata.get(DataWritableWriteSupport.WRITER_TIMEZONE); + try { + if (value != null) { + return ZoneId.of(value); + } + } catch (DateTimeException e) { + throw new RuntimeException("Can't parse writer time zone stored in file metadata", e); + } + + return null; + } + /** * Return the columns which contains required nested attribute level * E.g., given struct a: while 'x' is required and 'y' is not, the method will return * @@ -448,11 +470,23 @@ private static MessageType getRequestedPrunedSchema( throw new IllegalStateException("ReadContext not initialized properly. 
" + "Don't know the Hive Schema."); } + String key = HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION.varname; if (!metadata.containsKey(key)) { metadata.put(key, String.valueOf(HiveConf.getBoolVar( configuration, HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION))); } + + String writerTimezone = DataWritableWriteSupport.WRITER_TIMEZONE; + if (!metadata.containsKey(writerTimezone)) { + if (keyValueMetaData.containsKey(writerTimezone)) { + metadata.put(writerTimezone, keyValueMetaData.get(writerTimezone)); + } + } else if (!metadata.get(writerTimezone).equals(keyValueMetaData.get(writerTimezone))) { + throw new IllegalStateException("Metadata contains a writer time zone that does not match " + + "file footer's writer time zone."); + } + return new DataWritableRecordConverter(readContext.getRequestedSchema(), metadata, hiveTypeInfo); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java index bf78d8cc5b..44a4858e07 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java @@ -13,6 +13,8 @@ */ package org.apache.hadoop.hive.ql.io.parquet.timestamp; +import java.time.ZoneId; +import java.time.ZoneOffset; import java.util.Calendar; import java.util.GregorianCalendar; import java.util.TimeZone; @@ -21,6 +23,7 @@ import org.apache.hadoop.hive.common.type.Timestamp; import jodd.datetime.JDateTime; +import org.apache.hadoop.hive.common.type.TimestampTZUtil; /** * Utilities for converting from java.sql.Timestamp to parquet timestamp. @@ -33,32 +36,40 @@ static final long NANOS_PER_DAY = TimeUnit.DAYS.toNanos(1); private static final ThreadLocal parquetGMTCalendar = new ThreadLocal(); - private static final ThreadLocal parquetLocalCalendar = new ThreadLocal(); private static Calendar getGMTCalendar() { //Calendar.getInstance calculates the current-time needlessly, so cache an instance. if (parquetGMTCalendar.get() == null) { parquetGMTCalendar.set(Calendar.getInstance(TimeZone.getTimeZone("GMT"))); } + parquetGMTCalendar.get().clear(); return parquetGMTCalendar.get(); } - private static Calendar getLocalCalendar() { - if (parquetLocalCalendar.get() == null) { - parquetLocalCalendar.set(Calendar.getInstance()); - } - return parquetLocalCalendar.get(); - } - - public static Calendar getCalendar(boolean skipConversion) { - Calendar calendar = skipConversion ? getLocalCalendar() : getGMTCalendar(); - calendar.clear(); // Reset all fields before reusing this instance - return calendar; + public static NanoTime getNanoTime(Timestamp ts, boolean skipConversion) { + return getNanoTime(ts, skipConversion, null); } - public static NanoTime getNanoTime(Timestamp ts, boolean skipConversion) { + /** + * Gets a NanoTime object, which represents timestamps as nanoseconds since epoch, from a + * Timestamp object. Parquet will store this NanoTime object as int96. + * + * If skipConversion flag is on, the timestamp will be converted to NanoTime as-is, i.e. + * timeZoneId argument will be ignored. + * If skipConversion is off, timestamp can be converted from a given time zone (timeZoneId) to UTC + * if timeZoneId is present, and if not present: from system time zone to UTC, before being + * converted to NanoTime. + * (See TimestampDataWriter#write for current Hive writing procedure.) 
+ */ + public static NanoTime getNanoTime(Timestamp ts, boolean skipConversion, ZoneId timeZoneId) { + if (skipConversion) { + timeZoneId = ZoneOffset.UTC; + } else if (timeZoneId == null) { + timeZoneId = TimeZone.getDefault().toZoneId(); + } + ts = TimestampTZUtil.convertTimestampToZone(ts, timeZoneId, ZoneOffset.UTC); - Calendar calendar = getCalendar(skipConversion); + Calendar calendar = getGMTCalendar(); calendar.setTimeInMillis(ts.toEpochMilli()); int year = calendar.get(Calendar.YEAR); if (calendar.get(Calendar.ERA) == GregorianCalendar.BC) { @@ -79,7 +90,29 @@ public static NanoTime getNanoTime(Timestamp ts, boolean skipConversion) { return new NanoTime(days, nanosOfDay); } - public static Timestamp getTimestamp(NanoTime nt, boolean skipConversion) { + public static Timestamp getTimestamp(NanoTime nt, boolean skipConversion) { + return getTimestamp(nt, skipConversion, null); + } + + /** + * Gets a Timestamp object from a NanoTime object, which represents a timestamp as a Julian day + * plus nanoseconds within that day. Parquet stores these as int96. + * + * After decoding the NanoTime, the resulting timestamp may be converted to a desired time zone + * (timeZoneId). This happens only when the skipConversion flag is off. + * If skipConversion is off and no timeZoneId is given, the timestamp is converted to the system + * time zone instead. + * + * For skipConversion to be true, it must be set in the conf AND the Parquet file must NOT have + * been written by Parquet's Java library (parquet-mr). This is enforced in + * ParquetRecordReaderBase#getSplit. + */ + public static Timestamp getTimestamp(NanoTime nt, boolean skipConversion, ZoneId timeZoneId) { + if (skipConversion) { + timeZoneId = ZoneOffset.UTC; + } else if (timeZoneId == null) { + timeZoneId = TimeZone.getDefault().toZoneId(); + } + int julianDay = nt.getJulianDay(); long nanosOfDay = nt.getTimeOfDayNanos(); @@ -92,7 +125,7 @@ public static Timestamp getTimestamp(NanoTime nt, boolean skipConversion) { } JDateTime jDateTime = new JDateTime((double) julianDay); - Calendar calendar = getCalendar(skipConversion); + Calendar calendar = getGMTCalendar(); calendar.set(Calendar.YEAR, jDateTime.getYear()); calendar.set(Calendar.MONTH, jDateTime.getMonth() - 1); //java calendar index starting at 1. 
calendar.set(Calendar.DAY_OF_MONTH, jDateTime.getDay()); @@ -107,7 +140,9 @@ public static Timestamp getTimestamp(NanoTime nt, boolean skipConversion) { calendar.set(Calendar.HOUR_OF_DAY, hour); calendar.set(Calendar.MINUTE, minutes); calendar.set(Calendar.SECOND, seconds); + Timestamp ts = Timestamp.ofEpochMilli(calendar.getTimeInMillis(), (int) nanos); + ts = TimestampTZUtil.convertTimestampToZone(ts, ZoneOffset.UTC, timeZoneId); return ts; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/BaseVectorizedColumnReader.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/BaseVectorizedColumnReader.java index 9ce1ba4591..e8fcb6b214 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/BaseVectorizedColumnReader.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/BaseVectorizedColumnReader.java @@ -39,6 +39,7 @@ import java.io.ByteArrayInputStream; import java.io.IOException; +import java.time.ZoneId; import static org.apache.parquet.column.ValuesType.DEFINITION_LEVEL; import static org.apache.parquet.column.ValuesType.REPETITION_LEVEL; @@ -53,6 +54,7 @@ private static final Logger LOG = LoggerFactory.getLogger(BaseVectorizedColumnReader.class); protected boolean skipTimestampConversion = false; + protected ZoneId writerTimezone = null; /** * Total number of values read. @@ -116,12 +118,14 @@ public BaseVectorizedColumnReader( ColumnDescriptor descriptor, PageReader pageReader, boolean skipTimestampConversion, + ZoneId writerTimezone, Type parquetType, TypeInfo hiveType) throws IOException { this.descriptor = descriptor; this.type = parquetType; this.pageReader = pageReader; this.maxDefLevel = descriptor.getMaxDefinitionLevel(); this.skipTimestampConversion = skipTimestampConversion; + this.writerTimezone = writerTimezone; this.hiveType = hiveType; DictionaryPage dictionaryPage = pageReader.readDictionaryPage(); @@ -130,7 +134,7 @@ public BaseVectorizedColumnReader( this.dictionary = ParquetDataColumnReaderFactory .getDataColumnReaderByTypeOnDictionary(parquetType.asPrimitiveType(), hiveType, dictionaryPage.getEncoding().initDictionary(descriptor, dictionaryPage), - skipTimestampConversion); + skipTimestampConversion, writerTimezone); this.isCurrentPageDictionaryEncoded = true; } catch (IOException e) { throw new IOException("could not decode the dictionary for " + descriptor, e); @@ -182,11 +186,11 @@ private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, int } dataColumn = ParquetDataColumnReaderFactory.getDataColumnReaderByType(type.asPrimitiveType(), hiveType, dataEncoding.getDictionaryBasedValuesReader(descriptor, VALUES, dictionary - .getDictionary()), skipTimestampConversion); + .getDictionary()), skipTimestampConversion, writerTimezone); this.isCurrentPageDictionaryEncoded = true; } else { dataColumn = ParquetDataColumnReaderFactory.getDataColumnReaderByType(type.asPrimitiveType(), hiveType, - dataEncoding.getValuesReader(descriptor, VALUES), skipTimestampConversion); + dataEncoding.getValuesReader(descriptor, VALUES), skipTimestampConversion, writerTimezone); this.isCurrentPageDictionaryEncoded = false; } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReaderFactory.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReaderFactory.java index 9170e9f9b3..2eb3a9bfaf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReaderFactory.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReaderFactory.java @@ -42,6 +42,7 @@ import java.io.UnsupportedEncodingException; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.time.ZoneId; import java.util.Arrays; /** @@ -980,16 +981,20 @@ private static String convertToString(boolean value) { */ public static class TypesFromInt96PageReader extends DefaultParquetDataColumnReader { private boolean skipTimestampConversion = false; + private ZoneId writerTimezone; public TypesFromInt96PageReader(ValuesReader realReader, int length, - boolean skipTimestampConversion) { + boolean skipTimestampConversion, ZoneId writerTimezone) { super(realReader, length); this.skipTimestampConversion = skipTimestampConversion; + this.writerTimezone = writerTimezone; } - public TypesFromInt96PageReader(Dictionary dict, int length, boolean skipTimestampConversion) { + public TypesFromInt96PageReader(Dictionary dict, int length, boolean skipTimestampConversion, + ZoneId writerTimezone) { super(dict, length); this.skipTimestampConversion = skipTimestampConversion; + this.writerTimezone = writerTimezone; } private Timestamp convert(Binary binary) { @@ -998,7 +1003,7 @@ private Timestamp convert(Binary binary) { long timeOfDayNanos = buf.getLong(); int julianDay = buf.getInt(); NanoTime nt = new NanoTime(julianDay, timeOfDayNanos); - return NanoTimeUtils.getTimestamp(nt, skipTimestampConversion); + return NanoTimeUtils.getTimestamp(nt, skipTimestampConversion, writerTimezone); } @Override @@ -1185,7 +1190,8 @@ private static ParquetDataColumnReader getDataColumnReaderByTypeHelper(boolean i Dictionary dictionary, ValuesReader valuesReader, boolean - skipTimestampConversion) + skipTimestampConversion, + ZoneId writerTimezone) throws IOException { // max length for varchar and char cases int length = getVarcharLength(hiveType); @@ -1293,8 +1299,8 @@ private static ParquetDataColumnReader getDataColumnReaderByTypeHelper(boolean i TypesFromFloatPageReader(valuesReader, length); case INT96: return isDictionary ? new TypesFromInt96PageReader(dictionary, length, - skipTimestampConversion) : new - TypesFromInt96PageReader(valuesReader, length, skipTimestampConversion); + skipTimestampConversion, writerTimezone) : new + TypesFromInt96PageReader(valuesReader, length, skipTimestampConversion, writerTimezone); case BOOLEAN: return isDictionary ? 
new TypesFromBooleanPageReader(dictionary, length) : new TypesFromBooleanPageReader(valuesReader, length); @@ -1341,19 +1347,22 @@ private static ParquetDataColumnReader getConvertorFromBinary(boolean isDict, public static ParquetDataColumnReader getDataColumnReaderByTypeOnDictionary( PrimitiveType parquetType, TypeInfo hiveType, - Dictionary realReader, boolean skipTimestampConversion) + Dictionary realReader, + boolean skipTimestampConversion, + ZoneId writerTimezone) throws IOException { return getDataColumnReaderByTypeHelper(true, parquetType, hiveType, realReader, null, - skipTimestampConversion); + skipTimestampConversion, writerTimezone); } public static ParquetDataColumnReader getDataColumnReaderByType(PrimitiveType parquetType, TypeInfo hiveType, ValuesReader realReader, - boolean skipTimestampConversion) + boolean skipTimestampConversion, + ZoneId writerTimezone) throws IOException { return getDataColumnReaderByTypeHelper(false, parquetType, hiveType, null, realReader, - skipTimestampConversion); + skipTimestampConversion, writerTimezone); } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java index 7e52b076b7..5c1ce70075 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java @@ -28,6 +28,7 @@ import org.apache.parquet.column.page.PageReader; import org.apache.parquet.schema.Type; import java.io.IOException; +import java.time.ZoneId; import java.util.ArrayList; import java.util.List; @@ -47,9 +48,9 @@ boolean isFirstRow = true; public VectorizedListColumnReader(ColumnDescriptor descriptor, PageReader pageReader, - boolean skipTimestampConversion, Type type, TypeInfo hiveType) + boolean skipTimestampConversion, ZoneId writerTimezone, Type type, TypeInfo hiveType) throws IOException { - super(descriptor, pageReader, skipTimestampConversion, type, hiveType); + super(descriptor, pageReader, skipTimestampConversion, writerTimezone, type, hiveType); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java index f64efe26f5..f0b512e3bd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java @@ -73,6 +73,7 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.time.ZoneId; import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; @@ -125,6 +126,7 @@ * rows of all the row groups. 
*/ protected long totalRowCount = 0; + private ZoneId writerTimezone; public VectorizedParquetRecordReader( org.apache.hadoop.mapred.InputSplit oldInputSplit, JobConf conf) { @@ -249,6 +251,8 @@ public void initialize( this.totalRowCount += block.getRowCount(); } this.fileSchema = footer.getFileMetaData().getSchema(); + this.writerTimezone = DataWritableReadSupport + .getWriterTimeZoneId(footer.getFileMetaData().getKeyValueMetaData()); colsToInclude = ColumnProjectionUtils.getReadColumnIDs(configuration); requestedSchema = DataWritableReadSupport @@ -438,13 +442,13 @@ private void checkEndOfRowGroup() throws IOException { for (int i = 0; i < types.size(); ++i) { columnReaders[i] = buildVectorizedParquetReader(columnTypesList.get(colsToInclude.get(i)), types.get(i), - pages, requestedSchema.getColumns(), skipTimestampConversion, 0); + pages, requestedSchema.getColumns(), skipTimestampConversion, writerTimezone, 0); } } } else { for (int i = 0; i < types.size(); ++i) { columnReaders[i] = buildVectorizedParquetReader(columnTypesList.get(i), types.get(i), pages, - requestedSchema.getColumns(), skipTimestampConversion, 0); + requestedSchema.getColumns(), skipTimestampConversion, writerTimezone, 0); } } @@ -487,6 +491,7 @@ private VectorizedColumnReader buildVectorizedParquetReader( PageReadStore pages, List columnDescriptors, boolean skipTimestampConversion, + ZoneId writerTimezone, int depth) throws IOException { List descriptors = getAllColumnDescriptorByType(depth, type, columnDescriptors); @@ -498,7 +503,8 @@ private VectorizedColumnReader buildVectorizedParquetReader( } if (fileSchema.getColumns().contains(descriptors.get(0))) { return new VectorizedPrimitiveColumnReader(descriptors.get(0), - pages.getPageReader(descriptors.get(0)), skipTimestampConversion, type, typeInfo); + pages.getPageReader(descriptors.get(0)), skipTimestampConversion, writerTimezone, type, + typeInfo); } else { // Support for schema evolution return new VectorizedDummyColumnReader(); @@ -511,7 +517,7 @@ private VectorizedColumnReader buildVectorizedParquetReader( for (int i = 0; i < fieldTypes.size(); i++) { VectorizedColumnReader r = buildVectorizedParquetReader(fieldTypes.get(i), types.get(i), pages, descriptors, - skipTimestampConversion, depth + 1); + skipTimestampConversion, writerTimezone, depth + 1); if (r != null) { fieldReaders.add(r); } else { @@ -529,7 +535,8 @@ private VectorizedColumnReader buildVectorizedParquetReader( } return new VectorizedListColumnReader(descriptors.get(0), - pages.getPageReader(descriptors.get(0)), skipTimestampConversion, getElementType(type), + pages.getPageReader(descriptors.get(0)), skipTimestampConversion, writerTimezone, + getElementType(type), typeInfo); case MAP: if (columnDescriptors == null || columnDescriptors.isEmpty()) { @@ -562,10 +569,10 @@ private VectorizedColumnReader buildVectorizedParquetReader( List kvTypes = groupType.getFields(); VectorizedListColumnReader keyListColumnReader = new VectorizedListColumnReader( descriptors.get(0), pages.getPageReader(descriptors.get(0)), skipTimestampConversion, - kvTypes.get(0), typeInfo); + writerTimezone, kvTypes.get(0), typeInfo); VectorizedListColumnReader valueListColumnReader = new VectorizedListColumnReader( descriptors.get(1), pages.getPageReader(descriptors.get(1)), skipTimestampConversion, - kvTypes.get(1), typeInfo); + writerTimezone, kvTypes.get(1), typeInfo); return new VectorizedMapColumnReader(keyListColumnReader, valueListColumnReader); case UNION: default: diff --git 
ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java index e4c61562e4..9994190164 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java @@ -28,6 +28,7 @@ import org.apache.parquet.schema.Type; import java.io.IOException; +import java.time.ZoneId; import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32; import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64; @@ -42,8 +43,11 @@ public VectorizedPrimitiveColumnReader( ColumnDescriptor descriptor, PageReader pageReader, boolean skipTimestampConversion, - Type type, TypeInfo hiveType) throws IOException { - super(descriptor, pageReader, skipTimestampConversion, type, hiveType); + ZoneId writerTimezone, + Type type, + TypeInfo hiveType) + throws IOException { + super(descriptor, pageReader, skipTimestampConversion, writerTimezone, type, hiveType); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriteSupport.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriteSupport.java index d13b898023..352e9141fb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriteSupport.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriteSupport.java @@ -14,6 +14,8 @@ package org.apache.hadoop.hive.ql.io.parquet.write; import java.util.HashMap; +import java.util.Map; +import java.util.TimeZone; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.serde2.io.ParquetHiveRecord; @@ -31,6 +33,7 @@ public class DataWritableWriteSupport extends WriteSupport { public static final String PARQUET_HIVE_SCHEMA = "parquet.hive.schema"; + public static final String WRITER_TIMEZONE = "writer.time.zone"; private DataWritableWriter writer; private MessageType schema; @@ -46,7 +49,9 @@ public static MessageType getSchema(final Configuration configuration) { @Override public WriteContext init(final Configuration configuration) { schema = getSchema(configuration); - return new WriteContext(schema, new HashMap()); + Map metaData = new HashMap<>(); + metaData.put(WRITER_TIMEZONE, TimeZone.getDefault().toZoneId().toString()); + return new WriteContext(schema, metaData); } @Override diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java index 477825e3f4..52b84015f6 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java @@ -13,12 +13,13 @@ */ package org.apache.hadoop.hive.ql.io.parquet.serde; +import java.time.ZoneId; import java.util.Calendar; -import java.util.GregorianCalendar; import java.util.TimeZone; import java.util.concurrent.TimeUnit; import org.apache.hadoop.hive.common.type.Timestamp; +import org.apache.hadoop.hive.common.type.TimestampTZUtil; import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime; import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils; @@ -32,6 +33,10 @@ */ public class TestParquetTimestampUtils extends TestCase { + public static final ZoneId GMT = ZoneId.of("GMT"); + public static final ZoneId US_PACIFIC = 
ZoneId.of("US/Pacific"); + public static final ZoneId NEW_YORK = ZoneId.of("America/New_York"); + public void testJulianDay() { //check if May 23, 1968 is Julian Day 2440000 Calendar cal = Calendar.getInstance(); @@ -76,10 +81,11 @@ public void testJulianDay() { Assert.assertEquals(ts2Fetched, ts2); Assert.assertEquals(nt2.getJulianDay() - nt1.getJulianDay(), 30); - //check if 1464305 Julian Days between Jan 1, 2005 BC and Jan 31, 2005. + // check if 730517 Julian Days between Jan 1, 0005 and Jan 31, 2005. + // This method used to test Julian Days between Jan 1, 2005 BCE and Jan 1, 2005 CE. Since BCE + // timestamps are not supported, both dates were changed to CE. cal1 = Calendar.getInstance(); - cal1.set(Calendar.ERA, GregorianCalendar.BC); - cal1.set(Calendar.YEAR, 2005); + cal1.set(Calendar.YEAR, 0005); cal1.set(Calendar.MONTH, Calendar.JANUARY); cal1.set(Calendar.DAY_OF_MONTH, 1); cal1.set(Calendar.HOUR_OF_DAY, 0); @@ -103,7 +109,7 @@ public void testJulianDay() { ts2Fetched = NanoTimeUtils.getTimestamp(nt2, false); Assert.assertEquals(ts2Fetched, ts2); - Assert.assertEquals(nt2.getJulianDay() - nt1.getJulianDay(), 1464305); + Assert.assertEquals(nt2.getJulianDay() - nt1.getJulianDay(), 730517); } public void testNanos() { @@ -119,7 +125,7 @@ public void testNanos() { Timestamp ts = Timestamp.ofEpochMilli(cal.getTimeInMillis(), 1); //(1*60*60 + 1*60 + 1) * 10e9 + 1 - NanoTime nt = NanoTimeUtils.getNanoTime(ts, false); + NanoTime nt = NanoTimeUtils.getNanoTime(ts, false, GMT); Assert.assertEquals(nt.getTimeOfDayNanos(), 3661000000001L); //case 2: 23:59:59.999999999 @@ -134,7 +140,7 @@ public void testNanos() { ts = Timestamp.ofEpochMilli(cal.getTimeInMillis(), 999999999); //(23*60*60 + 59*60 + 59)*10e9 + 999999999 - nt = NanoTimeUtils.getNanoTime(ts, false); + nt = NanoTimeUtils.getNanoTime(ts, false, GMT); Assert.assertEquals(nt.getTimeOfDayNanos(), 86399999999999L); //case 3: verify the difference. 
@@ -158,15 +164,15 @@ public void testNanos() { cal1.setTimeZone(TimeZone.getTimeZone("GMT")); Timestamp ts1 = Timestamp.ofEpochMilli(cal1.getTimeInMillis(), 1); - NanoTime n2 = NanoTimeUtils.getNanoTime(ts2, false); - NanoTime n1 = NanoTimeUtils.getNanoTime(ts1, false); + NanoTime n2 = NanoTimeUtils.getNanoTime(ts2, false, GMT); + NanoTime n1 = NanoTimeUtils.getNanoTime(ts1, false, GMT); Assert.assertEquals(n2.getTimeOfDayNanos() - n1.getTimeOfDayNanos(), 600000000009L); NanoTime n3 = new NanoTime(n1.getJulianDay() - 1, n1.getTimeOfDayNanos() + TimeUnit.DAYS.toNanos(1)); - Assert.assertEquals(ts1, NanoTimeUtils.getTimestamp(n3, false)); + Assert.assertEquals(ts1, NanoTimeUtils.getTimestamp(n3, false, GMT)); n3 = new NanoTime(n1.getJulianDay() + 3, n1.getTimeOfDayNanos() - TimeUnit.DAYS.toNanos(3)); - Assert.assertEquals(ts1, NanoTimeUtils.getTimestamp(n3, false)); + Assert.assertEquals(ts1, NanoTimeUtils.getTimestamp(n3, false, GMT)); } public void testTimezone() { @@ -179,6 +185,7 @@ public void testTimezone() { cal.set(Calendar.SECOND, 1); cal.setTimeZone(TimeZone.getTimeZone("US/Pacific")); Timestamp ts = Timestamp.ofEpochMilli(cal.getTimeInMillis(), 1); + ts = TimestampTZUtil.convertTimestampToZone(ts, GMT, US_PACIFIC); /** * 17:00 PDT = 00:00 GMT (daylight-savings) @@ -187,7 +194,7 @@ public void testTimezone() { * 17:00 PST = 01:00 GMT (if not daylight savings) * (1*60*60 + 1*60 + 1)*10e9 + 1 = 3661000000001 */ - NanoTime nt = NanoTimeUtils.getNanoTime(ts, false); + NanoTime nt = NanoTimeUtils.getNanoTime(ts, false, US_PACIFIC); long timeOfDayNanos = nt.getTimeOfDayNanos(); Assert.assertTrue(timeOfDayNanos == 61000000001L || timeOfDayNanos == 3661000000001L); @@ -206,15 +213,15 @@ public void testTimezonelessValues() { public void testTimezoneless() { Timestamp ts1 = Timestamp.valueOf("2011-01-01 00:30:30.111111111"); NanoTime nt1 = NanoTimeUtils.getNanoTime(ts1, true); - Assert.assertEquals(nt1.getJulianDay(), 2455562); - Assert.assertEquals(nt1.getTimeOfDayNanos(), 59430111111111L); + Assert.assertEquals(nt1.getJulianDay(), 2455563); + Assert.assertEquals(nt1.getTimeOfDayNanos(), 1830111111111L); Timestamp ts1Fetched = NanoTimeUtils.getTimestamp(nt1, true); Assert.assertEquals(ts1Fetched.toString(), ts1.toString()); Timestamp ts2 = Timestamp.valueOf("2011-02-02 08:30:30.222222222"); NanoTime nt2 = NanoTimeUtils.getNanoTime(ts2, true); Assert.assertEquals(nt2.getJulianDay(), 2455595); - Assert.assertEquals(nt2.getTimeOfDayNanos(), 1830222222222L); + Assert.assertEquals(nt2.getTimeOfDayNanos(), 30630222222222L); Timestamp ts2Fetched = NanoTimeUtils.getTimestamp(nt2, true); Assert.assertEquals(ts2Fetched.toString(), ts2.toString()); } @@ -254,4 +261,13 @@ private void verifyTsString(String tsString, boolean local) { Timestamp tsFetched = NanoTimeUtils.getTimestamp(nt, local); Assert.assertEquals(tsString, tsFetched.toString()); } + + public void testConvertTimestampToZone() { + Timestamp ts = Timestamp.valueOf("2018-01-01 00:00:00"); + Timestamp ts1 = TimestampTZUtil.convertTimestampToZone(ts, NEW_YORK, US_PACIFIC); + Assert.assertTrue(Timestamp.valueOf("2017-12-31 21:00:00").equals(ts1)); + + Timestamp ts2 = TimestampTZUtil.convertTimestampToZone(ts, US_PACIFIC, NEW_YORK); + Assert.assertTrue(Timestamp.valueOf("2018-01-01 03:00:00").equals(ts2)); + } } diff --git ql/src/test/queries/clientpositive/parquet_external_time.q ql/src/test/queries/clientpositive/parquet_external_time.q index d83125cdba..19a7059f20 100644 --- ql/src/test/queries/clientpositive/parquet_external_time.q +++ 
ql/src/test/queries/clientpositive/parquet_external_time.q @@ -1,4 +1,5 @@ set hive.vectorized.execution.enabled=false; +set hive.parquet.timestamp.skip.conversion=true; create table timetest_parquet(t timestamp) stored as parquet; diff --git ql/src/test/queries/clientpositive/parquet_historical_timestamp.q ql/src/test/queries/clientpositive/parquet_historical_timestamp.q new file mode 100644 index 0000000000..3d2b382af7 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_historical_timestamp.q @@ -0,0 +1,16 @@ +--These files were created by inserting timestamp '2019-01-01 00:30:30.111111111' where writer time zone is Europe/Rome. + +--older writer: time zone dependent behavior. convert to reader time zone +create table legacy_table (t timestamp) stored as parquet; + +load data local inpath '../../data/files/parquet_historical_timestamp_legacy.parq' into table legacy_table; + +select * from legacy_table; + + +--newer writer: time zone agnostic behavior. convert to writer time zone +create table new_table (t timestamp) stored as parquet; + +load data local inpath '../../data/files/parquet_historical_timestamp_new.parq' into table new_table; + +select * from new_table; \ No newline at end of file diff --git ql/src/test/results/clientpositive/parquet_analyze.q.out ql/src/test/results/clientpositive/parquet_analyze.q.out index e746621afa..5a6cff9117 100644 --- ql/src/test/results/clientpositive/parquet_analyze.q.out +++ ql/src/test/results/clientpositive/parquet_analyze.q.out @@ -94,7 +94,7 @@ Table Parameters: numFiles 1 numRows 100 rawDataSize 700 - totalSize 6692 + totalSize 6725 #### A masked pattern was here #### # Storage Information @@ -142,7 +142,7 @@ Table Parameters: numFiles 1 numRows 100 rawDataSize 5936 - totalSize 6692 + totalSize 6725 #### A masked pattern was here #### # Storage Information diff --git ql/src/test/results/clientpositive/parquet_historical_timestamp.q.out ql/src/test/results/clientpositive/parquet_historical_timestamp.q.out new file mode 100644 index 0000000000..9d50b22677 --- /dev/null +++ ql/src/test/results/clientpositive/parquet_historical_timestamp.q.out @@ -0,0 +1,50 @@ +PREHOOK: query: create table legacy_table (t timestamp) stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@legacy_table +POSTHOOK: query: create table legacy_table (t timestamp) stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@legacy_table +PREHOOK: query: load data local inpath '../../data/files/parquet_historical_timestamp_legacy.parq' into table legacy_table +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@legacy_table +POSTHOOK: query: load data local inpath '../../data/files/parquet_historical_timestamp_legacy.parq' into table legacy_table +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@legacy_table +PREHOOK: query: select * from legacy_table +PREHOOK: type: QUERY +PREHOOK: Input: default@legacy_table +#### A masked pattern was here #### +POSTHOOK: query: select * from legacy_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@legacy_table +#### A masked pattern was here #### +2018-12-31 16:30:30.111111111 +PREHOOK: query: create table new_table (t timestamp) stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@new_table +POSTHOOK: query: create table new_table (t timestamp) stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: 
Output: database:default +POSTHOOK: Output: default@new_table +PREHOOK: query: load data local inpath '../../data/files/parquet_historical_timestamp_new.parq' into table new_table +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@new_table +POSTHOOK: query: load data local inpath '../../data/files/parquet_historical_timestamp_new.parq' into table new_table +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@new_table +PREHOOK: query: select * from new_table +PREHOOK: type: QUERY +PREHOOK: Input: default@new_table +#### A masked pattern was here #### +POSTHOOK: query: select * from new_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@new_table +#### A masked pattern was here #### +2019-01-01 00:30:30.111111111 diff --git ql/src/test/results/clientpositive/parquet_vectorization_0.q.out ql/src/test/results/clientpositive/parquet_vectorization_0.q.out index a6cd581d73..2cda401cce 100644 --- ql/src/test/results/clientpositive/parquet_vectorization_0.q.out +++ ql/src/test/results/clientpositive/parquet_vectorization_0.q.out @@ -1783,7 +1783,7 @@ STAGE PLANS: serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe - totalSize 595103 + totalSize 595136 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe @@ -1805,7 +1805,7 @@ STAGE PLANS: serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe - totalSize 595103 + totalSize 595136 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe name: default.alltypesparquet @@ -30604,7 +30604,7 @@ STAGE PLANS: serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe - totalSize 595103 + totalSize 595136 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe @@ -30626,7 +30626,7 @@ STAGE PLANS: serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe - totalSize 595103 + totalSize 595136 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe name: default.alltypesparquet @@ -30720,7 +30720,7 @@ STAGE PLANS: serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib 
org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe - totalSize 595103 + totalSize 595136 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe @@ -30742,7 +30742,7 @@ STAGE PLANS: serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe - totalSize 595103 + totalSize 595136 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe name: default.alltypesparquet @@ -30837,7 +30837,7 @@ STAGE PLANS: serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe - totalSize 595103 + totalSize 595136 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe @@ -30859,7 +30859,7 @@ STAGE PLANS: serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe - totalSize 595103 + totalSize 595136 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe name: default.alltypesparquet @@ -30942,7 +30942,7 @@ STAGE PLANS: serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe - totalSize 595103 + totalSize 595136 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe @@ -30964,7 +30964,7 @@ STAGE PLANS: serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe - totalSize 595103 + totalSize 595136 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe name: default.alltypesparquet diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out index 227514a78a..d6ae56cadb 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out @@ -1874,7 +1874,7 @@ STAGE PLANS: serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib 
org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe - totalSize 595103 + totalSize 595136 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe @@ -1896,7 +1896,7 @@ STAGE PLANS: serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe - totalSize 595103 + totalSize 595136 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe name: default.alltypesparquet @@ -30700,7 +30700,7 @@ STAGE PLANS: serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe - totalSize 595103 + totalSize 595136 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe @@ -30722,7 +30722,7 @@ STAGE PLANS: serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe - totalSize 595103 + totalSize 595136 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe name: default.alltypesparquet @@ -30819,7 +30819,7 @@ STAGE PLANS: serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe - totalSize 595103 + totalSize 595136 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe @@ -30841,7 +30841,7 @@ STAGE PLANS: serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe - totalSize 595103 + totalSize 595136 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe name: default.alltypesparquet @@ -30939,7 +30939,7 @@ STAGE PLANS: serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe - totalSize 595103 + totalSize 595136 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe @@ -30961,7 +30961,7 @@ STAGE PLANS: serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, 
string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe - totalSize 595103 + totalSize 595136 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe name: default.alltypesparquet @@ -31049,7 +31049,7 @@ STAGE PLANS: serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe - totalSize 595103 + totalSize 595136 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe @@ -31071,7 +31071,7 @@ STAGE PLANS: serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe - totalSize 595103 + totalSize 595136 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe name: default.alltypesparquet
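Illustrative aside, not part of the patch: a usage sketch (hypothetical class name) of the writer time zone metadata round trip introduced above. DataWritableWriteSupport.init stores the writer's zone under WRITER_TIMEZONE, and DataWritableReadSupport.getWriterTimeZoneId recovers it, returning null for pre-HIVE-21290 files so the reader falls back to the system time zone.

import java.time.ZoneId;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport;
import org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriteSupport;

public class WriterTimeZoneMetadataSketch {
  public static void main(String[] args) {
    // Simulate the footer metadata written by DataWritableWriteSupport.init.
    Map<String, String> footerMetadata = new HashMap<>();
    footerMetadata.put(DataWritableWriteSupport.WRITER_TIMEZONE, "Europe/Rome");
    ZoneId writerZone = DataWritableReadSupport.getWriterTimeZoneId(footerMetadata);
    System.out.println(writerZone); // Europe/Rome
    // A file written before HIVE-21290 has no such key, so the reader gets null and
    // converts timestamps to the system time zone instead.
    System.out.println(DataWritableReadSupport.getWriterTimeZoneId(new HashMap<>())); // null
  }
}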