diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetOutputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetOutputFormat.java index 26f1e75c7d659a634cd4eef3a0cb8e886b22722f..a1b3a2fa5a98b6680ea6360f28af5041b390b98b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetOutputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetOutputFormat.java @@ -139,14 +139,13 @@ private TimeZone getParquetWriterTimeZone(Properties tableProperties) { String timeZoneID = tableProperties.getProperty(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY); if (!Strings.isNullOrEmpty(timeZoneID)) { - if (!Arrays.asList(TimeZone.getAvailableIDs()).contains(timeZoneID)) { + if (TimeZone.getTimeZone(timeZoneID).getID().equals("GMT") + && !"GMT".equals(timeZoneID)) { throw new IllegalStateException("Unexpected timezone id found for parquet int96 conversion: " + timeZoneID); } return TimeZone.getTimeZone(timeZoneID); } - // If no timezone is defined in table properties, then adjust timestamps using - // PARQUET_INT96_NO_ADJUSTMENT_ZONE timezone - return TimeZone.getTimeZone(ParquetTableUtils.PARQUET_INT96_NO_ADJUSTMENT_ZONE); + return TimeZone.getDefault(); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java index 8e33b7d437894b33b35f32913a3bc02f2a849ce3..4fbf23e44d589b0d79c8661b160b53271de1e796 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java @@ -44,7 +44,6 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import java.util.TimeZone; @@ -170,7 +169,7 @@ protected void setTimeZoneConversion(Configuration configuration, Path finalPath boolean skipConversion = HiveConf.getBoolVar(configuration, 
HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION); FileMetaData fileMetaData = parquetMetadata.getFileMetaData(); - if (!Strings.nullToEmpty(fileMetaData.getCreatedBy()).startsWith("parquet-mr") || + if (!Strings.nullToEmpty(fileMetaData.getCreatedBy()).startsWith("parquet-mr") && skipConversion) { // Impala writes timestamp values using GMT only. We should not try to convert Impala // files to other type of timezones. @@ -179,16 +178,17 @@ protected void setTimeZoneConversion(Configuration configuration, Path finalPath // TABLE_PARQUET_INT96_TIMEZONE is a table property used to detect what timezone conversion // to use when reading Parquet timestamps. timeZoneID = configuration.get(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY, - ParquetTableUtils.PARQUET_INT96_NO_ADJUSTMENT_ZONE); + TimeZone.getDefault().getID()); - if (!Arrays.asList(TimeZone.getAvailableIDs()).contains(timeZoneID)) { - throw new IllegalStateException("Unexpected timezone id found for parquet int96 conversion: " + timeZoneID); + if (TimeZone.getTimeZone(timeZoneID).getID().equals("GMT") + && !"GMT".equals(timeZoneID)) { + throw new IllegalStateException( + "Unexpected timezone id found for parquet int96 conversion: " + timeZoneID); } } // 'timeZoneID' should be valid, since we did not throw exception above - configuration.set(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY, - TimeZone.getTimeZone(timeZoneID).getID()); + configuration.set(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY, timeZoneID); } public FilterCompat.Filter setFilter(final JobConf conf, MessageType schema) { diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java index 5dc808800290f3274afbdff12134ac34387a746b..f2f9035dfeb9ea492aea1b7088d54dcd5f293e0f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java @@ -152,7 +152,7 @@ public static Timestamp getTimestamp(NanoTime nt, Calendar calendar) { calendar.setTimeInMillis(utcCalendar.getTimeInMillis()); - Calendar adjusterCalendar = copyToCalendarWithTZ(calendar, Calendar.getInstance()); + Calendar adjusterCalendar = copyToCalendarWithTZ(calendar, getLocalCalendar()); Timestamp ts = new Timestamp(adjusterCalendar.getTimeInMillis()); ts.setNanos((int) nanos); diff --git ql/src/test/queries/clientpositive/parquet_int96_timestamp.q ql/src/test/queries/clientpositive/parquet_int96_timestamp.q index 5de2c3f1244b8340b97eb0547fe66e52d80fb065..6eadd1b0a3313cbba7a798890b802baae302749e 100644 --- ql/src/test/queries/clientpositive/parquet_int96_timestamp.q +++ ql/src/test/queries/clientpositive/parquet_int96_timestamp.q @@ -2,7 +2,7 @@ create table dummy (id int); insert into table dummy values (1); set hive.parquet.mr.int96.enable.utc.write.zone=true; -set hive.parquet.timestamp.skip.conversion=false; +set hive.parquet.timestamp.skip.conversion=true; -- read/write timestamps using UTC as default write zone create table timestamps (ts timestamp) stored as parquet;