diff --git a/common/src/java/org/apache/hadoop/hive/common/type/CalendarUtils.java b/common/src/java/org/apache/hadoop/hive/common/type/CalendarUtils.java new file mode 100644 index 0000000000..9b491d0683 --- /dev/null +++ b/common/src/java/org/apache/hadoop/hive/common/type/CalendarUtils.java @@ -0,0 +1,183 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.common.type; + +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.GregorianCalendar; +import java.util.TimeZone; +import java.util.concurrent.TimeUnit; + +/** + * Conversion utilities from the hybrid Julian/Gregorian calendar to/from the + * proleptic Gregorian. + * + * The semantics here are to hold the string representation constant and change + * the epoch offset rather than holding the instant in time constant and changing + * the string representation. + * + * These utilities will be fast for the common case (> 1582 AD), but slow for + * old dates. + */ +public class CalendarUtils { + + private static SimpleDateFormat createFormatter(String fmt, + GregorianCalendar calendar) { + SimpleDateFormat result = new SimpleDateFormat(fmt); + result.setCalendar(calendar); + return result; + } + + private static final String DATE = "yyyy-MM-dd"; + private static final String TIME = DATE + " HH:mm:ss.SSS"; + private static final TimeZone UTC = TimeZone.getTimeZone("UTC"); + private static final GregorianCalendar HYBRID = new GregorianCalendar(); + private static final ThreadLocal<SimpleDateFormat> HYBRID_DATE_FORMAT = + ThreadLocal.withInitial(() -> createFormatter(DATE, HYBRID)); + private static final ThreadLocal<SimpleDateFormat> HYBRID_TIME_FORMAT = + ThreadLocal.withInitial(() -> createFormatter(TIME, HYBRID)); + private static final long SWITCHOVER_MILLIS; + private static final long SWITCHOVER_DAYS; + private static final GregorianCalendar PROLEPTIC = new GregorianCalendar(); + private static final ThreadLocal<SimpleDateFormat> PROLEPTIC_DATE_FORMAT = + ThreadLocal.withInitial(() -> createFormatter(DATE, PROLEPTIC)); + private static final ThreadLocal<SimpleDateFormat> PROLEPTIC_TIME_FORMAT = + ThreadLocal.withInitial(() -> createFormatter(TIME, PROLEPTIC)); + + static { + HYBRID.setTimeZone(UTC); + PROLEPTIC.setTimeZone(UTC); + PROLEPTIC.setGregorianChange(new Date(Long.MIN_VALUE)); + + // Get the last day where the two calendars agree with each other. + try { + SWITCHOVER_MILLIS = HYBRID_DATE_FORMAT.get().parse("1582-10-15").getTime(); + SWITCHOVER_DAYS = TimeUnit.MILLISECONDS.toDays(SWITCHOVER_MILLIS); + } catch (ParseException e) { + throw new IllegalArgumentException("Can't parse switch over date", e); + } + } + + /** + * Convert an epoch day from the hybrid Julian/Gregorian calendar to the + * proleptic Gregorian.
+ * @param hybrid day of epoch in the hybrid Julian/Gregorian + * @return day of epoch in the proleptic Gregorian + */ + public static int convertDateToProleptic(int hybrid) { + int proleptic = hybrid; + if (hybrid < SWITCHOVER_DAYS) { + String dateStr = HYBRID_DATE_FORMAT.get().format( + new Date(TimeUnit.DAYS.toMillis(hybrid))); + try { + proleptic = (int) TimeUnit.MILLISECONDS.toDays( + PROLEPTIC_DATE_FORMAT.get().parse(dateStr).getTime()); + } catch (ParseException e) { + throw new IllegalArgumentException("Can't parse " + dateStr, e); + } + } + return proleptic; + } + + /** + * Convert an epoch day from the proleptic Gregorian calendar to the hybrid + * Julian/Gregorian. + * @param proleptic day of epoch in the proleptic Gregorian + * @return day of epoch in the hybrid Julian/Gregorian + */ + public static int convertDateToHybrid(int proleptic) { + int hybrid = proleptic; + if (proleptic < SWITCHOVER_DAYS) { + String dateStr = PROLEPTIC_DATE_FORMAT.get().format( + new Date(TimeUnit.DAYS.toMillis(proleptic))); + try { + hybrid = (int) TimeUnit.MILLISECONDS.toDays( + HYBRID_DATE_FORMAT.get().parse(dateStr).getTime()); + } catch (ParseException e) { + throw new IllegalArgumentException("Can't parse " + dateStr, e); + } + } + return hybrid; + } + + public static int convertDate(int original, + boolean fromProleptic, + boolean toProleptic) { + if (fromProleptic != toProleptic) { + return toProleptic + ? convertDateToProleptic(original) + : convertDateToHybrid(original); + } else { + return original; + } + } + + public static long convertTime(long original, + boolean fromProleptic, + boolean toProleptic) { + if (fromProleptic != toProleptic) { + return toProleptic + ? convertTimeToProleptic(original) + : convertTimeToHybrid(original); + } else { + return original; + } + } + /** + * Convert epoch millis from the hybrid Julian/Gregorian calendar to the + * proleptic Gregorian. + * @param hybrid millis of epoch in the hybrid Julian/Gregorian + * @return millis of epoch in the proleptic Gregorian + */ + public static long convertTimeToProleptic(long hybrid) { + long proleptic = hybrid; + if (hybrid < SWITCHOVER_MILLIS) { + String dateStr = HYBRID_TIME_FORMAT.get().format(new Date(hybrid)); + try { + proleptic = PROLEPTIC_TIME_FORMAT.get().parse(dateStr).getTime(); + } catch (ParseException e) { + throw new IllegalArgumentException("Can't parse " + dateStr, e); + } + } + return proleptic; + } + + /** + * Convert epoch millis from the proleptic Gregorian calendar to the hybrid + * Julian/Gregorian.
+ * @param proleptic millis of epoch in the proleptic Gregorian + * @return millis of epoch in the hybrid Julian/Gregorian + */ + public static long convertTimeToHybrid(long proleptic) { + long hybrid = proleptic; + if (proleptic < SWITCHOVER_MILLIS) { + String dateStr = PROLEPTIC_TIME_FORMAT.get().format(new Date(proleptic)); + try { + hybrid = HYBRID_TIME_FORMAT.get().parse(dateStr).getTime(); + } catch (ParseException e) { + throw new IllegalArgumentException("Can't parse " + dateStr, e); + } + } + return hybrid; + } + + private CalendarUtils() { + throw new UnsupportedOperationException(); + } +} \ No newline at end of file diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 76c6e39a48..7a1a8a43be 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1999,12 +1999,24 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION("hive.parquet.timestamp.skip.conversion", true, "Current Hive implementation of parquet stores timestamps to UTC, this flag allows skipping of the conversion" + "on reading parquet files from other tools"), + HIVE_PARQUET_DATE_PROLEPTIC_GREGORIAN("hive.parquet.date.proleptic.gregorian", false, + "Should we write date using the proleptic Gregorian calendar instead of the hybrid Julian/Gregorian?\n" + + "Hybrid is the default."), + HIVE_PARQUET_DATE_PROLEPTIC_GREGORIAN_DEFAULT("hive.parquet.date.proleptic.gregorian.default", false, + "This value controls whether date type in Parquet files was written using the hybrid or proleptic\n" + + "calendar. Hybrid is the default."), HIVE_AVRO_TIMESTAMP_SKIP_CONVERSION("hive.avro.timestamp.skip.conversion", false, "Some older Hive implementations (pre-3.1) wrote Avro timestamps in a UTC-normalized" + "manner, while from version 3.1 until now Hive wrote time zone agnostic timestamps. " + "Setting this flag to true will treat legacy timestamps as time zone agnostic. Setting " + "it to false will treat legacy timestamps as UTC-normalized. This flag will not affect " + "timestamps written after this change."), + HIVE_AVRO_PROLEPTIC_GREGORIAN("hive.avro.proleptic.gregorian", false, + "Should we write date and timestamp using the proleptic Gregorian calendar instead of the hybrid Julian/Gregorian?\n" + + "Hybrid is the default."), + HIVE_AVRO_PROLEPTIC_GREGORIAN_DEFAULT("hive.avro.proleptic.gregorian.default", false, + "This value controls whether date and timestamp type in Avro files was written using the hybrid or proleptic\n" + + "calendar. Hybrid is the default."), HIVE_INT_TIMESTAMP_CONVERSION_IN_SECONDS("hive.int.timestamp.conversion.in.seconds", false, "Boolean/tinyint/smallint/int/bigint value is interpreted as milliseconds during the timestamp conversion.\n" + "Set this flag to true to interpret the value as seconds to be consistent with float/double."
), diff --git a/data/files/avro_date.txt b/data/files/avro_date.txt index 0858896e5e..939db2603c 100644 --- a/data/files/avro_date.txt +++ b/data/files/avro_date.txt @@ -2,3 +2,7 @@ 2014-02-11|baz:1981-12-16|2011-09-05 1947-02-11|baz:1921-12-16|2011-09-05 8200-02-11|baz:6981-12-16|1039-09-05 +1411-02-21|foo:0980-12-16,bar:0998-05-07|0011-09-04,1411-09-05 +1211-02-11|baz:0981-12-16|0011-09-05 +0849-02-11|baz:0921-12-16|0011-09-05 +0605-02-11|baz:0981-12-16|0039-09-05 diff --git a/data/files/avro_legacy_mixed_dates.avro b/data/files/avro_legacy_mixed_dates.avro new file mode 100644 index 0000000000..f80f6d9d1b Binary files /dev/null and b/data/files/avro_legacy_mixed_dates.avro differ diff --git a/data/files/avro_legacy_mixed_timestamps.avro b/data/files/avro_legacy_mixed_timestamps.avro new file mode 100644 index 0000000000..690f5bdf48 Binary files /dev/null and b/data/files/avro_legacy_mixed_timestamps.avro differ diff --git a/data/files/avro_timestamp.txt b/data/files/avro_timestamp.txt index a989f0e0b7..6af27ba387 100644 --- a/data/files/avro_timestamp.txt +++ b/data/files/avro_timestamp.txt @@ -1,4 +1,8 @@ 2012-02-21 07:08:09.123|foo:1980-12-16 07:08:09.123,bar:1998-05-07 07:08:09.123|2011-09-04 07:08:09.123,2011-09-05 07:08:09.123 2014-02-11 07:08:09.123|baz:1981-12-16 07:08:09.123|2011-09-05 07:08:09.123 1947-02-11 07:08:09.123|baz:1921-12-16 07:08:09.123|2011-09-05 07:08:09.123 -8200-02-11 07:08:09.123|baz:6981-12-16 07:08:09.123|1039-09-05 07:08:09.123 \ No newline at end of file +8200-02-11 07:08:09.123|baz:6981-12-16 07:08:09.123|1039-09-05 07:08:09.123 +1412-02-21 07:08:09.123|foo:0980-12-16 07:08:09.123,bar:0998-05-07 07:08:09.123|0011-09-04 07:08:09.123,0011-09-05 07:08:09.123 +1214-02-11 07:08:09.123|baz:0981-12-16 07:08:09.123|0011-09-05 07:08:09.123 +0847-02-11 07:08:09.123|baz:0921-12-16 07:08:09.123|0011-09-05 07:08:09.123 +0600-02-11 07:08:09.123|baz:0981-12-16 07:08:09.123|0039-09-05 07:08:09.123 \ No newline at end of file diff --git a/data/files/orc_legacy_mixed_dates.orc b/data/files/orc_legacy_mixed_dates.orc new file mode 100644 index 0000000000..94c561dcbe Binary files /dev/null and b/data/files/orc_legacy_mixed_dates.orc differ diff --git a/data/files/orc_legacy_mixed_timestamps.orc b/data/files/orc_legacy_mixed_timestamps.orc new file mode 100644 index 0000000000..137fb25449 Binary files /dev/null and b/data/files/orc_legacy_mixed_timestamps.orc differ diff --git a/data/files/parquet_legacy_mixed_dates.parq b/data/files/parquet_legacy_mixed_dates.parq new file mode 100644 index 0000000000..b1dbacdd34 Binary files /dev/null and b/data/files/parquet_legacy_mixed_dates.parq differ diff --git a/data/files/parquet_legacy_mixed_timestamps.parq b/data/files/parquet_legacy_mixed_timestamps.parq new file mode 100644 index 0000000000..84aaf1ce08 Binary files /dev/null and b/data/files/parquet_legacy_mixed_timestamps.parq differ diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 2aa9043744..46108309da 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -469,6 +469,12 @@ minillaplocal.query.files=\ auto_sortmerge_join_7.q,\ auto_sortmerge_join_8.q,\ auto_sortmerge_join_9.q,\ + avro_hybrid_mixed_date.q,\ + avro_hybrid_mixed_timestamp.q,\ + avro_legacy_mixed_date.q,\ + avro_legacy_mixed_timestamp.q,\ + avro_proleptic_mixed_date.q,\ + avro_proleptic_mixed_timestamp.q,\ bucket4.q,\ bucket_groupby.q,\ bucket_many.q,\ @@ 
-663,12 +669,23 @@ minillaplocal.query.files=\ orc_ppd_decimal.q,\ orc_ppd_timestamp.q,\ order_null.q,\ + parquet_hybrid_mixed_date.q,\ + parquet_hybrid_mixed_timestamp.q,\ + parquet_legacy_mixed_date.q,\ + parquet_legacy_mixed_timestamp.q,\ + parquet_proleptic_mixed_date.q,\ partition_ctas.q,\ partition_multilevels.q,\ partition_shared_scan.q,\ partition_pruning.q,\ ptf.q,\ ptf_streaming.q,\ + orc_hybrid_mixed_date.q,\ + orc_hybrid_mixed_timestamp.q,\ + orc_legacy_mixed_date.q,\ + orc_legacy_mixed_timestamp.q,\ + orc_proleptic_mixed_date.q,\ + orc_proleptic_mixed_timestamp.q,\ runtime_stats_merge.q,\ quotedid_smb.q,\ reducesink_dedup.q,\ diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java index af853e3075..16176929de 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java @@ -52,6 +52,7 @@ import org.apache.orc.CompressionKind; import org.apache.orc.OrcFile; import org.apache.orc.OrcProto; +import org.apache.orc.OrcProto.CalendarKind; import org.apache.orc.OrcProto.ColumnEncoding; import org.apache.orc.OrcProto.RowIndex; import org.apache.orc.OrcProto.RowIndexEntry; @@ -292,5 +293,10 @@ public TypeDescription getSchema() { public OrcFile.Version getFileVersion() { return null; } + + @Override + public CalendarKind getCalendar() { + return CalendarKind.JULIAN_GREGORIAN; + } } } diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java index 83931c27b1..1b41d4e476 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hive.llap.metrics.LlapDaemonIOMetrics; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DateColumnVector; import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; @@ -44,6 +45,7 @@ import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.orc.CompressionCodec; +import org.apache.orc.OrcProto.CalendarKind; import org.apache.orc.impl.PositionProvider; import org.apache.hadoop.hive.ql.io.orc.encoded.Consumer; import org.apache.hadoop.hive.ql.io.orc.encoded.EncodedTreeReaderFactory; @@ -222,7 +224,8 @@ private void createColumnReaders(OrcEncodedColumnBatch batch, .setSchemaEvolution(evolution).skipCorrupt(skipCorrupt) .writerTimeZone(stripeMetadata.getWriterTimezone()) .fileFormat(fileMetadata == null ? 
null : fileMetadata.getFileVersion()) - .useUTCTimestamp(true); + .useUTCTimestamp(true) + .setProlepticGregorian(fileMetadata != null && fileMetadata.getCalendar() == CalendarKind.PROLEPTIC_GREGORIAN, true); this.batchSchemas = includes.getBatchReaderTypes(fileSchema); StructTreeReader treeReader = EncodedTreeReaderFactory.createRootTreeReader( batchSchemas, stripeMetadata.getEncodings(), batch, codec, context, useDecimal64ColumnVectors); @@ -244,8 +247,9 @@ private ColumnVector createColumn(TypeDescription type, int batchSize, final boo case SHORT: case INT: case LONG: - case DATE: return new LongColumnVector(batchSize); + case DATE: + return new DateColumnVector(batchSize); case FLOAT: case DOUBLE: return new DoubleColumnVector(batchSize); diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java index 2893870c75..92df717791 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java @@ -82,6 +82,7 @@ import org.apache.orc.OrcConf; import org.apache.orc.OrcProto; import org.apache.orc.OrcProto.BloomFilterIndex; +import org.apache.orc.OrcProto.CalendarKind; import org.apache.orc.OrcProto.FileTail; import org.apache.orc.OrcProto.RowIndex; import org.apache.orc.OrcProto.Stream; @@ -798,7 +799,7 @@ private boolean determineRgsToRead(int rowIndexStride, sargApp = new RecordReaderImpl.SargApplier(sarg, rowIndexStride, evolution, OrcFile.WriterVersion.from(OrcFile.WriterImplementation.ORC_JAVA, fileMetadata.getWriterVersionNum()), - true); + true, fileMetadata.getCalendar() == CalendarKind.PROLEPTIC_GREGORIAN, true); } boolean hasAnyData = false; // stripeRgs should have been initialized by this time with an empty array. 
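To make the CalendarUtils semantics above concrete: the conversion holds the printed date string constant and shifts the day-of-epoch value, so days before the 1582-10-15 switchover move by the Julian/Gregorian gap while later days are untouched. The following is a minimal, hypothetical sketch (CalendarUtilsDemo is not part of this patch; the expected values assume the UTC day arithmetic described in the class javadoc):

import java.time.LocalDate;

import org.apache.hadoop.hive.common.type.CalendarUtils;

public class CalendarUtilsDemo {
  public static void main(String[] args) {
    // Epoch day of proleptic 1582-10-14; the hybrid calendar renders this
    // same instant as the Julian date string "1582-10-04".
    int hybridDay = (int) LocalDate.of(1582, 10, 14).toEpochDay(); // -141428

    // Re-parsing "1582-10-04" with the proleptic formatter shifts the value
    // by the 10-day Julian/Gregorian gap of that era.
    int prolepticDay = CalendarUtils.convertDateToProleptic(hybridDay);
    System.out.println(prolepticDay - hybridDay); // expected: -10

    // Round trip: converting back restores the original day number.
    System.out.println(
        CalendarUtils.convertDateToHybrid(prolepticDay) == hybridDay); // true

    // From 1582-10-15 onward the two calendars agree, so conversion is a no-op.
    int epochDay = (int) LocalDate.of(1970, 1, 1).toEpochDay(); // 0
    System.out.println(CalendarUtils.convertDateToProleptic(epochDay)); // 0
  }
}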
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/ConsumerFileMetadata.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/ConsumerFileMetadata.java index d6b16efe29..7191e16f3f 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/ConsumerFileMetadata.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/ConsumerFileMetadata.java @@ -22,6 +22,7 @@ import org.apache.orc.CompressionKind; import org.apache.orc.FileFormatException; import org.apache.orc.OrcFile; +import org.apache.orc.OrcProto.CalendarKind; import org.apache.orc.OrcProto.Type; import org.apache.orc.TypeDescription; @@ -31,4 +32,5 @@ List<Type> getTypes(); TypeDescription getSchema() throws FileFormatException; OrcFile.Version getFileVersion(); + CalendarKind getCalendar(); } diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileMetadata.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileMetadata.java index 5eb713cc68..5b5bde9ea2 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileMetadata.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileMetadata.java @@ -26,6 +26,7 @@ import org.apache.orc.FileMetadata; import org.apache.orc.OrcFile; import org.apache.orc.OrcProto; +import org.apache.orc.OrcProto.CalendarKind; import org.apache.orc.OrcProto.StripeStatistics; import org.apache.orc.OrcUtils; import org.apache.orc.StripeInformation; @@ -51,6 +52,7 @@ private final long numberOfRows; private final boolean isOriginalFormat; private final OrcFile.Version fileVersion; + private final CalendarKind calendar; public OrcFileMetadata(Object fileKey, OrcProto.Footer footer, OrcProto.PostScript ps, List<StripeStatistics> stats, List<StripeInformation> stripes, final OrcFile.Version fileVersion) { @@ -69,6 +71,7 @@ public OrcFileMetadata(Object fileKey, OrcProto.Footer footer, OrcProto.PostScri this.fileStats = footer.getStatisticsList(); this.fileKey = fileKey; this.fileVersion = fileVersion; + this.calendar = footer.getCalendar(); } // FileMetadata @@ -170,4 +173,9 @@ public TypeDescription getSchema() throws FileFormatException { public OrcFile.Version getFileVersion() { return fileVersion; } + + @Override + public CalendarKind getCalendar() { + return calendar; + } } diff --git a/pom.xml b/pom.xml index ba87106c6a..030ab71d6b 100644 --- a/pom.xml +++ b/pom.xml @@ -189,7 +189,7 @@ 2.12.1 2.5.0 2.3 - 1.5.8 + 1.5.9-SNAPSHOT 1.10.19 1.7.4 2.0.0-M5 diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java index ec24c10845..46705ecd47 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java @@ -137,9 +137,10 @@ public static ColumnVector createColumnVector(TypeInfo typeInfo, case SHORT: case INT: case LONG: - case DATE: case INTERVAL_YEAR_MONTH: return new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + case DATE: + return new DateColumnVector(VectorizedRowBatch.DEFAULT_SIZE); case TIMESTAMP: return new TimestampColumnVector(VectorizedRowBatch.DEFAULT_SIZE); case INTERVAL_DAY_TIME: @@ -574,13 +575,14 @@ public static StandardStructObjectInspector convertToStandardStructObjectInspect return typeInfoList.toArray(new TypeInfo[0]); } - public static ColumnVector makeLikeColumnVector(ColumnVector source - ) throws HiveException{ + public static ColumnVector
makeLikeColumnVector(ColumnVector source) throws HiveException{ if (source instanceof Decimal64ColumnVector) { Decimal64ColumnVector dec64ColVector = (Decimal64ColumnVector) source; return new DecimalColumnVector(dec64ColVector.vector.length, dec64ColVector.precision, dec64ColVector.scale); + } else if (source instanceof DateColumnVector) { + return new DateColumnVector(((DateColumnVector) source).vector.length); } else if (source instanceof LongColumnVector) { return new LongColumnVector(((LongColumnVector) source).vector.length); } else if (source instanceof DoubleColumnVector) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroContainerOutputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroContainerOutputFormat.java index be7d8b7ca1..fd10e08e45 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroContainerOutputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroContainerOutputFormat.java @@ -31,6 +31,7 @@ import org.apache.avro.file.DataFileWriter; import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.generic.GenericRecord; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.serde2.avro.AvroSerDe; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -80,6 +81,8 @@ // add writer.time.zone property to file metadata dfw.setMeta(AvroSerDe.WRITER_TIME_ZONE, TimeZone.getDefault().toZoneId().toString()); + dfw.setMeta(AvroSerDe.WRITER_PROLEPTIC, String.valueOf( + HiveConf.getBoolVar(jobConf, HiveConf.ConfVars.HIVE_AVRO_PROLEPTIC_GREGORIAN))); dfw.create(schema, path.getFileSystem(jobConf).create(path)); return new AvroGenericRecordWriter(dfw); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java index 1927e0e6e2..f27cb230b0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java @@ -64,6 +64,7 @@ final private long start; final private long stop; private ZoneId writerTimezone; + private Boolean writerProleptic; protected JobConf jobConf; final private boolean isEmptyInput; /** @@ -102,6 +103,7 @@ public AvroGenericRecordReader(JobConf job, FileSplit split, Reporter reporter) this.recordReaderID = new UID(); this.writerTimezone = extractWriterTimezoneFromMetadata(job, split, gdr); + this.writerProleptic = extractWriterProlepticFromMetadata(job, split, gdr); } /** @@ -171,6 +173,28 @@ private ZoneId extractWriterTimezoneFromMetadata(JobConf job, FileSplit split, return null; } + private Boolean extractWriterProlepticFromMetadata(JobConf job, FileSplit split, + GenericDatumReader<GenericRecord> gdr) throws IOException { + if (job == null || gdr == null || split == null || split.getPath() == null) { + return null; + } + try { + DataFileReader<GenericRecord> dataFileReader = + new DataFileReader<>(new FsInput(split.getPath(), job), gdr); + if (dataFileReader.getMeta(AvroSerDe.WRITER_PROLEPTIC) != null) { + try { + return Boolean.valueOf(new String(dataFileReader.getMeta(AvroSerDe.WRITER_PROLEPTIC), + StandardCharsets.UTF_8)); + } catch (DateTimeException e) { + throw new RuntimeException("Can't parse writer proleptic property stored in file metadata", e); + } + } + } catch (IOException e) { + // Can't access metadata, carry on.
+ } + return null; + } + private boolean pathIsInPartition(Path split, Path partitionPath) { boolean schemeless = split.toUri().getScheme() == null; if (schemeless) { @@ -203,7 +227,7 @@ public NullWritable createKey() { @Override public AvroGenericRecordWritable createValue() { - return new AvroGenericRecordWritable(writerTimezone); + return new AvroGenericRecordWritable(writerTimezone, writerProleptic); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java index e246ac24a5..349eb254f5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java @@ -68,6 +68,7 @@ public static Reader createReader(FileSystem fs, public ReaderOptions(Configuration conf) { super(conf); useUTCTimestamp(true); + convertToProlepticGregorian(true); } public ReaderOptions filesystem(FileSystem fs) { @@ -94,6 +95,11 @@ public ReaderOptions useUTCTimestamp(boolean value) { super.useUTCTimestamp(value); return this; } + + public ReaderOptions convertToProlepticGregorian(boolean value) { + super.convertToProlepticGregorian(value); + return this; + } } public static ReaderOptions readerOptions(Configuration conf) { @@ -331,6 +337,11 @@ public WriterOptions useUTCTimestamp(boolean value) { return this; } + public WriterOptions setProlepticGregorian(boolean value) { + super.setProlepticGregorian(value); + return this; + } + ObjectInspector getInspector() { return inspector; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java index 889bd586d7..d0a6c6e25a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DateColumnVector; import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; @@ -433,7 +434,7 @@ static DateWritableV2 nextDate(ColumnVector vector, } else { result = (DateWritableV2) previous; } - int date = (int) ((LongColumnVector) vector).vector[row]; + int date = (int) ((DateColumnVector) vector).vector[row]; result.set(date); return result; } else { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java index 4082c61237..58a0c54b7c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DateColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; @@ -204,12 +205,14 @@ static void setColumn(int rowId, ColumnVector column, case TIMESTAMP: { TimestampColumnVector vector = (TimestampColumnVector) column; vector.setIsUTC(true); + 
vector.setUsingProlepticCalendar(true); vector.set(rowId, ((TimestampObjectInspector) inspector) .getPrimitiveJavaObject(obj).toSqlTimestamp()); break; } case DATE: { - LongColumnVector vector = (LongColumnVector) column; + DateColumnVector vector = (DateColumnVector) column; + vector.setUsingProlepticCalendar(true); vector.vector[rowId] = ((DateObjectInspector) inspector) .getPrimitiveWritableObject(obj).getDays(); break; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java index 91a02feb20..577051d0aa 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java @@ -54,6 +54,7 @@ protected Path file; protected ProjectionPusher projectionPusher; protected boolean skipTimestampConversion = false; + protected Boolean skipProlepticConversion; protected SerDeStats serDeStats; protected JobConf jobConf; @@ -130,6 +131,13 @@ protected ParquetInputSplit getSplit( if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION)) { skipTimestampConversion = !Strings.nullToEmpty(fileMetaData.getCreatedBy()).startsWith("parquet-mr"); } + skipProlepticConversion = DataWritableReadSupport + .getWriterDateProleptic(fileMetaData.getKeyValueMetaData()); + if (skipProlepticConversion == null) { + skipProlepticConversion = HiveConf.getBoolVar( + conf, HiveConf.ConfVars.HIVE_PARQUET_DATE_PROLEPTIC_GREGORIAN_DEFAULT); + } + split = new ParquetInputSplit(finalPath, splitStart, splitLength, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java index 89dfe2d6b7..f4aa2e7b9d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport; import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime; import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils; +import org.apache.hadoop.hive.common.type.CalendarUtils; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.io.DateWritableV2; import org.apache.hadoop.hive.serde2.io.DoubleWritable; @@ -667,7 +668,14 @@ PrimitiveConverter getConverter(final PrimitiveType type, final int index, final return new PrimitiveConverter() { @Override public void addInt(final int value) { - parent.set(index, new DateWritableV2(value)); + Map<String, String> metadata = parent.getMetadata(); + Boolean skipProlepticConversion = DataWritableReadSupport.getWriterDateProleptic(metadata); + if (skipProlepticConversion == null) { + skipProlepticConversion = Boolean.parseBoolean( + metadata.get(HiveConf.ConfVars.HIVE_PARQUET_DATE_PROLEPTIC_GREGORIAN_DEFAULT.varname)); + } + parent.set(index, + new DateWritableV2(skipProlepticConversion ?
value : CalendarUtils.convertDateToProleptic(value))); + } }; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java index 30f3d1737d..073f563df5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java @@ -25,6 +25,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.io.IOConstants; import org.apache.hadoop.hive.ql.io.parquet.convert.DataWritableRecordConverter; import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; @@ -284,6 +285,25 @@ public static ZoneId getWriterTimeZoneId(Map<String, String> metadata) { return null; } + /** + * Get the writer proleptic property from some metadata, otherwise return null. + */ + public static Boolean getWriterDateProleptic(Map<String, String> metadata) { + if (metadata == null) { + return null; + } + String value = metadata.get(DataWritableWriteSupport.WRITER_DATE_PROLEPTIC); + try { + if (value != null) { + return Boolean.valueOf(value); + } + } catch (DateTimeException e) { + throw new RuntimeException("Can't parse writer proleptic property stored in file metadata", e); + } + + return null; + } + /** * Return the columns which contains required nested attribute level * E.g., given struct a:<x:int, y:int> while 'x' is required and 'y' is not, the method will return @@ -487,6 +507,22 @@ private static MessageType getRequestedPrunedSchema( + "file footer's writer time zone."); } + String writerProleptic = DataWritableWriteSupport.WRITER_DATE_PROLEPTIC; + if (!metadata.containsKey(writerProleptic)) { + if (keyValueMetaData.containsKey(writerProleptic)) { + metadata.put(writerProleptic, keyValueMetaData.get(writerProleptic)); + } + } else if (!metadata.get(writerProleptic).equals(keyValueMetaData.get(writerProleptic))) { + throw new IllegalStateException("Metadata contains a writer proleptic property value that does not match " + + "file footer's value."); + } + + String prolepticDefault = ConfVars.HIVE_PARQUET_DATE_PROLEPTIC_GREGORIAN_DEFAULT.varname; + if (!metadata.containsKey(prolepticDefault)) { + metadata.put(prolepticDefault, String.valueOf(HiveConf.getBoolVar( + configuration, HiveConf.ConfVars.HIVE_PARQUET_DATE_PROLEPTIC_GREGORIAN_DEFAULT))); + } + return new DataWritableRecordConverter(readContext.getRequestedSchema(), metadata, hiveTypeInfo); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java index f1cce81b98..f9d0a5629b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java @@ -16,6 +16,7 @@ import java.time.ZoneId; import java.time.ZoneOffset; import java.util.Calendar; +import java.util.Date; import java.util.GregorianCalendar; import java.util.TimeZone; import java.util.concurrent.TimeUnit; @@ -40,7 +41,10 @@ private static Calendar getGMTCalendar() { //Calendar.getInstance calculates the current-time needlessly, so cache an instance.
if (parquetGMTCalendar.get() == null) { - parquetGMTCalendar.set(Calendar.getInstance(TimeZone.getTimeZone("GMT"))); + GregorianCalendar calendar = new GregorianCalendar(); + calendar.setTimeZone(TimeZone.getTimeZone("GMT")); + calendar.setGregorianChange(new Date(Long.MIN_VALUE)); + parquetGMTCalendar.set(calendar); } parquetGMTCalendar.get().clear(); return parquetGMTCalendar.get(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/BaseVectorizedColumnReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/BaseVectorizedColumnReader.java index e8fcb6b214..396d1f04f7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/BaseVectorizedColumnReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/BaseVectorizedColumnReader.java @@ -55,6 +55,7 @@ protected boolean skipTimestampConversion = false; protected ZoneId writerTimezone = null; + protected boolean skipProlepticConversion = false; /** * Total number of values read. @@ -119,6 +120,7 @@ public BaseVectorizedColumnReader( PageReader pageReader, boolean skipTimestampConversion, ZoneId writerTimezone, + boolean skipProlepticConversion, Type parquetType, TypeInfo hiveType) throws IOException { this.descriptor = descriptor; this.type = parquetType; @@ -126,6 +128,7 @@ public BaseVectorizedColumnReader( this.maxDefLevel = descriptor.getMaxDefinitionLevel(); this.skipTimestampConversion = skipTimestampConversion; this.writerTimezone = writerTimezone; + this.skipProlepticConversion = skipProlepticConversion; this.hiveType = hiveType; DictionaryPage dictionaryPage = pageReader.readDictionaryPage(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java index 5c1ce70075..108db44a4a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java @@ -48,9 +48,9 @@ boolean isFirstRow = true; public VectorizedListColumnReader(ColumnDescriptor descriptor, PageReader pageReader, - boolean skipTimestampConversion, ZoneId writerTimezone, Type type, TypeInfo hiveType) - throws IOException { - super(descriptor, pageReader, skipTimestampConversion, writerTimezone, type, hiveType); + boolean skipTimestampConversion, ZoneId writerTimezone, boolean skipProlepticConversion, + Type type, TypeInfo hiveType) throws IOException { + super(descriptor, pageReader, skipTimestampConversion, writerTimezone, skipProlepticConversion, type, hiveType); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java index ea6dfb8a88..2104746365 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java @@ -456,13 +456,13 @@ private void checkEndOfRowGroup() throws IOException { for (int i = 0; i < types.size(); ++i) { columnReaders[i] = buildVectorizedParquetReader(columnTypesList.get(colsToInclude.get(i)), types.get(i), - pages, requestedSchema.getColumns(), skipTimestampConversion, writerTimezone, 0); + pages, requestedSchema.getColumns(), skipTimestampConversion, writerTimezone, skipProlepticConversion, 0); } } } else { for (int i = 0; i < types.size(); ++i) { 
columnReaders[i] = buildVectorizedParquetReader(columnTypesList.get(i), types.get(i), pages, - requestedSchema.getColumns(), skipTimestampConversion, writerTimezone, 0); + requestedSchema.getColumns(), skipTimestampConversion, writerTimezone, skipProlepticConversion, 0); } } @@ -506,6 +507,7 @@ private VectorizedColumnReader buildVectorizedParquetReader( List<ColumnDescriptor> columnDescriptors, boolean skipTimestampConversion, ZoneId writerTimezone, + boolean skipProlepticConversion, int depth) throws IOException { List<ColumnDescriptor> descriptors = getAllColumnDescriptorByType(depth, type, columnDescriptors); @@ -517,8 +518,8 @@ private VectorizedColumnReader buildVectorizedParquetReader( } if (fileSchema.getColumns().contains(descriptors.get(0))) { return new VectorizedPrimitiveColumnReader(descriptors.get(0), - pages.getPageReader(descriptors.get(0)), skipTimestampConversion, writerTimezone, type, - typeInfo); + pages.getPageReader(descriptors.get(0)), skipTimestampConversion, writerTimezone, skipProlepticConversion, + type, typeInfo); } else { // Support for schema evolution return new VectorizedDummyColumnReader(); @@ -531,7 +532,7 @@ private VectorizedColumnReader buildVectorizedParquetReader( for (int i = 0; i < fieldTypes.size(); i++) { VectorizedColumnReader r = buildVectorizedParquetReader(fieldTypes.get(i), types.get(i), pages, descriptors, - skipTimestampConversion, writerTimezone, depth + 1); + skipTimestampConversion, writerTimezone, skipProlepticConversion, depth + 1); if (r != null) { fieldReaders.add(r); } else { @@ -549,9 +550,8 @@ } return new VectorizedListColumnReader(descriptors.get(0), - pages.getPageReader(descriptors.get(0)), skipTimestampConversion, writerTimezone, - getElementType(type), - typeInfo); + pages.getPageReader(descriptors.get(0)), skipTimestampConversion, writerTimezone, skipProlepticConversion, + getElementType(type), typeInfo); case MAP: if (columnDescriptors == null || columnDescriptors.isEmpty()) { throw new RuntimeException( @@ -583,10 +583,10 @@ private VectorizedColumnReader buildVectorizedParquetReader( List<Type> kvTypes = groupType.getFields(); VectorizedListColumnReader keyListColumnReader = new VectorizedListColumnReader( descriptors.get(0), pages.getPageReader(descriptors.get(0)), skipTimestampConversion, - writerTimezone, kvTypes.get(0), typeInfo); + writerTimezone, skipProlepticConversion, kvTypes.get(0), typeInfo); VectorizedListColumnReader valueListColumnReader = new VectorizedListColumnReader( descriptors.get(1), pages.getPageReader(descriptors.get(1)), skipTimestampConversion, - writerTimezone, kvTypes.get(1), typeInfo); + writerTimezone, skipProlepticConversion, kvTypes.get(1), typeInfo); return new VectorizedMapColumnReader(keyListColumnReader, valueListColumnReader); case UNION: default: diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java index 1a861f348b..b16ef7bf65 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java @@ -13,13 +13,14 @@ */ package org.apache.hadoop.hive.ql.io.parquet.vector; -import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import
org.apache.hadoop.hive.ql.exec.vector.DateColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.common.type.CalendarUtils; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; @@ -49,10 +50,11 @@ public VectorizedPrimitiveColumnReader( PageReader pageReader, boolean skipTimestampConversion, ZoneId writerTimezone, + boolean skipProlepticConversion, Type type, TypeInfo hiveType) throws IOException { - super(descriptor, pageReader, skipTimestampConversion, writerTimezone, type, hiveType); + super(descriptor, pageReader, skipTimestampConversion, writerTimezone, skipProlepticConversion, type, hiveType); } @Override @@ -102,6 +104,8 @@ private void readBatchHelper( readSmallInts(num, (LongColumnVector) column, rowId); break; case DATE: + readDate(num, (DateColumnVector) column, rowId); + break; case INTERVAL_YEAR_MONTH: case LONG: readLongs(num, (LongColumnVector) column, rowId); @@ -436,7 +440,34 @@ private void readBinaries( } } + private void readDate( + int total, + DateColumnVector c, + int rowId) throws IOException { + c.setUsingProlepticCalendar(true); + int left = total; + while (left > 0) { + readRepetitionAndDefinitionLevels(); + if (definitionLevel >= maxDefLevel) { + c.vector[rowId] = skipProlepticConversion ? + dataColumn.readLong() : CalendarUtils.convertDateToProleptic((int) dataColumn.readLong()); + if (dataColumn.isValid()) { + c.isNull[rowId] = false; + c.isRepeating = c.isRepeating && (c.vector[0] == c.vector[rowId]); + } else { + c.vector[rowId] = 0; + setNullValue(c, rowId); + } + } else { + setNullValue(c, rowId); + } + rowId++; + left--; + } + } + private void readTimestamp(int total, TimestampColumnVector c, int rowId) throws IOException { + c.setUsingProlepticCalendar(true); int left = total; while (left > 0) { readRepetitionAndDefinitionLevels(); @@ -511,6 +542,19 @@ private void decodeDictionaryIds( } break; case DATE: + DateColumnVector dc = (DateColumnVector) column; + dc.setUsingProlepticCalendar(true); + for (int i = rowId; i < rowId + num; ++i) { + dc.vector[i] = + skipProlepticConversion ? 
+ dictionary.readLong((int) dictionaryIds.vector[i]) : + CalendarUtils.convertDateToProleptic((int) dictionary.readLong((int) dictionaryIds.vector[i])); + if (!dictionary.isValid()) { + setNullValue(column, i); + dc.vector[i] = 0; + } + } + break; case INTERVAL_YEAR_MONTH: case LONG: for (int i = rowId; i < rowId + num; ++i) { @@ -589,9 +633,10 @@ } break; case TIMESTAMP: + TimestampColumnVector tsc = (TimestampColumnVector) column; + tsc.setUsingProlepticCalendar(true); for (int i = rowId; i < rowId + num; ++i) { - ((TimestampColumnVector) column) - .set(i, dictionary.readTimestamp((int) dictionaryIds.vector[i]).toSqlTimestamp()); + tsc.set(i, dictionary.readTimestamp((int) dictionaryIds.vector[i]).toSqlTimestamp()); } break; case INTERVAL_DAY_TIME: diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriteSupport.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriteSupport.java index 8acde81a3d..f4212f4481 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriteSupport.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriteSupport.java @@ -18,6 +18,7 @@ import java.util.TimeZone; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.serde2.io.ParquetHiveRecord; import org.apache.hive.common.util.HiveVersionInfo; @@ -35,9 +36,11 @@ public static final String PARQUET_HIVE_SCHEMA = "parquet.hive.schema"; public static final String WRITER_TIMEZONE = "writer.time.zone"; + public static final String WRITER_DATE_PROLEPTIC = "writer.date.proleptic"; private DataWritableWriter writer; private MessageType schema; + private boolean defaultDateProleptic; public static void setSchema(final MessageType schema, final Configuration configuration) { configuration.set(PARQUET_HIVE_SCHEMA, schema.toString()); @@ -52,12 +55,15 @@ public WriteContext init(final Configuration configuration) { schema = getSchema(configuration); Map<String, String> metaData = new HashMap<>(); metaData.put(WRITER_TIMEZONE, TimeZone.getDefault().toZoneId().toString()); + defaultDateProleptic = HiveConf.getBoolVar( + configuration, HiveConf.ConfVars.HIVE_PARQUET_DATE_PROLEPTIC_GREGORIAN); + metaData.put(WRITER_DATE_PROLEPTIC, String.valueOf(defaultDateProleptic)); return new WriteContext(schema, metaData); } @Override public void prepareForWrite(final RecordConsumer recordConsumer) { - writer = new DataWritableWriter(recordConsumer, schema); + writer = new DataWritableWriter(recordConsumer, schema, defaultDateProleptic); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java index 3d61c33afd..116f0fd1c4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java @@ -18,6 +18,7 @@ import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils; +import org.apache.hadoop.hive.common.type.CalendarUtils; import org.apache.hadoop.hive.serde2.io.DateWritableV2; import org.apache.hadoop.hive.serde2.io.ParquetHiveRecord; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; @@ -62,14 +63,17 @@ private static final Logger LOG =
LoggerFactory.getLogger(DataWritableWriter.class); protected final RecordConsumer recordConsumer; private final GroupType schema; + private final boolean defaultDateProleptic; /* This writer will be created when writing the first row in order to get information about how to inspect the record data. */ private DataWriter messageWriter; - public DataWritableWriter(final RecordConsumer recordConsumer, final GroupType schema) { + public DataWritableWriter(final RecordConsumer recordConsumer, final GroupType schema, + final boolean defaultDateProleptic) { this.recordConsumer = recordConsumer; this.schema = schema; + this.defaultDateProleptic = defaultDateProleptic; } /** @@ -550,7 +554,9 @@ public DateDataWriter(DateObjectInspector inspector) { @Override public void write(Object value) { Date vDate = inspector.getPrimitiveJavaObject(value); - recordConsumer.addInteger(DateWritableV2.dateToDays(vDate)); + recordConsumer.addInteger( + defaultDateProleptic ? DateWritableV2.dateToDays(vDate) : + CalendarUtils.convertDateToHybrid(DateWritableV2.dateToDays(vDate))); } } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerator.java index ff8884172d..bb149bc93b 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerator.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DateColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -192,10 +193,13 @@ public void assignColumnVectors(VectorizedRowBatch batch, int columnNum, case SHORT: case INT: case LONG: - case DATE: colVector = new LongColumnVector(); break; + case DATE: + colVector = new DateColumnVector(); + break; + case FLOAT: case DOUBLE: colVector = new DoubleColumnVector(); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java index 220431a444..154fe12640 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java @@ -1312,7 +1312,8 @@ public void createOrcDateFile(Path file, int minYear, int maxYear .inspector(inspector) .stripeSize(100000) .bufferSize(10000) - .blockPadding(false)); + .blockPadding(false) + .setProlepticGregorian(true)); OrcStruct row = new OrcStruct(2); for (int year = minYear; year < maxYear; ++year) { for (int ms = 1000; ms < 2000; ++ms) { diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestDataWritableWriter.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestDataWritableWriter.java index b242392a9a..01d9d2e27a 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestDataWritableWriter.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestDataWritableWriter.java @@ -192,7 +192,7 @@ private ParquetHiveRecord getParquetWritable(String columnNames, String columnTy private void writeParquetRecord(String schema, ParquetHiveRecord record) throws SerDeException { MessageType fileSchema = MessageTypeParser.parseMessageType(schema); - DataWritableWriter hiveParquetWriter = 
new DataWritableWriter(mockRecordConsumer, fileSchema); + DataWritableWriter hiveParquetWriter = new DataWritableWriter(mockRecordConsumer, fileSchema, false); hiveParquetWriter.write(record); } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java index 9ea78508ee..bc5e560629 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java @@ -15,9 +15,11 @@ import java.time.ZoneId; import java.util.Calendar; +import java.util.GregorianCalendar; import java.util.TimeZone; import java.util.concurrent.TimeUnit; +import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.common.type.TimestampTZUtil; import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime; @@ -41,12 +43,13 @@ @Test public void testJulianDay() { //check if May 23, 1968 is Julian Day 2440000 - Calendar cal = Calendar.getInstance(); + GregorianCalendar cal = new GregorianCalendar(); + cal.setTimeZone(TimeZone.getTimeZone("GMT")); + cal.setGregorianChange(new java.util.Date(Long.MIN_VALUE)); cal.set(Calendar.YEAR, 1968); cal.set(Calendar.MONTH, Calendar.MAY); cal.set(Calendar.DAY_OF_MONTH, 23); cal.set(Calendar.HOUR_OF_DAY, 0); - cal.setTimeZone(TimeZone.getTimeZone("GMT")); Timestamp ts = Timestamp.ofEpochMilli(cal.getTimeInMillis()); NanoTime nt = NanoTimeUtils.getNanoTime(ts, false); @@ -56,12 +59,13 @@ public void testJulianDay() { Assert.assertEquals(tsFetched, ts); //check if 30 Julian Days between Jan 1, 2005 and Jan 31, 2005. - Calendar cal1 = Calendar.getInstance(); + GregorianCalendar cal1 = new GregorianCalendar(); + cal1.setTimeZone(TimeZone.getTimeZone("GMT")); + cal1.setGregorianChange(new java.util.Date(Long.MIN_VALUE)); cal1.set(Calendar.YEAR, 2005); cal1.set(Calendar.MONTH, Calendar.JANUARY); cal1.set(Calendar.DAY_OF_MONTH, 1); cal1.set(Calendar.HOUR_OF_DAY, 0); - cal1.setTimeZone(TimeZone.getTimeZone("GMT")); Timestamp ts1 = Timestamp.ofEpochMilli(cal1.getTimeInMillis()); NanoTime nt1 = NanoTimeUtils.getNanoTime(ts1, false); @@ -69,12 +73,13 @@ public void testJulianDay() { Timestamp ts1Fetched = NanoTimeUtils.getTimestamp(nt1, false); Assert.assertEquals(ts1Fetched, ts1); - Calendar cal2 = Calendar.getInstance(); + GregorianCalendar cal2 = new GregorianCalendar(); + cal2.setTimeZone(TimeZone.getTimeZone("UTC")); + cal2.setGregorianChange(new java.util.Date(Long.MIN_VALUE)); cal2.set(Calendar.YEAR, 2005); cal2.set(Calendar.MONTH, Calendar.JANUARY); cal2.set(Calendar.DAY_OF_MONTH, 31); cal2.set(Calendar.HOUR_OF_DAY, 0); - cal2.setTimeZone(TimeZone.getTimeZone("UTC")); Timestamp ts2 = Timestamp.ofEpochMilli(cal2.getTimeInMillis()); NanoTime nt2 = NanoTimeUtils.getNanoTime(ts2, false); @@ -86,12 +91,13 @@ public void testJulianDay() { // check if 730517 Julian Days between Jan 1, 0005 and Jan 31, 2005. // This method used to test Julian Days between Jan 1, 2005 BCE and Jan 1, 2005 CE. Since BCE // timestamps are not supported, both dates were changed to CE. 
- cal1 = Calendar.getInstance(); + cal1 = new GregorianCalendar(); + cal1.setTimeZone(TimeZone.getTimeZone("GMT")); + cal1.setGregorianChange(new java.util.Date(Long.MIN_VALUE)); cal1.set(Calendar.YEAR, 0005); cal1.set(Calendar.MONTH, Calendar.JANUARY); cal1.set(Calendar.DAY_OF_MONTH, 1); cal1.set(Calendar.HOUR_OF_DAY, 0); - cal1.setTimeZone(TimeZone.getTimeZone("GMT")); ts1 = Timestamp.ofEpochMilli(cal1.getTimeInMillis()); nt1 = NanoTimeUtils.getNanoTime(ts1, false); @@ -99,20 +105,27 @@ public void testJulianDay() { ts1Fetched = NanoTimeUtils.getTimestamp(nt1, false); Assert.assertEquals(ts1Fetched, ts1); - cal2 = Calendar.getInstance(); + cal2 = new GregorianCalendar(); + cal2.setTimeZone(TimeZone.getTimeZone("UTC")); + cal2.setGregorianChange(new java.util.Date(Long.MIN_VALUE)); cal2.set(Calendar.YEAR, 2005); cal2.set(Calendar.MONTH, Calendar.JANUARY); cal2.set(Calendar.DAY_OF_MONTH, 31); cal2.set(Calendar.HOUR_OF_DAY, 0); - cal2.setTimeZone(TimeZone.getTimeZone("UTC")); ts2 = Timestamp.ofEpochMilli(cal2.getTimeInMillis()); nt2 = NanoTimeUtils.getNanoTime(ts2, false); ts2Fetched = NanoTimeUtils.getTimestamp(nt2, false); Assert.assertEquals(ts2Fetched, ts2); - Assert.assertEquals(nt2.getJulianDay() - nt1.getJulianDay(), 730517); -} + Assert.assertEquals(730517, nt2.getJulianDay() - nt1.getJulianDay()); + + Date d1 = Date.ofEpochMilli(cal1.getTimeInMillis()); + Assert.assertEquals("0005-01-01", d1.toString()); + + Date d2 = Date.ofEpochMilli(cal2.getTimeInMillis()); + Assert.assertEquals("2005-01-31", d2.toString()); + } @Test public void testNanos() { diff --git a/ql/src/test/queries/clientpositive/avro_hybrid_mixed_date.q b/ql/src/test/queries/clientpositive/avro_hybrid_mixed_date.q new file mode 100644 index 0000000000..5d9807b370 --- /dev/null +++ b/ql/src/test/queries/clientpositive/avro_hybrid_mixed_date.q @@ -0,0 +1,22 @@ +create table hybrid_table (d date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro; + +INSERT INTO hybrid_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11'); + +select * from hybrid_table; + +set hive.avro.proleptic.gregorian.default=true; + +select * from hybrid_table; + +drop table hybrid_table; diff --git a/ql/src/test/queries/clientpositive/avro_hybrid_mixed_timestamp.q b/ql/src/test/queries/clientpositive/avro_hybrid_mixed_timestamp.q new file mode 100644 index 0000000000..28fc99c51b --- /dev/null +++ b/ql/src/test/queries/clientpositive/avro_hybrid_mixed_timestamp.q @@ -0,0 +1,22 @@ +create table hybrid_table (d timestamp) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro; + +INSERT INTO hybrid_table VALUES +('2012-02-21 07:08:09.123'), +('2014-02-11 07:08:09.123'), +('1947-02-11 07:08:09.123'), +('8200-02-11 07:08:09.123'), +('1012-02-21 07:15:11.123'), +('1014-02-11 07:15:11.123'), +('0947-02-11 07:15:11.123'), +('0200-02-11 07:15:11.123'); + +select * from hybrid_table; + +set hive.avro.proleptic.gregorian.default=true; + +select * from hybrid_table; + +drop table hybrid_table; diff --git a/ql/src/test/queries/clientpositive/avro_legacy_mixed_date.q b/ql/src/test/queries/clientpositive/avro_legacy_mixed_date.q new file mode 100644 index 0000000000..437e432080 --- /dev/null +++ b/ql/src/test/queries/clientpositive/avro_legacy_mixed_date.q @@ -0,0 +1,14 @@ +create table legacy_table (d date) +ROW FORMAT 
DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro; + +load data local inpath '../../data/files/avro_legacy_mixed_dates.avro' into table legacy_table; + +select * from legacy_table; + +set hive.avro.proleptic.gregorian.default=true; + +select * from legacy_table; + +drop table legacy_table; diff --git a/ql/src/test/queries/clientpositive/avro_legacy_mixed_timestamp.q b/ql/src/test/queries/clientpositive/avro_legacy_mixed_timestamp.q new file mode 100644 index 0000000000..e1e6870eb6 --- /dev/null +++ b/ql/src/test/queries/clientpositive/avro_legacy_mixed_timestamp.q @@ -0,0 +1,14 @@ +create table legacy_table (d timestamp) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro; + +load data local inpath '../../data/files/avro_legacy_mixed_timestamps.avro' into table legacy_table; + +select * from legacy_table; + +set hive.avro.proleptic.gregorian.default=true; + +select * from legacy_table; + +drop table legacy_table; diff --git a/ql/src/test/queries/clientpositive/avro_proleptic_mixed_date.q b/ql/src/test/queries/clientpositive/avro_proleptic_mixed_date.q new file mode 100644 index 0000000000..401f0a6b50 --- /dev/null +++ b/ql/src/test/queries/clientpositive/avro_proleptic_mixed_date.q @@ -0,0 +1,24 @@ +set hive.avro.proleptic.gregorian=true; + +create table hybrid_table (d date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro; + +INSERT INTO hybrid_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11'); + +select * from hybrid_table; + +set hive.avro.proleptic.gregorian.default=true; + +select * from hybrid_table; + +drop table hybrid_table; diff --git a/ql/src/test/queries/clientpositive/avro_proleptic_mixed_timestamp.q b/ql/src/test/queries/clientpositive/avro_proleptic_mixed_timestamp.q new file mode 100644 index 0000000000..5a67ab59d2 --- /dev/null +++ b/ql/src/test/queries/clientpositive/avro_proleptic_mixed_timestamp.q @@ -0,0 +1,24 @@ +set hive.avro.proleptic.gregorian=true; + +create table hybrid_table (d timestamp) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro; + +INSERT INTO hybrid_table VALUES +('2012-02-21 07:08:09.123'), +('2014-02-11 07:08:09.123'), +('1947-02-11 07:08:09.123'), +('8200-02-11 07:08:09.123'), +('1012-02-21 07:15:11.123'), +('1014-02-11 07:15:11.123'), +('0947-02-11 07:15:11.123'), +('0200-02-11 07:15:11.123'); + +select * from hybrid_table; + +set hive.avro.proleptic.gregorian.default=true; + +select * from hybrid_table; + +drop table hybrid_table; diff --git a/ql/src/test/queries/clientpositive/orc_hybrid_mixed_date.q b/ql/src/test/queries/clientpositive/orc_hybrid_mixed_date.q new file mode 100644 index 0000000000..bf71ab3b34 --- /dev/null +++ b/ql/src/test/queries/clientpositive/orc_hybrid_mixed_date.q @@ -0,0 +1,20 @@ +create table hybrid_table (d date) +stored as orc; + +INSERT INTO hybrid_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11'); + +select * from hybrid_table; + +set orc.proleptic.gregorian.default=true; + +select * from hybrid_table; + +drop table hybrid_table; diff --git a/ql/src/test/queries/clientpositive/orc_hybrid_mixed_timestamp.q 
b/ql/src/test/queries/clientpositive/orc_hybrid_mixed_timestamp.q new file mode 100644 index 0000000000..0fd80294fe --- /dev/null +++ b/ql/src/test/queries/clientpositive/orc_hybrid_mixed_timestamp.q @@ -0,0 +1,20 @@ +create table hybrid_table (d timestamp) +stored as orc; + +INSERT INTO hybrid_table VALUES +('2012-02-21 07:08:09.123'), +('2014-02-11 07:08:09.123'), +('1947-02-11 07:08:09.123'), +('8200-02-11 07:08:09.123'), +('1012-02-21 07:15:11.123'), +('1014-02-11 07:15:11.123'), +('0947-02-11 07:15:11.123'), +('0200-02-11 07:15:11.123'); + +select * from hybrid_table; + +set orc.proleptic.gregorian.default=true; + +select * from hybrid_table; + +drop table hybrid_table; diff --git a/ql/src/test/queries/clientpositive/orc_legacy_mixed_date.q b/ql/src/test/queries/clientpositive/orc_legacy_mixed_date.q new file mode 100644 index 0000000000..451c9834f1 --- /dev/null +++ b/ql/src/test/queries/clientpositive/orc_legacy_mixed_date.q @@ -0,0 +1,12 @@ +create table legacy_table (d date) +stored as orc; + +load data local inpath '../../data/files/orc_legacy_mixed_dates.orc' into table legacy_table; + +select * from legacy_table; + +set orc.proleptic.gregorian.default=true; + +select * from legacy_table; + +drop table legacy_table; \ No newline at end of file diff --git a/ql/src/test/queries/clientpositive/orc_legacy_mixed_timestamp.q b/ql/src/test/queries/clientpositive/orc_legacy_mixed_timestamp.q new file mode 100644 index 0000000000..6488f4dc99 --- /dev/null +++ b/ql/src/test/queries/clientpositive/orc_legacy_mixed_timestamp.q @@ -0,0 +1,12 @@ +create table legacy_table (ts timestamp) +stored as orc; + +load data local inpath '../../data/files/orc_legacy_mixed_timestamps.orc' into table legacy_table; + +select * from legacy_table; + +set orc.proleptic.gregorian.default=true; + +select * from legacy_table; + +drop table legacy_table; \ No newline at end of file diff --git a/ql/src/test/queries/clientpositive/orc_proleptic_mixed_date.q b/ql/src/test/queries/clientpositive/orc_proleptic_mixed_date.q new file mode 100644 index 0000000000..55aaede8b9 --- /dev/null +++ b/ql/src/test/queries/clientpositive/orc_proleptic_mixed_date.q @@ -0,0 +1,22 @@ +set orc.proleptic.gregorian=true; + +create table hybrid_table (d date) +stored as orc; + +INSERT INTO hybrid_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11'); + +select * from hybrid_table; + +set orc.proleptic.gregorian.default=true; + +select * from hybrid_table; + +drop table hybrid_table; diff --git a/ql/src/test/queries/clientpositive/orc_proleptic_mixed_timestamp.q b/ql/src/test/queries/clientpositive/orc_proleptic_mixed_timestamp.q new file mode 100644 index 0000000000..92d91e42fd --- /dev/null +++ b/ql/src/test/queries/clientpositive/orc_proleptic_mixed_timestamp.q @@ -0,0 +1,22 @@ +set orc.proleptic.gregorian=true; + +create table hybrid_table (d timestamp) +stored as orc; + +INSERT INTO hybrid_table VALUES +('2012-02-21 07:08:09.123'), +('2014-02-11 07:08:09.123'), +('1947-02-11 07:08:09.123'), +('8200-02-11 07:08:09.123'), +('1012-02-21 07:15:11.123'), +('1014-02-11 07:15:11.123'), +('0947-02-11 07:15:11.123'), +('0200-02-11 07:15:11.123'); + +select * from hybrid_table; + +set orc.proleptic.gregorian.default=true; + +select * from hybrid_table; + +drop table hybrid_table; diff --git a/ql/src/test/queries/clientpositive/parquet_hybrid_mixed_date.q b/ql/src/test/queries/clientpositive/parquet_hybrid_mixed_date.q new file mode 100644 index 
0000000000..67a0cee90b --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_hybrid_mixed_date.q @@ -0,0 +1,20 @@ +create table hybrid_table (d date) +stored as parquet; + +INSERT INTO hybrid_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11'); + +select * from hybrid_table; + +set hive.parquet.date.proleptic.gregorian.default=true; + +select * from hybrid_table; + +drop table hybrid_table; diff --git a/ql/src/test/queries/clientpositive/parquet_hybrid_mixed_timestamp.q b/ql/src/test/queries/clientpositive/parquet_hybrid_mixed_timestamp.q new file mode 100644 index 0000000000..4c64b7efd2 --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_hybrid_mixed_timestamp.q @@ -0,0 +1,16 @@ +create table hybrid_table (d timestamp) +stored as parquet; + +INSERT INTO hybrid_table VALUES +('2012-02-21 07:08:09.123'), +('2014-02-11 07:08:09.123'), +('1947-02-11 07:08:09.123'), +('8200-02-11 07:08:09.123'), +('1012-02-21 07:15:11.123'), +('1014-02-11 07:15:11.123'), +('0947-02-11 07:15:11.123'), +('0200-02-11 07:15:11.123'); + +select * from hybrid_table; + +drop table hybrid_table; diff --git a/ql/src/test/queries/clientpositive/parquet_legacy_mixed_date.q b/ql/src/test/queries/clientpositive/parquet_legacy_mixed_date.q new file mode 100644 index 0000000000..bf2345c3af --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_legacy_mixed_date.q @@ -0,0 +1,12 @@ +create table legacy_table (d date) +stored as parquet; + +load data local inpath '../../data/files/parquet_legacy_mixed_dates.parq' into table legacy_table; + +select * from legacy_table; + +set hive.parquet.date.proleptic.gregorian.default=true; + +select * from legacy_table; + +drop table legacy_table; \ No newline at end of file diff --git a/ql/src/test/queries/clientpositive/parquet_legacy_mixed_timestamp.q b/ql/src/test/queries/clientpositive/parquet_legacy_mixed_timestamp.q new file mode 100644 index 0000000000..280df40076 --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_legacy_mixed_timestamp.q @@ -0,0 +1,8 @@ +create table legacy_table (d timestamp) +stored as parquet; + +load data local inpath '../../data/files/parquet_legacy_mixed_timestamps.parq' into table legacy_table; + +select * from legacy_table; + +drop table legacy_table; \ No newline at end of file diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_date.q b/ql/src/test/queries/clientpositive/parquet_ppd_date.q index 82085beea3..8027e9184b 100644 --- a/ql/src/test/queries/clientpositive/parquet_ppd_date.q +++ b/ql/src/test/queries/clientpositive/parquet_ppd_date.q @@ -103,3 +103,31 @@ select * from newtypestbl_n2 where da between '1970-02-18' and '1970-02-19'; set hive.optimize.index.filter=true; select * from newtypestbl_n2 where da between '1970-02-18' and '1970-02-19'; + +insert overwrite table newtypestbl_n2 select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("999-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1820-02-27" as date) from src src2 limit 10) uniontbl; + +set hive.optimize.index.filter=false; +select * from newtypestbl_n2 where da='999-02-20'; + +set hive.optimize.index.filter=true; +select * from newtypestbl_n2 where da='999-02-20'; + +set hive.optimize.index.filter=false; +select * from newtypestbl_n2 where da=cast('999-02-20' as date); + +set hive.optimize.index.filter=true; +select * from newtypestbl_n2 where 
da=cast('999-02-20' as date); + +set hive.vectorized.execution.enabled=true; + +set hive.optimize.index.filter=false; +select * from newtypestbl_n2 where da='999-02-20'; + +set hive.optimize.index.filter=true; +select * from newtypestbl_n2 where da='999-02-20'; + +set hive.optimize.index.filter=false; +select * from newtypestbl_n2 where da=cast('999-02-20' as date); + +set hive.optimize.index.filter=true; +select * from newtypestbl_n2 where da=cast('999-02-20' as date); diff --git a/ql/src/test/queries/clientpositive/parquet_proleptic_mixed_date.q b/ql/src/test/queries/clientpositive/parquet_proleptic_mixed_date.q new file mode 100644 index 0000000000..17b5448738 --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_proleptic_mixed_date.q @@ -0,0 +1,22 @@ +set hive.parquet.date.proleptic.gregorian=true; + +create table proleptic_table (d date) +stored as parquet; + +INSERT INTO proleptic_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11'); + +select * from proleptic_table; + +set hive.parquet.date.proleptic.gregorian.default=true; + +select * from proleptic_table; + +drop table proleptic_table; diff --git a/ql/src/test/results/clientpositive/avro_date.q.out b/ql/src/test/results/clientpositive/avro_date.q.out index 32501cf9f1..ff969a1ffe 100644 --- a/ql/src/test/results/clientpositive/avro_date.q.out +++ b/ql/src/test/results/clientpositive/avro_date.q.out @@ -73,6 +73,10 @@ POSTHOOK: Input: default@avro_date@p1=2/p2=2014-09-26 2014-02-11 {"baz":"1981-12-16"} ["2011-09-05"] 2 2014-09-26 1947-02-11 {"baz":"1921-12-16"} ["2011-09-05"] 2 2014-09-26 8200-02-11 {"baz":"6981-12-16"} ["1039-09-05"] 2 2014-09-26 +1411-02-21 {"bar":"0998-05-07","foo":"0980-12-16"} ["0011-09-04","1411-09-05"] 2 2014-09-26 +1211-02-11 {"baz":"0981-12-16"} ["0011-09-05"] 2 2014-09-26 +0849-02-11 {"baz":"0921-12-16"} ["0011-09-05"] 2 2014-09-26 +0605-02-11 {"baz":"0981-12-16"} ["0039-09-05"] 2 2014-09-26 PREHOOK: query: SELECT d, COUNT(d) FROM avro_date GROUP BY d PREHOOK: type: QUERY PREHOOK: Input: default@avro_date @@ -83,6 +87,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@avro_date POSTHOOK: Input: default@avro_date@p1=2/p2=2014-09-26 #### A masked pattern was here #### +0605-02-11 1 +0849-02-11 1 +1211-02-11 1 +1411-02-21 1 1947-02-11 1 2012-02-21 1 2014-02-11 1 @@ -100,6 +108,10 @@ POSTHOOK: Input: default@avro_date@p1=2/p2=2014-09-26 2012-02-21 {"bar":"1998-05-07","foo":"1980-12-16"} ["2011-09-04","2011-09-05"] 2 2014-09-26 2014-02-11 {"baz":"1981-12-16"} ["2011-09-05"] 2 2014-09-26 8200-02-11 {"baz":"6981-12-16"} ["1039-09-05"] 2 2014-09-26 +1411-02-21 {"bar":"0998-05-07","foo":"0980-12-16"} ["0011-09-04","1411-09-05"] 2 2014-09-26 +1211-02-11 {"baz":"0981-12-16"} ["0011-09-05"] 2 2014-09-26 +0849-02-11 {"baz":"0921-12-16"} ["0011-09-05"] 2 2014-09-26 +0605-02-11 {"baz":"0981-12-16"} ["0039-09-05"] 2 2014-09-26 PREHOOK: query: SELECT * FROM avro_date WHERE d<'2014-12-21' PREHOOK: type: QUERY PREHOOK: Input: default@avro_date @@ -113,6 +125,10 @@ POSTHOOK: Input: default@avro_date@p1=2/p2=2014-09-26 2012-02-21 {"bar":"1998-05-07","foo":"1980-12-16"} ["2011-09-04","2011-09-05"] 2 2014-09-26 2014-02-11 {"baz":"1981-12-16"} ["2011-09-05"] 2 2014-09-26 1947-02-11 {"baz":"1921-12-16"} ["2011-09-05"] 2 2014-09-26 +1411-02-21 {"bar":"0998-05-07","foo":"0980-12-16"} ["0011-09-04","1411-09-05"] 2 2014-09-26 +1211-02-11 {"baz":"0981-12-16"} ["0011-09-05"] 2 2014-09-26 +0849-02-11 {"baz":"0921-12-16"} 
["0011-09-05"] 2 2014-09-26 +0605-02-11 {"baz":"0981-12-16"} ["0039-09-05"] 2 2014-09-26 PREHOOK: query: SELECT * FROM avro_date WHERE d>'8000-12-01' PREHOOK: type: QUERY PREHOOK: Input: default@avro_date diff --git a/ql/src/test/results/clientpositive/avro_schema_evolution_native.q.out b/ql/src/test/results/clientpositive/avro_schema_evolution_native.q.out index 3ae8155b9e..cd401becad 100644 --- a/ql/src/test/results/clientpositive/avro_schema_evolution_native.q.out +++ b/ql/src/test/results/clientpositive/avro_schema_evolution_native.q.out @@ -107,7 +107,7 @@ Table Parameters: numPartitions 7 numRows 8 rawDataSize 0 - totalSize 3294 + totalSize 3455 #### A masked pattern was here #### # Storage Information @@ -219,7 +219,7 @@ Table Parameters: numPartitions 7 numRows 8 rawDataSize 0 - totalSize 3294 + totalSize 3455 #### A masked pattern was here #### # Storage Information diff --git a/ql/src/test/results/clientpositive/avro_timestamp.q.out b/ql/src/test/results/clientpositive/avro_timestamp.q.out index ca18fd97f5..0ac216a180 100644 --- a/ql/src/test/results/clientpositive/avro_timestamp.q.out +++ b/ql/src/test/results/clientpositive/avro_timestamp.q.out @@ -73,6 +73,10 @@ POSTHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 2014-02-11 07:08:09.123 {"baz":"1981-12-16 07:08:09.123"} ["2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 1947-02-11 07:08:09.123 {"baz":"1921-12-16 07:08:09.123"} ["2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 8200-02-11 07:08:09.123 {"baz":"6981-12-16 07:08:09.123"} ["1039-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +1412-02-21 07:08:09.123 {"bar":"0998-05-07 07:08:09.123","foo":"0980-12-16 07:08:09.123"} ["0011-09-04 07:08:09.123","0011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +1214-02-11 07:08:09.123 {"baz":"0981-12-16 07:08:09.123"} ["0011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +0847-02-11 07:08:09.123 {"baz":"0921-12-16 07:08:09.123"} ["0011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +0600-02-11 07:08:09.123 {"baz":"0981-12-16 07:08:09.123"} ["0039-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 PREHOOK: query: SELECT d, COUNT(d) FROM avro_timestamp GROUP BY d PREHOOK: type: QUERY PREHOOK: Input: default@avro_timestamp @@ -83,6 +87,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@avro_timestamp POSTHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 #### A masked pattern was here #### +0600-02-11 07:08:09.123 1 +0847-02-11 07:08:09.123 1 +1214-02-11 07:08:09.123 1 +1412-02-21 07:08:09.123 1 1947-02-11 07:08:09.123 1 2012-02-21 07:08:09.123 1 2014-02-11 07:08:09.123 1 @@ -100,6 +108,10 @@ POSTHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 2012-02-21 07:08:09.123 {"bar":"1998-05-07 07:08:09.123","foo":"1980-12-16 07:08:09.123"} ["2011-09-04 07:08:09.123","2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 2014-02-11 07:08:09.123 {"baz":"1981-12-16 07:08:09.123"} ["2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 8200-02-11 07:08:09.123 {"baz":"6981-12-16 07:08:09.123"} ["1039-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +1412-02-21 07:08:09.123 {"bar":"0998-05-07 07:08:09.123","foo":"0980-12-16 07:08:09.123"} ["0011-09-04 07:08:09.123","0011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +1214-02-11 07:08:09.123 {"baz":"0981-12-16 07:08:09.123"} ["0011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +0847-02-11 07:08:09.123 {"baz":"0921-12-16 07:08:09.123"} ["0011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +0600-02-11 07:08:09.123 {"baz":"0981-12-16 
07:08:09.123"} ["0039-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 PREHOOK: query: SELECT * FROM avro_timestamp WHERE d<'2014-12-21 07:08:09.123' PREHOOK: type: QUERY PREHOOK: Input: default@avro_timestamp @@ -113,6 +125,10 @@ POSTHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 2012-02-21 07:08:09.123 {"bar":"1998-05-07 07:08:09.123","foo":"1980-12-16 07:08:09.123"} ["2011-09-04 07:08:09.123","2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 2014-02-11 07:08:09.123 {"baz":"1981-12-16 07:08:09.123"} ["2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 1947-02-11 07:08:09.123 {"baz":"1921-12-16 07:08:09.123"} ["2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +1412-02-21 07:08:09.123 {"bar":"0998-05-07 07:08:09.123","foo":"0980-12-16 07:08:09.123"} ["0011-09-04 07:08:09.123","0011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +1214-02-11 07:08:09.123 {"baz":"0981-12-16 07:08:09.123"} ["0011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +0847-02-11 07:08:09.123 {"baz":"0921-12-16 07:08:09.123"} ["0011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +0600-02-11 07:08:09.123 {"baz":"0981-12-16 07:08:09.123"} ["0039-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 PREHOOK: query: SELECT * FROM avro_timestamp WHERE d>'8000-12-01 07:08:09.123' PREHOOK: type: QUERY PREHOOK: Input: default@avro_timestamp diff --git a/ql/src/test/results/clientpositive/cbo_ppd_non_deterministic.q.out b/ql/src/test/results/clientpositive/cbo_ppd_non_deterministic.q.out index bd75d7b116..d90ce88bf0 100644 --- a/ql/src/test/results/clientpositive/cbo_ppd_non_deterministic.q.out +++ b/ql/src/test/results/clientpositive/cbo_ppd_non_deterministic.q.out @@ -121,7 +121,7 @@ STAGE PLANS: TableScan alias: testa filterExpr: ((part1 = 'CA') and (part2 = 'ABC')) (type: boolean) - Statistics: Num rows: 2 Data size: 4876 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 5106 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: rand() (type: double) outputColumnNames: _col0 @@ -177,7 +177,7 @@ STAGE PLANS: TableScan alias: testa filterExpr: ((part1 = 'CA') and (part2 = 'ABC')) (type: boolean) - Statistics: Num rows: 2 Data size: 4876 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 5106 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: rand() (type: double) outputColumnNames: _col0 diff --git a/ql/src/test/results/clientpositive/extrapolate_part_stats_date.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_date.q.out index d8831fba2c..e6710d57b1 100644 --- a/ql/src/test/results/clientpositive/extrapolate_part_stats_date.q.out +++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_date.q.out @@ -171,7 +171,7 @@ STAGE PLANS: serialization.ddl struct date_dim_n1 { date d_date} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 199 + totalSize 201 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -217,7 +217,7 @@ STAGE PLANS: serialization.ddl struct date_dim_n1 { date d_date} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 199 + totalSize 201 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -263,7 +263,7 @@ STAGE PLANS: serialization.ddl struct date_dim_n1 { date d_date} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 199 + totalSize 201 #### A masked pattern 
was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -309,7 +309,7 @@ STAGE PLANS: serialization.ddl struct date_dim_n1 { date d_date} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 199 + totalSize 201 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde diff --git a/ql/src/test/results/clientpositive/llap/avro_hybrid_mixed_date.q.out b/ql/src/test/results/clientpositive/llap/avro_hybrid_mixed_date.q.out new file mode 100644 index 0000000000..fd74c4e691 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/avro_hybrid_mixed_date.q.out @@ -0,0 +1,79 @@ +PREHOOK: query: create table hybrid_table (d date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: create table hybrid_table (d date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hybrid_table +PREHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@hybrid_table +POSTHOOK: Lineage: hybrid_table.d SCRIPT [] +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-21 +1014-02-11 +0947-02-11 +0200-02-11 +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-21 +1014-02-11 +0947-02-11 +0200-02-11 +PREHOOK: query: drop table hybrid_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@hybrid_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: drop table hybrid_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@hybrid_table +POSTHOOK: Output: default@hybrid_table diff --git a/ql/src/test/results/clientpositive/llap/avro_hybrid_mixed_timestamp.q.out b/ql/src/test/results/clientpositive/llap/avro_hybrid_mixed_timestamp.q.out new file mode 100644 index 0000000000..9861ff16a7 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/avro_hybrid_mixed_timestamp.q.out @@ -0,0 +1,79 @@ +PREHOOK: query: create table hybrid_table (d timestamp) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: create table 
hybrid_table (d timestamp) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hybrid_table +PREHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21 07:08:09.123'), +('2014-02-11 07:08:09.123'), +('1947-02-11 07:08:09.123'), +('8200-02-11 07:08:09.123'), +('1012-02-21 07:15:11.123'), +('1014-02-11 07:15:11.123'), +('0947-02-11 07:15:11.123'), +('0200-02-11 07:15:11.123') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21 07:08:09.123'), +('2014-02-11 07:08:09.123'), +('1947-02-11 07:08:09.123'), +('8200-02-11 07:08:09.123'), +('1012-02-21 07:15:11.123'), +('1014-02-11 07:15:11.123'), +('0947-02-11 07:15:11.123'), +('0200-02-11 07:15:11.123') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@hybrid_table +POSTHOOK: Lineage: hybrid_table.d SCRIPT [] +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 07:08:09.123 +2014-02-11 07:08:09.123 +1947-02-11 07:08:09.123 +8200-02-11 07:08:09.123 +1012-02-21 07:15:11.123 +1014-02-11 07:15:11.123 +0947-02-11 07:15:11.123 +0200-02-11 07:15:11.123 +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 07:08:09.123 +2014-02-11 07:08:09.123 +1947-02-11 07:08:09.123 +8200-02-11 07:08:09.123 +1012-02-21 07:15:11.123 +1014-02-11 07:15:11.123 +0947-02-11 07:15:11.123 +0200-02-11 07:15:11.123 +PREHOOK: query: drop table hybrid_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@hybrid_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: drop table hybrid_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@hybrid_table +POSTHOOK: Output: default@hybrid_table diff --git a/ql/src/test/results/clientpositive/llap/avro_legacy_mixed_date.q.out b/ql/src/test/results/clientpositive/llap/avro_legacy_mixed_date.q.out new file mode 100644 index 0000000000..4aec067201 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/avro_legacy_mixed_date.q.out @@ -0,0 +1,62 @@ +PREHOOK: query: create table legacy_table (d date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@legacy_table +POSTHOOK: query: create table legacy_table (d date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@legacy_table +PREHOOK: query: load data local inpath '../../data/files/avro_legacy_mixed_dates.avro' into table legacy_table +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@legacy_table +POSTHOOK: query: load data local inpath '../../data/files/avro_legacy_mixed_dates.avro' into table legacy_table 
+POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@legacy_table +PREHOOK: query: select * from legacy_table +PREHOOK: type: QUERY +PREHOOK: Input: default@legacy_table +#### A masked pattern was here #### +POSTHOOK: query: select * from legacy_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@legacy_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-21 +1014-02-11 +0947-02-11 +0200-02-11 +PREHOOK: query: select * from legacy_table +PREHOOK: type: QUERY +PREHOOK: Input: default@legacy_table +#### A masked pattern was here #### +POSTHOOK: query: select * from legacy_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@legacy_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-27 +1014-02-17 +0947-02-16 +0200-02-10 +PREHOOK: query: drop table legacy_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@legacy_table +PREHOOK: Output: default@legacy_table +POSTHOOK: query: drop table legacy_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@legacy_table +POSTHOOK: Output: default@legacy_table diff --git a/ql/src/test/results/clientpositive/llap/avro_legacy_mixed_timestamp.q.out b/ql/src/test/results/clientpositive/llap/avro_legacy_mixed_timestamp.q.out new file mode 100644 index 0000000000..27c6f3d462 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/avro_legacy_mixed_timestamp.q.out @@ -0,0 +1,62 @@ +PREHOOK: query: create table legacy_table (d timestamp) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@legacy_table +POSTHOOK: query: create table legacy_table (d timestamp) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@legacy_table +PREHOOK: query: load data local inpath '../../data/files/avro_legacy_mixed_timestamps.avro' into table legacy_table +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@legacy_table +POSTHOOK: query: load data local inpath '../../data/files/avro_legacy_mixed_timestamps.avro' into table legacy_table +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@legacy_table +PREHOOK: query: select * from legacy_table +PREHOOK: type: QUERY +PREHOOK: Input: default@legacy_table +#### A masked pattern was here #### +POSTHOOK: query: select * from legacy_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@legacy_table +#### A masked pattern was here #### +2012-02-21 07:08:09.123 +2014-02-11 07:08:09.123 +1947-02-11 07:08:09.123 +8200-02-11 07:08:09.123 +1012-02-21 07:15:11.123 +1014-02-11 07:15:11.123 +0947-02-11 07:15:11.123 +0200-02-11 07:15:11.123 +PREHOOK: query: select * from legacy_table +PREHOOK: type: QUERY +PREHOOK: Input: default@legacy_table +#### A masked pattern was here #### +POSTHOOK: query: select * from legacy_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@legacy_table +#### A masked pattern was here #### +2012-02-21 07:08:09.123 +2014-02-11 07:08:09.123 +1947-02-11 07:08:09.123 +8200-02-11 07:08:09.123 +1012-02-27 07:15:11.123 +1014-02-17 07:15:11.123 +0947-02-16 07:15:11.123 +0200-02-10 07:15:11.123 +PREHOOK: query: drop table legacy_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: 
default@legacy_table +PREHOOK: Output: default@legacy_table +POSTHOOK: query: drop table legacy_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@legacy_table +POSTHOOK: Output: default@legacy_table diff --git a/ql/src/test/results/clientpositive/llap/avro_proleptic_mixed_date.q.out b/ql/src/test/results/clientpositive/llap/avro_proleptic_mixed_date.q.out new file mode 100644 index 0000000000..fd74c4e691 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/avro_proleptic_mixed_date.q.out @@ -0,0 +1,79 @@ +PREHOOK: query: create table hybrid_table (d date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: create table hybrid_table (d date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hybrid_table +PREHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@hybrid_table +POSTHOOK: Lineage: hybrid_table.d SCRIPT [] +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-21 +1014-02-11 +0947-02-11 +0200-02-11 +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-21 +1014-02-11 +0947-02-11 +0200-02-11 +PREHOOK: query: drop table hybrid_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@hybrid_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: drop table hybrid_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@hybrid_table +POSTHOOK: Output: default@hybrid_table diff --git a/ql/src/test/results/clientpositive/llap/avro_proleptic_mixed_timestamp.q.out b/ql/src/test/results/clientpositive/llap/avro_proleptic_mixed_timestamp.q.out new file mode 100644 index 0000000000..9861ff16a7 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/avro_proleptic_mixed_timestamp.q.out @@ -0,0 +1,79 @@ +PREHOOK: query: create table hybrid_table (d timestamp) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: create table hybrid_table (d timestamp) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED 
BY ':' +stored as avro +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hybrid_table +PREHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21 07:08:09.123'), +('2014-02-11 07:08:09.123'), +('1947-02-11 07:08:09.123'), +('8200-02-11 07:08:09.123'), +('1012-02-21 07:15:11.123'), +('1014-02-11 07:15:11.123'), +('0947-02-11 07:15:11.123'), +('0200-02-11 07:15:11.123') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21 07:08:09.123'), +('2014-02-11 07:08:09.123'), +('1947-02-11 07:08:09.123'), +('8200-02-11 07:08:09.123'), +('1012-02-21 07:15:11.123'), +('1014-02-11 07:15:11.123'), +('0947-02-11 07:15:11.123'), +('0200-02-11 07:15:11.123') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@hybrid_table +POSTHOOK: Lineage: hybrid_table.d SCRIPT [] +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 07:08:09.123 +2014-02-11 07:08:09.123 +1947-02-11 07:08:09.123 +8200-02-11 07:08:09.123 +1012-02-21 07:15:11.123 +1014-02-11 07:15:11.123 +0947-02-11 07:15:11.123 +0200-02-11 07:15:11.123 +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 07:08:09.123 +2014-02-11 07:08:09.123 +1947-02-11 07:08:09.123 +8200-02-11 07:08:09.123 +1012-02-21 07:15:11.123 +1014-02-11 07:15:11.123 +0947-02-11 07:15:11.123 +0200-02-11 07:15:11.123 +PREHOOK: query: drop table hybrid_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@hybrid_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: drop table hybrid_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@hybrid_table +POSTHOOK: Output: default@hybrid_table diff --git a/ql/src/test/results/clientpositive/llap/insert_values_orig_table_use_metadata.q.out b/ql/src/test/results/clientpositive/llap/insert_values_orig_table_use_metadata.q.out index 3c76a2c67e..dba4201ad8 100644 --- a/ql/src/test/results/clientpositive/llap/insert_values_orig_table_use_metadata.q.out +++ b/ql/src/test/results/clientpositive/llap/insert_values_orig_table_use_metadata.q.out @@ -344,7 +344,7 @@ Table Parameters: numFiles 1 numRows 2 rawDataSize 0 - totalSize 1652 + totalSize 1654 transactional true transactional_properties default #### A masked pattern was here #### @@ -442,7 +442,7 @@ Table Parameters: numFiles 2 numRows 4 rawDataSize 0 - totalSize 3304 + totalSize 3308 transactional true transactional_properties default #### A masked pattern was here #### @@ -536,7 +536,7 @@ Table Parameters: numFiles 3 numRows 12292 rawDataSize 0 - totalSize 312862 + totalSize 312868 transactional true transactional_properties default #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/llap/orc_analyze.q.out b/ql/src/test/results/clientpositive/llap/orc_analyze.q.out index 04ab995749..804f7ed590 100644 --- a/ql/src/test/results/clientpositive/llap/orc_analyze.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_analyze.q.out @@ -102,7 +102,7 @@ Table Parameters: 
numFiles 1 numRows 100 rawDataSize 52600 - totalSize 3227 + totalSize 3229 #### A masked pattern was here #### # Storage Information @@ -150,7 +150,7 @@ Table Parameters: numFiles 1 numRows 100 rawDataSize 52600 - totalSize 3227 + totalSize 3229 #### A masked pattern was here #### # Storage Information @@ -237,7 +237,7 @@ Table Parameters: numFiles 1 numRows 100 rawDataSize 52600 - totalSize 3227 + totalSize 3229 #### A masked pattern was here #### # Storage Information @@ -345,7 +345,7 @@ Partition Parameters: numFiles 1 numRows 50 rawDataSize 21950 - totalSize 2129 + totalSize 2131 #### A masked pattern was here #### # Storage Information @@ -386,7 +386,7 @@ Partition Parameters: numFiles 1 numRows 50 rawDataSize 22050 - totalSize 2142 + totalSize 2144 #### A masked pattern was here #### # Storage Information @@ -439,7 +439,7 @@ Partition Parameters: numFiles 1 numRows 50 rawDataSize 21950 - totalSize 2129 + totalSize 2131 #### A masked pattern was here #### # Storage Information @@ -480,7 +480,7 @@ Partition Parameters: numFiles 1 numRows 50 rawDataSize 22050 - totalSize 2142 + totalSize 2144 #### A masked pattern was here #### # Storage Information @@ -576,7 +576,7 @@ Partition Parameters: numFiles 1 numRows 50 rawDataSize 21950 - totalSize 2129 + totalSize 2131 #### A masked pattern was here #### # Storage Information @@ -617,7 +617,7 @@ Partition Parameters: numFiles 1 numRows 50 rawDataSize 22050 - totalSize 2142 + totalSize 2144 #### A masked pattern was here #### # Storage Information @@ -731,7 +731,7 @@ Partition Parameters: numFiles 4 numRows 50 rawDataSize 21955 - totalSize 5382 + totalSize 5390 #### A masked pattern was here #### # Storage Information @@ -772,7 +772,7 @@ Partition Parameters: numFiles 4 numRows 50 rawDataSize 22043 - totalSize 5371 + totalSize 5376 #### A masked pattern was here #### # Storage Information @@ -825,7 +825,7 @@ Partition Parameters: numFiles 4 numRows 50 rawDataSize 21955 - totalSize 5382 + totalSize 5390 #### A masked pattern was here #### # Storage Information @@ -866,7 +866,7 @@ Partition Parameters: numFiles 4 numRows 50 rawDataSize 22043 - totalSize 5371 + totalSize 5376 #### A masked pattern was here #### # Storage Information @@ -968,7 +968,7 @@ Partition Parameters: numFiles 4 numRows 50 rawDataSize 21955 - totalSize 5382 + totalSize 5390 #### A masked pattern was here #### # Storage Information @@ -1009,7 +1009,7 @@ Partition Parameters: numFiles 4 numRows 50 rawDataSize 22043 - totalSize 5371 + totalSize 5376 #### A masked pattern was here #### # Storage Information @@ -1117,7 +1117,7 @@ Partition Parameters: numFiles 1 numRows 50 rawDataSize 21950 - totalSize 2129 + totalSize 2131 #### A masked pattern was here #### # Storage Information @@ -1170,7 +1170,7 @@ Partition Parameters: numFiles 1 numRows 50 rawDataSize 21950 - totalSize 2129 + totalSize 2131 #### A masked pattern was here #### # Storage Information diff --git a/ql/src/test/results/clientpositive/llap/orc_hybrid_mixed_date.q.out b/ql/src/test/results/clientpositive/llap/orc_hybrid_mixed_date.q.out new file mode 100644 index 0000000000..dac30ccb18 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/orc_hybrid_mixed_date.q.out @@ -0,0 +1,75 @@ +PREHOOK: query: create table hybrid_table (d date) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: create table hybrid_table (d date) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@hybrid_table +PREHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@hybrid_table +POSTHOOK: Lineage: hybrid_table.d SCRIPT [] +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-21 +1014-02-11 +0947-02-11 +0200-02-11 +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-21 +1014-02-11 +0947-02-11 +0200-02-11 +PREHOOK: query: drop table hybrid_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@hybrid_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: drop table hybrid_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@hybrid_table +POSTHOOK: Output: default@hybrid_table diff --git a/ql/src/test/results/clientpositive/llap/orc_hybrid_mixed_timestamp.q.out b/ql/src/test/results/clientpositive/llap/orc_hybrid_mixed_timestamp.q.out new file mode 100644 index 0000000000..e0a02317bc --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/orc_hybrid_mixed_timestamp.q.out @@ -0,0 +1,75 @@ +PREHOOK: query: create table hybrid_table (d timestamp) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: create table hybrid_table (d timestamp) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hybrid_table +PREHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21 07:08:09.123'), +('2014-02-11 07:08:09.123'), +('1947-02-11 07:08:09.123'), +('8200-02-11 07:08:09.123'), +('1012-02-21 07:15:11.123'), +('1014-02-11 07:15:11.123'), +('0947-02-11 07:15:11.123'), +('0200-02-11 07:15:11.123') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21 07:08:09.123'), +('2014-02-11 07:08:09.123'), +('1947-02-11 07:08:09.123'), +('8200-02-11 07:08:09.123'), +('1012-02-21 07:15:11.123'), +('1014-02-11 07:15:11.123'), +('0947-02-11 07:15:11.123'), +('0200-02-11 07:15:11.123') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@hybrid_table +POSTHOOK: Lineage: hybrid_table.d SCRIPT [] +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 07:08:09.123 +2014-02-11 07:08:09.123 
+1947-02-11 07:08:09.123 +8200-02-11 07:08:09.123 +1012-02-21 07:15:11.123 +1014-02-11 07:15:11.123 +0947-02-11 07:15:11.123 +0200-02-11 07:15:11.123 +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 07:08:09.123 +2014-02-11 07:08:09.123 +1947-02-11 07:08:09.123 +8200-02-11 07:08:09.123 +1012-02-21 07:15:11.123 +1014-02-11 07:15:11.123 +0947-02-11 07:15:11.123 +0200-02-11 07:15:11.123 +PREHOOK: query: drop table hybrid_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@hybrid_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: drop table hybrid_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@hybrid_table +POSTHOOK: Output: default@hybrid_table diff --git a/ql/src/test/results/clientpositive/llap/orc_legacy_mixed_date.q.out b/ql/src/test/results/clientpositive/llap/orc_legacy_mixed_date.q.out new file mode 100644 index 0000000000..ec4c2193c1 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/orc_legacy_mixed_date.q.out @@ -0,0 +1,58 @@ +PREHOOK: query: create table legacy_table (d date) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@legacy_table +POSTHOOK: query: create table legacy_table (d date) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@legacy_table +PREHOOK: query: load data local inpath '../../data/files/orc_legacy_mixed_dates.orc' into table legacy_table +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@legacy_table +POSTHOOK: query: load data local inpath '../../data/files/orc_legacy_mixed_dates.orc' into table legacy_table +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@legacy_table +PREHOOK: query: select * from legacy_table +PREHOOK: type: QUERY +PREHOOK: Input: default@legacy_table +#### A masked pattern was here #### +POSTHOOK: query: select * from legacy_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@legacy_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-21 +1014-02-11 +0947-02-11 +0200-02-11 +PREHOOK: query: select * from legacy_table +PREHOOK: type: QUERY +PREHOOK: Input: default@legacy_table +#### A masked pattern was here #### +POSTHOOK: query: select * from legacy_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@legacy_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-27 +1014-02-17 +0947-02-16 +0200-02-10 +PREHOOK: query: drop table legacy_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@legacy_table +PREHOOK: Output: default@legacy_table +POSTHOOK: query: drop table legacy_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@legacy_table +POSTHOOK: Output: default@legacy_table diff --git a/ql/src/test/results/clientpositive/llap/orc_legacy_mixed_timestamp.q.out b/ql/src/test/results/clientpositive/llap/orc_legacy_mixed_timestamp.q.out new file mode 100644 index 0000000000..5f9aa55043 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/orc_legacy_mixed_timestamp.q.out @@ -0,0 +1,58 @@ +PREHOOK: query: create table legacy_table (ts timestamp) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@legacy_table +POSTHOOK: 
query: create table legacy_table (ts timestamp) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@legacy_table +PREHOOK: query: load data local inpath '../../data/files/orc_legacy_mixed_timestamps.orc' into table legacy_table +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@legacy_table +POSTHOOK: query: load data local inpath '../../data/files/orc_legacy_mixed_timestamps.orc' into table legacy_table +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@legacy_table +PREHOOK: query: select * from legacy_table +PREHOOK: type: QUERY +PREHOOK: Input: default@legacy_table +#### A masked pattern was here #### +POSTHOOK: query: select * from legacy_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@legacy_table +#### A masked pattern was here #### +2012-02-21 07:08:09.123 +2014-02-11 07:08:09.123 +1947-02-11 07:08:09.123 +8200-02-11 07:08:09.123 +1012-02-21 07:08:09.123 +1014-02-11 07:08:09.123 +0947-02-11 07:08:09.123 +0200-02-11 07:08:09.123 +PREHOOK: query: select * from legacy_table +PREHOOK: type: QUERY +PREHOOK: Input: default@legacy_table +#### A masked pattern was here #### +POSTHOOK: query: select * from legacy_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@legacy_table +#### A masked pattern was here #### +2012-02-21 07:08:09.123 +2014-02-11 07:08:09.123 +1947-02-11 07:08:09.123 +8200-02-11 07:08:09.123 +1012-02-27 07:08:09.123 +1014-02-17 07:08:09.123 +0947-02-16 07:08:09.123 +0200-02-10 07:08:09.123 +PREHOOK: query: drop table legacy_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@legacy_table +PREHOOK: Output: default@legacy_table +POSTHOOK: query: drop table legacy_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@legacy_table +POSTHOOK: Output: default@legacy_table diff --git a/ql/src/test/results/clientpositive/llap/orc_llap_nonvector.q.out b/ql/src/test/results/clientpositive/llap/orc_llap_nonvector.q.out index d656c3ca7e..d76f5d81e1 100644 --- a/ql/src/test/results/clientpositive/llap/orc_llap_nonvector.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_llap_nonvector.q.out @@ -1348,7 +1348,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_llap_nonvector_2 - Statistics: Num rows: 12288 Data size: 4468050 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 4468070 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ROW__ID (type: struct) outputColumnNames: _col0 diff --git a/ql/src/test/results/clientpositive/llap/orc_merge11.q.out b/ql/src/test/results/clientpositive/llap/orc_merge11.q.out index 6a8aae4ae2..c947e8dece 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge11.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge11.q.out @@ -76,6 +76,7 @@ File Version: 0.12 with ORC_517 Rows: 50000 Compression: ZLIB Compression size: 4096 +Calendar: Julian/Gregorian Type: struct Stripe Statistics: @@ -155,7 +156,7 @@ Stripes: Entry 3: count: 10000 hasNull: false min: 1969-12-31 16:04:10.0 max: 1969-12-31 16:04:10.0 positions: 0,506,294,0,232,304 Entry 4: count: 10000 hasNull: false min: 1969-12-31 16:04:10.0 max: 1969-12-31 16:04:10.0 positions: 0,666,54,0,312,64 -File length: 6672 bytes +File length: 6674 bytes Padding length: 0 bytes Padding ratio: 0% ________________________________________________________________________________________________________________________ @@ -167,6 +168,7 @@ File Version: 0.12 with ORC_517 Rows: 50000 
Compression: ZLIB Compression size: 4096 +Calendar: Julian/Gregorian Type: struct Stripe Statistics: @@ -246,7 +248,7 @@ Stripes: Entry 3: count: 10000 hasNull: false min: 1969-12-31 16:04:10.0 max: 1969-12-31 16:04:10.0 positions: 0,506,294,0,232,304 Entry 4: count: 10000 hasNull: false min: 1969-12-31 16:04:10.0 max: 1969-12-31 16:04:10.0 positions: 0,666,54,0,312,64 -File length: 6672 bytes +File length: 6674 bytes Padding length: 0 bytes Padding ratio: 0% ________________________________________________________________________________________________________________________ @@ -279,6 +281,7 @@ File Version: 0.12 with ORC_517 Rows: 100000 Compression: ZLIB Compression size: 4096 +Calendar: Julian/Gregorian Type: struct Stripe Statistics: @@ -423,7 +426,7 @@ Stripes: Entry 3: count: 10000 hasNull: false min: 1969-12-31 16:04:10.0 max: 1969-12-31 16:04:10.0 positions: 0,506,294,0,232,304 Entry 4: count: 10000 hasNull: false min: 1969-12-31 16:04:10.0 max: 1969-12-31 16:04:10.0 positions: 0,666,54,0,312,64 -File length: 12978 bytes +File length: 12980 bytes Padding length: 0 bytes Padding ratio: 0% ________________________________________________________________________________________________________________________ diff --git a/ql/src/test/results/clientpositive/llap/orc_proleptic_mixed_date.q.out b/ql/src/test/results/clientpositive/llap/orc_proleptic_mixed_date.q.out new file mode 100644 index 0000000000..dac30ccb18 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/orc_proleptic_mixed_date.q.out @@ -0,0 +1,75 @@ +PREHOOK: query: create table hybrid_table (d date) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: create table hybrid_table (d date) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hybrid_table +PREHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@hybrid_table +POSTHOOK: Lineage: hybrid_table.d SCRIPT [] +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-21 +1014-02-11 +0947-02-11 +0200-02-11 +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-21 +1014-02-11 +0947-02-11 +0200-02-11 +PREHOOK: query: drop table hybrid_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@hybrid_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: drop table hybrid_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@hybrid_table +POSTHOOK: 
Output: default@hybrid_table diff --git a/ql/src/test/results/clientpositive/llap/orc_proleptic_mixed_timestamp.q.out b/ql/src/test/results/clientpositive/llap/orc_proleptic_mixed_timestamp.q.out new file mode 100644 index 0000000000..e0a02317bc --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/orc_proleptic_mixed_timestamp.q.out @@ -0,0 +1,75 @@ +PREHOOK: query: create table hybrid_table (d timestamp) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: create table hybrid_table (d timestamp) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hybrid_table +PREHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21 07:08:09.123'), +('2014-02-11 07:08:09.123'), +('1947-02-11 07:08:09.123'), +('8200-02-11 07:08:09.123'), +('1012-02-21 07:15:11.123'), +('1014-02-11 07:15:11.123'), +('0947-02-11 07:15:11.123'), +('0200-02-11 07:15:11.123') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21 07:08:09.123'), +('2014-02-11 07:08:09.123'), +('1947-02-11 07:08:09.123'), +('8200-02-11 07:08:09.123'), +('1012-02-21 07:15:11.123'), +('1014-02-11 07:15:11.123'), +('0947-02-11 07:15:11.123'), +('0200-02-11 07:15:11.123') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@hybrid_table +POSTHOOK: Lineage: hybrid_table.d SCRIPT [] +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 07:08:09.123 +2014-02-11 07:08:09.123 +1947-02-11 07:08:09.123 +8200-02-11 07:08:09.123 +1012-02-21 07:15:11.123 +1014-02-11 07:15:11.123 +0947-02-11 07:15:11.123 +0200-02-11 07:15:11.123 +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 07:08:09.123 +2014-02-11 07:08:09.123 +1947-02-11 07:08:09.123 +8200-02-11 07:08:09.123 +1012-02-21 07:15:11.123 +1014-02-11 07:15:11.123 +0947-02-11 07:15:11.123 +0200-02-11 07:15:11.123 +PREHOOK: query: drop table hybrid_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@hybrid_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: drop table hybrid_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@hybrid_table +POSTHOOK: Output: default@hybrid_table diff --git a/ql/src/test/results/clientpositive/llap/parquet_hybrid_mixed_date.q.out b/ql/src/test/results/clientpositive/llap/parquet_hybrid_mixed_date.q.out new file mode 100644 index 0000000000..2a834e26c9 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/parquet_hybrid_mixed_date.q.out @@ -0,0 +1,75 @@ +PREHOOK: query: create table hybrid_table (d date) +stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: create table hybrid_table (d date) +stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hybrid_table +PREHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21'), 
+('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@hybrid_table +POSTHOOK: Lineage: hybrid_table.d SCRIPT [] +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-21 +1014-02-11 +0947-02-11 +0200-02-11 +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-21 +1014-02-11 +0947-02-11 +0200-02-11 +PREHOOK: query: drop table hybrid_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@hybrid_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: drop table hybrid_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@hybrid_table +POSTHOOK: Output: default@hybrid_table diff --git a/ql/src/test/results/clientpositive/llap/parquet_hybrid_mixed_timestamp.q.out b/ql/src/test/results/clientpositive/llap/parquet_hybrid_mixed_timestamp.q.out new file mode 100644 index 0000000000..51c6e9ac56 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/parquet_hybrid_mixed_timestamp.q.out @@ -0,0 +1,59 @@ +PREHOOK: query: create table hybrid_table (d timestamp) +stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: create table hybrid_table (d timestamp) +stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hybrid_table +PREHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21 07:08:09.123'), +('2014-02-11 07:08:09.123'), +('1947-02-11 07:08:09.123'), +('8200-02-11 07:08:09.123'), +('1012-02-21 07:15:11.123'), +('1014-02-11 07:15:11.123'), +('0947-02-11 07:15:11.123'), +('0200-02-11 07:15:11.123') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21 07:08:09.123'), +('2014-02-11 07:08:09.123'), +('1947-02-11 07:08:09.123'), +('8200-02-11 07:08:09.123'), +('1012-02-21 07:15:11.123'), +('1014-02-11 07:15:11.123'), +('0947-02-11 07:15:11.123'), +('0200-02-11 07:15:11.123') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@hybrid_table +POSTHOOK: Lineage: hybrid_table.d SCRIPT [] +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 07:08:09.123 +2014-02-11 07:08:09.123 +1947-02-11 07:08:09.123 +8200-02-11 07:08:09.123 +1012-02-21 
07:15:11.123 +1014-02-11 07:15:11.123 +0947-02-11 07:15:11.123 +0200-02-11 07:15:11.123 +PREHOOK: query: drop table hybrid_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@hybrid_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: drop table hybrid_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@hybrid_table +POSTHOOK: Output: default@hybrid_table diff --git a/ql/src/test/results/clientpositive/llap/parquet_legacy_mixed_date.q.out b/ql/src/test/results/clientpositive/llap/parquet_legacy_mixed_date.q.out new file mode 100644 index 0000000000..b6a0d70d2e --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/parquet_legacy_mixed_date.q.out @@ -0,0 +1,58 @@ +PREHOOK: query: create table legacy_table (d date) +stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@legacy_table +POSTHOOK: query: create table legacy_table (d date) +stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@legacy_table +PREHOOK: query: load data local inpath '../../data/files/parquet_legacy_mixed_dates.parq' into table legacy_table +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@legacy_table +POSTHOOK: query: load data local inpath '../../data/files/parquet_legacy_mixed_dates.parq' into table legacy_table +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@legacy_table +PREHOOK: query: select * from legacy_table +PREHOOK: type: QUERY +PREHOOK: Input: default@legacy_table +#### A masked pattern was here #### +POSTHOOK: query: select * from legacy_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@legacy_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-21 +1014-02-11 +0947-02-11 +0200-02-11 +PREHOOK: query: select * from legacy_table +PREHOOK: type: QUERY +PREHOOK: Input: default@legacy_table +#### A masked pattern was here #### +POSTHOOK: query: select * from legacy_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@legacy_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-27 +1014-02-17 +0947-02-16 +0200-02-10 +PREHOOK: query: drop table legacy_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@legacy_table +PREHOOK: Output: default@legacy_table +POSTHOOK: query: drop table legacy_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@legacy_table +POSTHOOK: Output: default@legacy_table diff --git a/ql/src/test/results/clientpositive/llap/parquet_legacy_mixed_timestamp.q.out b/ql/src/test/results/clientpositive/llap/parquet_legacy_mixed_timestamp.q.out new file mode 100644 index 0000000000..1259318d18 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/parquet_legacy_mixed_timestamp.q.out @@ -0,0 +1,42 @@ +PREHOOK: query: create table legacy_table (d timestamp) +stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@legacy_table +POSTHOOK: query: create table legacy_table (d timestamp) +stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@legacy_table +PREHOOK: query: load data local inpath '../../data/files/parquet_legacy_mixed_timestamps.parq' into table legacy_table +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@legacy_table +POSTHOOK: query: load data local inpath '../../data/files/parquet_legacy_mixed_timestamps.parq' into table 
legacy_table +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@legacy_table +PREHOOK: query: select * from legacy_table +PREHOOK: type: QUERY +PREHOOK: Input: default@legacy_table +#### A masked pattern was here #### +POSTHOOK: query: select * from legacy_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@legacy_table +#### A masked pattern was here #### +2012-02-21 07:08:09.123 +2014-02-11 07:08:09.123 +1947-02-11 07:08:09.123 +8200-02-11 07:08:09.123 +1012-02-21 07:15:11.123 +1014-02-11 07:15:11.123 +0947-02-11 07:15:11.123 +0200-02-11 07:15:11.123 +PREHOOK: query: drop table legacy_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@legacy_table +PREHOOK: Output: default@legacy_table +POSTHOOK: query: drop table legacy_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@legacy_table +POSTHOOK: Output: default@legacy_table diff --git a/ql/src/test/results/clientpositive/llap/parquet_proleptic_mixed_date.q.out b/ql/src/test/results/clientpositive/llap/parquet_proleptic_mixed_date.q.out new file mode 100644 index 0000000000..d0f61ea866 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/parquet_proleptic_mixed_date.q.out @@ -0,0 +1,75 @@ +PREHOOK: query: create table proleptic_table (d date) +stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@proleptic_table +POSTHOOK: query: create table proleptic_table (d date) +stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@proleptic_table +PREHOOK: query: INSERT INTO proleptic_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@proleptic_table +POSTHOOK: query: INSERT INTO proleptic_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@proleptic_table +POSTHOOK: Lineage: proleptic_table.d SCRIPT [] +PREHOOK: query: select * from proleptic_table +PREHOOK: type: QUERY +PREHOOK: Input: default@proleptic_table +#### A masked pattern was here #### +POSTHOOK: query: select * from proleptic_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@proleptic_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-21 +1014-02-11 +0947-02-11 +0200-02-11 +PREHOOK: query: select * from proleptic_table +PREHOOK: type: QUERY +PREHOOK: Input: default@proleptic_table +#### A masked pattern was here #### +POSTHOOK: query: select * from proleptic_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@proleptic_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-21 +1014-02-11 +0947-02-11 +0200-02-11 +PREHOOK: query: drop table proleptic_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@proleptic_table +PREHOOK: Output: default@proleptic_table +POSTHOOK: query: drop table proleptic_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@proleptic_table +POSTHOOK: Output: default@proleptic_table diff --git a/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out b/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out index 5367d33e0d..3a004f2f91 100644 --- 
a/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out @@ -738,7 +738,7 @@ STAGE PLANS: serialization.ddl struct l3_monthly_dw_dimplan { i64 idp_warehouse_id, i64 idp_audit_id, date idp_data_date, i64 l3_snapshot_number, i64 plan_key, i64 project_key, i64 charge_code_key, i64 transclass_key, i64 resource_key, i64 finplan_detail_object_id, i64 project_object_id, i64 txn_class_object_id, i64 charge_code_object_id, i64 resoruce_object_id, varchar(1500) plan_name, varchar(500) plan_code, varchar(50) plan_type, varchar(50) period_type, varchar(3000) plan_description, varchar(50) plan_status, varchar(50) period_start, varchar(50) period_end, varchar(1) plan_of_record, decimal(32,6) percentage, timestamp l3_created_date, varchar(30) bmo_cost_type, varchar(50) bmo_fiscal_year, timestamp clarity_updated_date, i64 is_latest_snapshot, i64 latest_fiscal_budget_plan, varchar(70) plan_category, varchar(250) last_updated_by} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 5242697 + totalSize 5242699 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -761,7 +761,7 @@ STAGE PLANS: serialization.ddl struct l3_monthly_dw_dimplan { i64 idp_warehouse_id, i64 idp_audit_id, date idp_data_date, i64 l3_snapshot_number, i64 plan_key, i64 project_key, i64 charge_code_key, i64 transclass_key, i64 resource_key, i64 finplan_detail_object_id, i64 project_object_id, i64 txn_class_object_id, i64 charge_code_object_id, i64 resoruce_object_id, varchar(1500) plan_name, varchar(500) plan_code, varchar(50) plan_type, varchar(50) period_type, varchar(3000) plan_description, varchar(50) plan_status, varchar(50) period_start, varchar(50) period_end, varchar(1) plan_of_record, decimal(32,6) percentage, timestamp l3_created_date, varchar(30) bmo_cost_type, varchar(50) bmo_fiscal_year, timestamp clarity_updated_date, i64 is_latest_snapshot, i64 latest_fiscal_budget_plan, varchar(70) plan_category, varchar(250) last_updated_by} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 5242697 + totalSize 5242699 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.l3_monthly_dw_dimplan @@ -1247,7 +1247,7 @@ STAGE PLANS: serialization.ddl struct l3_monthly_dw_dimplan { i64 idp_warehouse_id, i64 idp_audit_id, date idp_data_date, i64 l3_snapshot_number, i64 plan_key, i64 project_key, i64 charge_code_key, i64 transclass_key, i64 resource_key, i64 finplan_detail_object_id, i64 project_object_id, i64 txn_class_object_id, i64 charge_code_object_id, i64 resoruce_object_id, varchar(1500) plan_name, varchar(500) plan_code, varchar(50) plan_type, varchar(50) period_type, varchar(3000) plan_description, varchar(50) plan_status, varchar(50) period_start, varchar(50) period_end, varchar(1) plan_of_record, decimal(32,6) percentage, timestamp l3_created_date, varchar(30) bmo_cost_type, varchar(50) bmo_fiscal_year, timestamp clarity_updated_date, i64 is_latest_snapshot, i64 latest_fiscal_budget_plan, varchar(70) plan_category, varchar(250) last_updated_by} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 5242697 + totalSize 5242699 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -1270,7 +1270,7 @@ STAGE PLANS: serialization.ddl struct l3_monthly_dw_dimplan { i64 idp_warehouse_id, i64 idp_audit_id, date 
idp_data_date, i64 l3_snapshot_number, i64 plan_key, i64 project_key, i64 charge_code_key, i64 transclass_key, i64 resource_key, i64 finplan_detail_object_id, i64 project_object_id, i64 txn_class_object_id, i64 charge_code_object_id, i64 resoruce_object_id, varchar(1500) plan_name, varchar(500) plan_code, varchar(50) plan_type, varchar(50) period_type, varchar(3000) plan_description, varchar(50) plan_status, varchar(50) period_start, varchar(50) period_end, varchar(1) plan_of_record, decimal(32,6) percentage, timestamp l3_created_date, varchar(30) bmo_cost_type, varchar(50) bmo_fiscal_year, timestamp clarity_updated_date, i64 is_latest_snapshot, i64 latest_fiscal_budget_plan, varchar(70) plan_category, varchar(250) last_updated_by} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 5242697 + totalSize 5242699 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.l3_monthly_dw_dimplan diff --git a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out index cdeb909b02..0e76b0e9a5 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out @@ -3826,7 +3826,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesnullorc - Statistics: Num rows: 12288 Data size: 9450 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 9470 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -3834,7 +3834,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [] - Statistics: Num rows: 12288 Data size: 9450 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 9470 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/orc_file_dump.q.out b/ql/src/test/results/clientpositive/orc_file_dump.q.out index 2af84f8947..c206eba762 100644 --- a/ql/src/test/results/clientpositive/orc_file_dump.q.out +++ b/ql/src/test/results/clientpositive/orc_file_dump.q.out @@ -97,6 +97,7 @@ File Version: 0.12 with ORC_517 Rows: 1049 Compression: ZLIB Compression size: 262144 +Calendar: Julian/Gregorian Type: struct Stripe Statistics: @@ -269,7 +270,7 @@ Stripes: Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 98 loadFactor: 0.0156 expectedFpp: 5.9604645E-8 Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 102 loadFactor: 0.0163 expectedFpp: 6.9948186E-8 -File length: 32312 bytes +File length: 32313 bytes Padding length: 0 bytes Padding ratio: 0% ________________________________________________________________________________________________________________________ @@ -294,6 +295,7 @@ File Version: 0.12 with ORC_517 Rows: 1049 Compression: ZLIB Compression size: 262144 +Calendar: Julian/Gregorian Type: struct Stripe Statistics: @@ -466,7 +468,7 @@ Stripes: Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 174 loadFactor: 0.0181 expectedFpp: 6.426078E-13 Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 181 loadFactor: 0.0189 expectedFpp: 8.4693775E-13 -File length: 36956 bytes +File length: 36958 bytes Padding length: 0 bytes Padding ratio: 0% 
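Editor's note: the orc_file_dump output above and below changes in two observable ways, a new "Calendar: Julian/Gregorian" footer line and file lengths that grow by a byte or two, because the writer now records in the footer which calendar produced the file. A minimal sketch of how reader-side code could act on such a footer flag via the CalendarUtils conversions this patch adds; the CalendarNormalizer class itself is hypothetical and only for illustration:

import org.apache.hadoop.hive.common.type.CalendarUtils;

// Hypothetical helper, not part of the patch: once file metadata says which
// calendar the writer used, date columns can be normalized to the calendar
// the reader expects with a single conversion call.
public class CalendarNormalizer {
  private final boolean fileIsProleptic;      // e.g. derived from the "Calendar:" footer entry
  private final boolean readerWantsProleptic;

  public CalendarNormalizer(boolean fileIsProleptic, boolean readerWantsProleptic) {
    this.fileIsProleptic = fileIsProleptic;
    this.readerWantsProleptic = readerWantsProleptic;
  }

  // No-op when both calendars agree; otherwise rewrites the epoch day so the
  // rendered date string stays the same under the target calendar.
  public int normalize(int epochDay) {
    return CalendarUtils.convertDate(epochDay, fileIsProleptic, readerWantsProleptic);
  }
}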
________________________________________________________________________________________________________________________ @@ -503,6 +505,7 @@ File Version: 0.12 with ORC_517 Rows: 1049 Compression: ZLIB Compression size: 262144 +Calendar: Julian/Gregorian Type: struct Stripe Statistics: @@ -675,7 +678,7 @@ Stripes: Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 98 loadFactor: 0.0156 expectedFpp: 5.9604645E-8 Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 102 loadFactor: 0.0163 expectedFpp: 6.9948186E-8 -File length: 32312 bytes +File length: 32313 bytes Padding length: 0 bytes Padding ratio: 0% ________________________________________________________________________________________________________________________ diff --git a/ql/src/test/results/clientpositive/parquet_ppd_date.q.out b/ql/src/test/results/clientpositive/parquet_ppd_date.q.out index d9f6846236..a1966e25f1 100644 --- a/ql/src/test/results/clientpositive/parquet_ppd_date.q.out +++ b/ql/src/test/results/clientpositive/parquet_ppd_date.q.out @@ -431,3 +431,119 @@ POSTHOOK: query: select * from newtypestbl_n2 where da between '1970-02-18' and POSTHOOK: type: QUERY POSTHOOK: Input: default@newtypestbl_n2 #### A masked pattern was here #### +PREHOOK: query: insert overwrite table newtypestbl_n2 select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("999-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1820-02-27" as date) from src src2 limit 10) uniontbl +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@newtypestbl_n2 +POSTHOOK: query: insert overwrite table newtypestbl_n2 select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("999-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1820-02-27" as date) from src src2 limit 10) uniontbl +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@newtypestbl_n2 +POSTHOOK: Lineage: newtypestbl_n2.c EXPRESSION [] +POSTHOOK: Lineage: newtypestbl_n2.d EXPRESSION [] +POSTHOOK: Lineage: newtypestbl_n2.da EXPRESSION [] +POSTHOOK: Lineage: newtypestbl_n2.v EXPRESSION [] +PREHOOK: query: select * from newtypestbl_n2 where da='999-02-20' +PREHOOK: type: QUERY +PREHOOK: Input: default@newtypestbl_n2 +#### A masked pattern was here #### +POSTHOOK: query: select * from newtypestbl_n2 where da='999-02-20' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@newtypestbl_n2 +#### A masked pattern was here #### +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +PREHOOK: query: select * from newtypestbl_n2 where da='999-02-20' +PREHOOK: type: QUERY +PREHOOK: Input: default@newtypestbl_n2 +#### A masked pattern was here #### +POSTHOOK: query: select * from newtypestbl_n2 where da='999-02-20' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@newtypestbl_n2 +#### A masked pattern was here #### +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +PREHOOK: query: select * from newtypestbl_n2 where da=cast('999-02-20' as date) +PREHOOK: type: QUERY +PREHOOK: Input: default@newtypestbl_n2 +#### A masked pattern was here #### +POSTHOOK: query: select * from newtypestbl_n2 where da=cast('999-02-20' as date) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@newtypestbl_n2 +#### 
A masked pattern was here #### +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +PREHOOK: query: select * from newtypestbl_n2 where da=cast('999-02-20' as date) +PREHOOK: type: QUERY +PREHOOK: Input: default@newtypestbl_n2 +#### A masked pattern was here #### +POSTHOOK: query: select * from newtypestbl_n2 where da=cast('999-02-20' as date) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@newtypestbl_n2 +#### A masked pattern was here #### +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +PREHOOK: query: select * from newtypestbl_n2 where da='999-02-20' +PREHOOK: type: QUERY +PREHOOK: Input: default@newtypestbl_n2 +#### A masked pattern was here #### +POSTHOOK: query: select * from newtypestbl_n2 where da='999-02-20' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@newtypestbl_n2 +#### A masked pattern was here #### +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +PREHOOK: query: select * from newtypestbl_n2 where da='999-02-20' +PREHOOK: type: QUERY +PREHOOK: Input: default@newtypestbl_n2 +#### A masked pattern was here #### +POSTHOOK: query: select * from newtypestbl_n2 where da='999-02-20' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@newtypestbl_n2 +#### A masked pattern was here #### +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +PREHOOK: query: select * from newtypestbl_n2 where da=cast('999-02-20' as date) +PREHOOK: type: QUERY +PREHOOK: Input: default@newtypestbl_n2 +#### A masked pattern was here #### +POSTHOOK: query: select * from newtypestbl_n2 where da=cast('999-02-20' as date) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@newtypestbl_n2 +#### A masked pattern was here #### +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +PREHOOK: query: select * from newtypestbl_n2 where da=cast('999-02-20' as date) +PREHOOK: type: QUERY +PREHOOK: Input: default@newtypestbl_n2 +#### A masked pattern was here #### +POSTHOOK: query: select * from newtypestbl_n2 where da=cast('999-02-20' as date) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@newtypestbl_n2 +#### A masked pattern was here #### +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java index db8db1c922..27583b82b8 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java @@ -42,12 +42,12 @@ import org.apache.avro.io.BinaryEncoder; import org.apache.avro.io.DecoderFactory; import org.apache.avro.io.EncoderFactory; -import org.apache.avro.UnresolvedUnionException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.common.type.TimestampTZUtil; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.common.type.CalendarUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import 
org.apache.hadoop.hive.common.type.HiveChar; @@ -88,6 +88,11 @@ */ private ZoneId writerTimezone = null; + /** + * Whether the file was written using proleptic Gregorian or hybrid calendar. + */ + private Boolean writerProleptic = null; + private Configuration configuration = null; AvroDeserializer() {} @@ -169,9 +174,10 @@ public Object deserialize(List<String> columnNames, List<TypeInfo> columnTypes, GenericRecord r = recordWritable.getRecord(); Schema fileSchema = recordWritable.getFileSchema(); writerTimezone = recordWritable.getWriterTimezone(); + writerProleptic = recordWritable.getWriterProleptic(); - UID recordReaderId = recordWritable.getRecordReaderID(); - //If the record reader (from which the record is originated) is already seen and valid, + UID recordReaderId = recordWritable.getRecordReaderID(); + //If the record reader (from which the record is originated) is already seen and valid, //no need to re-encode the record. if(!noEncodingNeeded.contains(recordReaderId)) { SchemaReEncoder reEncoder = null; @@ -311,16 +317,30 @@ private Object deserializePrimitive(Object datum, Schema fileSchema, Schema reco str = datum.toString(); HiveVarchar hvc = new HiveVarchar(str, maxLength); return hvc; - case DATE: + case DATE: { if (recordSchema.getType() != Type.INT) { throw new AvroSerdeException("Unexpected Avro schema for Date TypeInfo: " + recordSchema.getType()); } - return Date.ofEpochMilli(DateWritableV2.daysToMillis((Integer)datum)); - case TIMESTAMP: + final boolean skipProlepticConversion; + if (writerProleptic != null) { + skipProlepticConversion = writerProleptic; + } else { + if (configuration != null) { + skipProlepticConversion = HiveConf.getBoolVar( + configuration, HiveConf.ConfVars.HIVE_AVRO_PROLEPTIC_GREGORIAN_DEFAULT); + } else { + skipProlepticConversion = HiveConf.ConfVars.HIVE_AVRO_PROLEPTIC_GREGORIAN_DEFAULT.defaultBoolVal; + } + } + + return Date.ofEpochMilli(DateWritableV2.daysToMillis( + skipProlepticConversion ? (Integer) datum : CalendarUtils.convertDateToProleptic((Integer) datum))); + } + case TIMESTAMP: { if (recordSchema.getType() != Type.LONG) { throw new AvroSerdeException( - "Unexpected Avro schema for Date TypeInfo: " + recordSchema.getType()); + "Unexpected Avro schema for Timestamp TypeInfo: " + recordSchema.getType()); } // If a time zone is found in file metadata (property name: writer.time.zone), convert the // timestamp to that (writer) time zone in order to emulate time zone agnostic behavior. @@ -328,23 +348,40 @@ private Object deserializePrimitive(Object datum, Schema fileSchema, Schema reco // to the server's (reader) time zone for backwards compatibility reasons - unless the // session level configuration hive.avro.timestamp.skip.conversion is set to true, in which // case we assume it was written by a time zone agnostic writer, so we don't convert it.
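Editor's note: the DATE branch above resolves skipProlepticConversion through a three-level fallback (writer metadata first, then the session configuration, then the compiled-in default), and the TIMESTAMP branch below repeats the same chain. A compact sketch of that precedence, with the HiveConf lookups replaced by plain parameters; the ProlepticPolicy class and its names are illustrative only:

import org.apache.hadoop.hive.common.type.CalendarUtils;

// Illustrative only: mirrors the fallback order used in both branches.
public final class ProlepticPolicy {
  private ProlepticPolicy() {}

  // Writer metadata wins; the session-level setting is consulted next;
  // the compiled-in default applies when neither is present.
  public static boolean skipProlepticConversion(Boolean writerProleptic,
                                                Boolean sessionSetting,
                                                boolean compiledDefault) {
    if (writerProleptic != null) {
      return writerProleptic;
    }
    return sessionSetting != null ? sessionSetting : compiledDefault;
  }

  // Apply the decision to an Avro DATE value (days since the epoch).
  public static int toProlepticDays(int epochDay, boolean skip) {
    return skip ? epochDay : CalendarUtils.convertDateToProleptic(epochDay);
  }
}

Extracting the decision like this would also remove the duplication between the DATE and TIMESTAMP branches.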
- boolean skipConversion; + final boolean skipUTCConversion; if (configuration != null) { - skipConversion = HiveConf.getBoolVar( + skipUTCConversion = HiveConf.getBoolVar( configuration, HiveConf.ConfVars.HIVE_AVRO_TIMESTAMP_SKIP_CONVERSION); } else { - skipConversion = HiveConf.ConfVars.HIVE_AVRO_TIMESTAMP_SKIP_CONVERSION.defaultBoolVal; + skipUTCConversion = HiveConf.ConfVars.HIVE_AVRO_TIMESTAMP_SKIP_CONVERSION.defaultBoolVal; } ZoneId convertToTimeZone; if (writerTimezone != null) { convertToTimeZone = writerTimezone; - } else if (skipConversion) { + } else if (skipUTCConversion) { convertToTimeZone = ZoneOffset.UTC; } else { convertToTimeZone = TimeZone.getDefault().toZoneId(); } - Timestamp timestamp = Timestamp.ofEpochMilli((Long)datum); - return TimestampTZUtil.convertTimestampToZone(timestamp, ZoneOffset.UTC, convertToTimeZone); + final boolean skipProlepticConversion; + if (writerProleptic != null) { + skipProlepticConversion = writerProleptic; + } else { + if (configuration != null) { + skipProlepticConversion = HiveConf.getBoolVar( + configuration, HiveConf.ConfVars.HIVE_AVRO_PROLEPTIC_GREGORIAN_DEFAULT); + } else { + skipProlepticConversion = HiveConf.ConfVars.HIVE_AVRO_PROLEPTIC_GREGORIAN_DEFAULT.defaultBoolVal; + } + } + Timestamp timestamp = TimestampTZUtil.convertTimestampToZone( + Timestamp.ofEpochMilli((Long) datum), ZoneOffset.UTC, convertToTimeZone); + if (!skipProlepticConversion) { + timestamp = Timestamp.ofEpochMilli( + CalendarUtils.convertTimeToProleptic(timestamp.toEpochMilli())); + } + return timestamp; + } default: return datum; } diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroGenericRecordWritable.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroGenericRecordWritable.java index 095197c2ed..92b81a73f0 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroGenericRecordWritable.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroGenericRecordWritable.java @@ -52,6 +52,8 @@ // Time zone file was written in, from metadata private ZoneId writerTimezone = null; + private Boolean writerProleptic = null; + /** * Unique Id determine which record reader created this record */ @@ -78,8 +80,9 @@ public AvroGenericRecordWritable(GenericRecord record) { this.record = record; } - public AvroGenericRecordWritable(ZoneId writerTimezone) { + public AvroGenericRecordWritable(ZoneId writerTimezone, Boolean writerProleptic) { this.writerTimezone = writerTimezone; + this.writerProleptic = writerProleptic; } @Override @@ -153,4 +156,8 @@ public void setFileSchema(Schema originalSchema) { public ZoneId getWriterTimezone() { return writerTimezone; } + + public Boolean getWriterProleptic() { + return writerProleptic; + } } diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java index 653f5912fe..b593a24a61 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java @@ -60,6 +60,7 @@ public static final String DATE_TYPE_NAME = "date"; public static final String TIMESTAMP_TYPE_NAME = "timestamp-millis"; public static final String WRITER_TIME_ZONE = "writer.time.zone"; + public static final String WRITER_PROLEPTIC = "writer.proleptic"; public static final String AVRO_PROP_LOGICAL_TYPE = "logicalType"; public static final String AVRO_PROP_PRECISION = "precision"; public static final String AVRO_PROP_SCALE = "scale"; @@ -139,7 +140,7 @@ public void 
initialize(Configuration configuration, Properties properties) throw this.oi = aoig.getObjectInspector(); if(!badSchema) { - this.avroSerializer = new AvroSerializer(); + this.avroSerializer = new AvroSerializer(configuration); this.avroDeserializer = new AvroDeserializer(configuration); } } diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java index 4331c11398..490434d2f1 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java @@ -31,13 +31,16 @@ import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericData.Fixed; import org.apache.avro.generic.GenericEnumSymbol; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.common.type.TimestampTZUtil; -import org.apache.hadoop.hive.serde2.io.DateWritableV2; +import org.apache.hadoop.hive.common.type.CalendarUtils; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -56,13 +59,22 @@ import org.apache.hadoop.io.Writable; class AvroSerializer { + /** * The Schema to use when serializing Map keys. * Since we're sharing this across Serializer instances, it must be immutable; * any properties need to be added in a static initializer. */ private static final Schema STRING_SCHEMA = Schema.create(Schema.Type.STRING); - AvroGenericRecordWritable cache = new AvroGenericRecordWritable(); + private AvroGenericRecordWritable cache = new AvroGenericRecordWritable(); + private boolean defaultProleptic; + + AvroSerializer() {} + + AvroSerializer(Configuration configuration) { + this.defaultProleptic = HiveConf.getBoolVar( + configuration, ConfVars.HIVE_AVRO_PROLEPTIC_GREGORIAN); + } // Hive is pretty simple (read: stupid) in writing out values via the serializer. // We're just going to go through, matching indices. Hive formats normally @@ -210,12 +222,15 @@ private Object serializePrimitive(TypeInfo typeInfo, PrimitiveObjectInspector fi return vc.getValue(); case DATE: Date date = ((DateObjectInspector)fieldOI).getPrimitiveJavaObject(structFieldData); - return DateWritableV2.dateToDays(date); + return defaultProleptic ? date.toEpochDay() : + CalendarUtils.convertDateToHybrid(date.toEpochDay()); case TIMESTAMP: Timestamp timestamp = ((TimestampObjectInspector) fieldOI).getPrimitiveJavaObject(structFieldData); + long millis = defaultProleptic ? 
timestamp.toEpochMilli() : + CalendarUtils.convertTimeToHybrid(timestamp.toEpochMilli()); timestamp = TimestampTZUtil.convertTimestampToZone( - timestamp, TimeZone.getDefault().toZoneId(), ZoneOffset.UTC); + Timestamp.ofEpochMilli(millis), TimeZone.getDefault().toZoneId(), ZoneOffset.UTC); return timestamp.toEpochMilli(); case UNKNOWN: throw new AvroSerdeException("Received UNKNOWN primitive category."); diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java b/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java index 1cd03f7368..514bca7d1b 100644 --- a/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java +++ b/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java @@ -293,7 +293,7 @@ public void canDeserializeTimestamps() throws SerDeException, IOException { record.put("timestampField", 1546387200999L); assertTrue(GENERIC_DATA.validate(readerSchema, record)); - AvroGenericRecordWritable agrw = new AvroGenericRecordWritable(ZoneId.of("America/New_York")); + AvroGenericRecordWritable agrw = new AvroGenericRecordWritable(ZoneId.of("America/New_York"), false); agrw.setRecord(record); agrw.setFileSchema(readerSchema); agrw.setRecordReaderID(new UID()); diff --git a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestStructColumnVector.java b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestStructColumnVector.java index d1a546f229..a65e84223b 100644 --- a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestStructColumnVector.java +++ b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestStructColumnVector.java @@ -106,6 +106,7 @@ public void testStringify() throws IOException { LongColumnVector x1 = new LongColumnVector(); TimestampColumnVector x2 = new TimestampColumnVector(); x2.setIsUTC(true); + x2.setUsingProlepticCalendar(true); StructColumnVector x = new StructColumnVector(1024, x1, x2); BytesColumnVector y = new BytesColumnVector(); batch.cols[0] = x; @@ -140,6 +141,7 @@ public void testStringify2() throws IOException { LongColumnVector x1 = new LongColumnVector(); TimestampColumnVector x2 = new TimestampColumnVector(); x2.setIsUTC(true); + x2.setUsingProlepticCalendar(true); StructColumnVector x = new StructColumnVector(1024, x1, x2); BytesColumnVector y = new BytesColumnVector(); batch.cols[0] = x;
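Editor's note: the day shifts in the parquet_legacy_mixed_date output above (1012-02-21 read back as 1012-02-27, 0947-02-11 as 0947-02-16, 0200-02-11 as 0200-02-10) are exactly the Julian versus proleptic-Gregorian offsets for those years. A self-contained, JDK-only illustration of the six-day gap around 1012; the demo class is not part of the patch:

import java.time.LocalDate;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.TimeZone;
import java.util.concurrent.TimeUnit;

// Illustrative only: render the same epoch day under both calendars.
public class CalendarShiftDemo {
  public static void main(String[] args) {
    // Proleptic Gregorian reading of the stored value.
    long epochDay = LocalDate.of(1012, 2, 27).toEpochDay();

    // Hybrid Julian/Gregorian reading of the same instant, in UTC
    // (GregorianCalendar's default cutover is 1582-10-15).
    GregorianCalendar hybrid = new GregorianCalendar(TimeZone.getTimeZone("UTC"));
    hybrid.setTime(new Date(TimeUnit.DAYS.toMillis(epochDay)));

    System.out.printf("epoch day %d -> proleptic 1012-02-27, hybrid %04d-%02d-%02d%n",
        epochDay,
        hybrid.get(Calendar.YEAR),
        hybrid.get(Calendar.MONTH) + 1,
        hybrid.get(Calendar.DAY_OF_MONTH));
    // Prints a six-day gap: the hybrid calendar renders this instant as
    // 1012-02-21, matching the pair of values in the golden file.
  }
}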