diff --git a/common/src/java/org/apache/hadoop/hive/common/type/CalendarUtils.java b/common/src/java/org/apache/hadoop/hive/common/type/CalendarUtils.java new file mode 100644 index 0000000000..9b491d0683 --- /dev/null +++ b/common/src/java/org/apache/hadoop/hive/common/type/CalendarUtils.java @@ -0,0 +1,183 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.common.type; + +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.GregorianCalendar; +import java.util.TimeZone; +import java.util.concurrent.TimeUnit; + +/** + * Conversion utilities from the hybrid Julian/Gregorian calendar to/from the + * proleptic Gregorian. + * + * The semantics here are to hold the string representation constant and change + * the epoch offset rather than holding the instant in time constant and changing + * the string representation. + * + * These utilities will be fast for the common case (> 1582 AD), but slow for + * old dates. + */ +public class CalendarUtils { + + private static SimpleDateFormat createFormatter(String fmt, + GregorianCalendar calendar) { + SimpleDateFormat result = new SimpleDateFormat(fmt); + result.setCalendar(calendar); + return result; + } + + private static final String DATE = "yyyy-MM-dd"; + private static final String TIME = DATE + " HH:mm:ss.SSS"; + private static final TimeZone UTC = TimeZone.getTimeZone("UTC"); + private static final GregorianCalendar HYBRID = new GregorianCalendar(); + private static final ThreadLocal<SimpleDateFormat> HYBRID_DATE_FORMAT = + ThreadLocal.withInitial(() -> createFormatter(DATE, HYBRID)); + private static final ThreadLocal<SimpleDateFormat> HYBRID_TIME_FORMAT = + ThreadLocal.withInitial(() -> createFormatter(TIME, HYBRID)); + private static final long SWITCHOVER_MILLIS; + private static final long SWITCHOVER_DAYS; + private static final GregorianCalendar PROLEPTIC = new GregorianCalendar(); + private static final ThreadLocal<SimpleDateFormat> PROLEPTIC_DATE_FORMAT = + ThreadLocal.withInitial(() -> createFormatter(DATE, PROLEPTIC)); + private static final ThreadLocal<SimpleDateFormat> PROLEPTIC_TIME_FORMAT = + ThreadLocal.withInitial(() -> createFormatter(TIME, PROLEPTIC)); + + static { + HYBRID.setTimeZone(UTC); + PROLEPTIC.setTimeZone(UTC); + PROLEPTIC.setGregorianChange(new Date(Long.MIN_VALUE)); + + // Get the last day where the two calendars agree with each other. + try { + SWITCHOVER_MILLIS = HYBRID_DATE_FORMAT.get().parse("1582-10-15").getTime(); + SWITCHOVER_DAYS = TimeUnit.MILLISECONDS.toDays(SWITCHOVER_MILLIS); + } catch (ParseException e) { + throw new IllegalArgumentException("Can't parse switch over date", e); + } + } + + /** + * Convert an epoch day from the hybrid Julian/Gregorian calendar to the + * proleptic Gregorian.
+ * @param hybrid day of epoch in the hybrid Julian/Gregorian + * @return day of epoch in the proleptic Gregorian + */ + public static int convertDateToProleptic(int hybrid) { + int proleptic = hybrid; + if (hybrid < SWITCHOVER_DAYS) { + String dateStr = HYBRID_DATE_FORMAT.get().format( + new Date(TimeUnit.DAYS.toMillis(hybrid))); + try { + proleptic = (int) TimeUnit.MILLISECONDS.toDays( + PROLEPTIC_DATE_FORMAT.get().parse(dateStr).getTime()); + } catch (ParseException e) { + throw new IllegalArgumentException("Can't parse " + dateStr, e); + } + } + return proleptic; + } + + /** + * Convert an epoch day from the proleptic Gregorian calendar to the hybrid + * Julian/Gregorian. + * @param proleptic day of epoch in the proleptic Gregorian + * @return day of epoch in the hybrid Julian/Gregorian + */ + public static int convertDateToHybrid(int proleptic) { + int hybrid = proleptic; + if (proleptic < SWITCHOVER_DAYS) { + String dateStr = PROLEPTIC_DATE_FORMAT.get().format( + new Date(TimeUnit.DAYS.toMillis(proleptic))); + try { + hybrid = (int) TimeUnit.MILLISECONDS.toDays( + HYBRID_DATE_FORMAT.get().parse(dateStr).getTime()); + } catch (ParseException e) { + throw new IllegalArgumentException("Can't parse " + dateStr, e); + } + } + return hybrid; + } + + public static int convertDate(int original, + boolean fromProleptic, + boolean toProleptic) { + if (fromProleptic != toProleptic) { + return toProleptic + ? convertDateToProleptic(original) + : convertDateToHybrid(original); + } else { + return original; + } + } + + public static long convertTime(long original, + boolean fromProleptic, + boolean toProleptic) { + if (fromProleptic != toProleptic) { + return toProleptic + ? convertTimeToProleptic(original) + : convertTimeToHybrid(original); + } else { + return original; + } + } + + /** + * Convert epoch millis from the hybrid Julian/Gregorian calendar to the + * proleptic Gregorian. + * @param hybrid millis of epoch in the hybrid Julian/Gregorian + * @return millis of epoch in the proleptic Gregorian + */ + public static long convertTimeToProleptic(long hybrid) { + long proleptic = hybrid; + if (hybrid < SWITCHOVER_MILLIS) { + String dateStr = HYBRID_TIME_FORMAT.get().format(new Date(hybrid)); + try { + proleptic = PROLEPTIC_TIME_FORMAT.get().parse(dateStr).getTime(); + } catch (ParseException e) { + throw new IllegalArgumentException("Can't parse " + dateStr, e); + } + } + return proleptic; + } + + /** + * Convert epoch millis from the proleptic Gregorian calendar to the hybrid + * Julian/Gregorian.
+ * @param proleptic millis of epoch in the proleptic Gregorian + * @return millis of epoch in the hybrid Julian/Gregorian + */ + public static long convertTimeToHybrid(long proleptic) { + long hybrid = proleptic; + if (proleptic < SWITCHOVER_MILLIS) { + String dateStr = PROLEPTIC_TIME_FORMAT.get().format(new Date(proleptic)); + try { + hybrid = HYBRID_TIME_FORMAT.get().parse(dateStr).getTime(); + } catch (ParseException e) { + throw new IllegalArgumentException("Can't parse " + dateStr, e); + } + } + return hybrid; + } + + private CalendarUtils() { + throw new UnsupportedOperationException(); + } +} \ No newline at end of file diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 7bb860b93c..756934bce1 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1999,12 +1999,24 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION("hive.parquet.timestamp.skip.conversion", true, "Current Hive implementation of parquet stores timestamps to UTC, this flag allows skipping of the conversion" + "on reading parquet files from other tools"), + HIVE_PARQUET_DATE_PROLEPTIC_GREGORIAN("hive.parquet.date.proleptic.gregorian", false, + "Should we write date using the proleptic Gregorian calendar instead of the hybrid Julian Gregorian?\n" + + "Hybrid is the default."), + HIVE_PARQUET_DATE_PROLEPTIC_GREGORIAN_DEFAULT("hive.parquet.date.proleptic.gregorian.default", false, + "This value controls whether date type in Parquet files was written using the hybrid or proleptic\n" + + "calendar. Hybrid is the default."), HIVE_AVRO_TIMESTAMP_SKIP_CONVERSION("hive.avro.timestamp.skip.conversion", false, "Some older Hive implementations (pre-3.1) wrote Avro timestamps in a UTC-normalized" + "manner, while from version 3.1 until now Hive wrote time zone agnostic timestamps. " + "Setting this flag to true will treat legacy timestamps as time zone agnostic. Setting " + "it to false will treat legacy timestamps as UTC-normalized. This flag will not affect " + "timestamps written after this change."), + HIVE_AVRO_PROLEPTIC_GREGORIAN("hive.avro.proleptic.gregorian", false, + "Should we write date and timestamp using the proleptic Gregorian calendar instead of the hybrid Julian Gregorian?\n" + + "Hybrid is the default."), + HIVE_AVRO_PROLEPTIC_GREGORIAN_DEFAULT("hive.avro.proleptic.gregorian.default", false, + "This value controls whether date and timestamp type in Avro files was written using the hybrid or proleptic\n" + + "calendar. Hybrid is the default."), HIVE_INT_TIMESTAMP_CONVERSION_IN_SECONDS("hive.int.timestamp.conversion.in.seconds", false, "Boolean/tinyint/smallint/int/bigint value is interpreted as milliseconds during the timestamp conversion.\n" + "Set this flag to true to interpret the value as seconds to be consistent with float/double."
), diff --git a/data/files/avro_date.txt b/data/files/avro_date.txt index 0858896e5e..939db2603c 100644 --- a/data/files/avro_date.txt +++ b/data/files/avro_date.txt @@ -2,3 +2,7 @@ 2014-02-11|baz:1981-12-16|2011-09-05 1947-02-11|baz:1921-12-16|2011-09-05 8200-02-11|baz:6981-12-16|1039-09-05 +1411-02-21|foo:0980-12-16,bar:0998-05-07|0011-09-04,1411-09-05 +1211-02-11|baz:0981-12-16|0011-09-05 +0849-02-11|baz:0921-12-16|0011-09-05 +0605-02-11|baz:0981-12-16|0039-09-05 diff --git a/data/files/avro_legacy_mixed_dates.avro b/data/files/avro_legacy_mixed_dates.avro new file mode 100644 index 0000000000000000000000000000000000000000..f80f6d9d1bb9704bcbc508c8f871c560582d46da GIT binary patch literal 236 zcmeZI%3@>^ODrqO*DFrWNX<=L#Z;|SQdy9yWTjM;nw(#hqNJmgmzWFUgH*+*B$lK? zg^CLjlR@GsscDI&IVC{Rw9M3;lwu{T=xUfAkamc1(MoxxIXOx?aN{%cKw5J0(=(G3 zb3!0if=#cD)q$I71=3U-TgzaQA@y{1dRW2@wl9;L{h!^ODrqO*DFrWNX<<=$5gFUQdy9yWTjM;nw(#hqNJmgmzWFUgH*+rWag$8 zmn7yEK*fs-5|craDXD3Rr8y-)(X`CeoRnfEtLSQ&Mvxg0GozLAN^^3Qbl`U6h&GNCrMmZ8kgtHW^9qr~lvuB)fCzVQ>N zVmfwVN&6OGreg=*PHzch+PVMvv{o&qV-NbTzuX+jwDbPtrfv6mnRdQE_3`O#5Iujv S`)hnmJI^oq(7ag^-8cYG6?2vV literal 0 HcmV?d00001 diff --git a/data/files/avro_timestamp.txt b/data/files/avro_timestamp.txt index a989f0e0b7..6af27ba387 100644 --- a/data/files/avro_timestamp.txt +++ b/data/files/avro_timestamp.txt @@ -1,4 +1,8 @@ 2012-02-21 07:08:09.123|foo:1980-12-16 07:08:09.123,bar:1998-05-07 07:08:09.123|2011-09-04 07:08:09.123,2011-09-05 07:08:09.123 2014-02-11 07:08:09.123|baz:1981-12-16 07:08:09.123|2011-09-05 07:08:09.123 1947-02-11 07:08:09.123|baz:1921-12-16 07:08:09.123|2011-09-05 07:08:09.123 -8200-02-11 07:08:09.123|baz:6981-12-16 07:08:09.123|1039-09-05 07:08:09.123 \ No newline at end of file +8200-02-11 07:08:09.123|baz:6981-12-16 07:08:09.123|1039-09-05 07:08:09.123 +1412-02-21 07:08:09.123|foo:0980-12-16 07:08:09.123,bar:0998-05-07 07:08:09.123|0011-09-04 07:08:09.123,0011-09-05 07:08:09.123 +1214-02-11 07:08:09.123|baz:0981-12-16 07:08:09.123|0011-09-05 07:08:09.123 +0847-02-11 07:08:09.123|baz:0921-12-16 07:08:09.123|0011-09-05 07:08:09.123 +0600-02-11 07:08:09.123|baz:0981-12-16 07:08:09.123|0039-09-05 07:08:09.123 \ No newline at end of file diff --git a/data/files/orc_legacy_mixed_dates.orc b/data/files/orc_legacy_mixed_dates.orc new file mode 100644 index 0000000000000000000000000000000000000000..94c561dcbe67eab370c38034e2c0b06a63c93f53 GIT binary patch literal 213 zcmeYdau#G@;9?VE;ot~hFa$EixR@Cj7=-vaIIK81&VKO|xbtW#Qvic014AA=Lxmkf z?E?l!f7ikouC>}+%R9N#b_(zEHDh3Sob)(BIAOwrXG|A+gf2PE67)FDWmssyU?|LB z3^Y}o3v2-wy0N7U3>?e?DN=l>F-3`qgWrI|3arx#U8lnY dKMq>~Mu~<70VO5|9t{}-wx%!4%>F^n;s8ndFTMZ( literal 0 HcmV?d00001 diff --git a/data/files/orc_legacy_mixed_timestamps.orc b/data/files/orc_legacy_mixed_timestamps.orc new file mode 100644 index 0000000000000000000000000000000000000000..137fb25449cfc3e473c4707c18afe46279b47168 GIT binary patch literal 276 zcmeYdau#G@;9?VE;ot~haAIJ19P}V5nSoi+kdIHFv2*Tc`I7B9(?TrF9-ewPeYP^Q z1VbnTLj^kn*P&^?49ua;z6}49y}hFzuPWQbw`b~AZ@#S>hrIbWnss?gtxnVMW@ljF zWxg8$G&bpRf^fow2hW%;_6TV?%o6l?&6F9Ya8~Wek%J6Je1c*Y9{k1Fd2(jv%##d` zKnKZjfgQvp#=+qwAhC1)gYK5ik_yKrT-p6{b7TNRD+2=uv%nNaDJ~$TFJY{pufd_j z!NDWM$SB2_qQu0(X~1CxHo*#q2@VtdI9vr7B^nw8l$aQJG?We4n!YeI`v*CT0|2LL BR9OH3 literal 0 HcmV?d00001 diff --git a/data/files/parquet_legacy_mixed_dates.parq b/data/files/parquet_legacy_mixed_dates.parq new file mode 100644 index 0000000000000000000000000000000000000000..b1dbacdd34addabd998b466f857ecde9784464c6 GIT binary patch literal 245 
zcmWG=3^EjD5cLuD(Ge8@GT1~pWF%PD?NDNnV7aOE?Y|fU5HJC;0HeGO1H%V928M|H z|Nny&WUc)5-+Ilj|BHQo{Rb%(Ws=nK;LgY_ON}p1&PdHo6lG!%Wm095U`$cvVGt9L l#!HChG#)yQDJFniEeI@LRo52ab|v= f}x&;o}q$9QfX#Rih@aEQld#pVybykl0k||YD!Xanz^Nsabl{Wv9V>EkwsdHahiEr Pa+)T?BA`D4fF1?_%g;ic literal 0 HcmV?d00001 diff --git a/data/files/parquet_legacy_mixed_timestamps.parq b/data/files/parquet_legacy_mixed_timestamps.parq new file mode 100644 index 0000000000000000000000000000000000000000..84aaf1ce08f858afa2360b24b80c8e30ebb8822f GIT binary patch literal 359 zcmWG=3^EjD5k0^NL_VSdqAUyy2ly`S=`&j$cVB@nzC@PNdg5$fuds9n1D!) !v getTypes(); TypeDescription getSchema() throws FileFormatException; OrcFile.Version getFileVersion(); + CalendarKind getCalendar(); } diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileMetadata.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileMetadata.java index 5eb713cc68..5b5bde9ea2 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileMetadata.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileMetadata.java @@ -26,6 +26,7 @@ import org.apache.orc.FileMetadata; import org.apache.orc.OrcFile; import org.apache.orc.OrcProto; +import org.apache.orc.OrcProto.CalendarKind; import org.apache.orc.OrcProto.StripeStatistics; import org.apache.orc.OrcUtils; import org.apache.orc.StripeInformation; @@ -51,6 +52,7 @@ private final long numberOfRows; private final boolean isOriginalFormat; private final OrcFile.Version fileVersion; + private final CalendarKind calendar; public OrcFileMetadata(Object fileKey, OrcProto.Footer footer, OrcProto.PostScript ps, List<StripeStatistics> stats, List<StripeInformation> stripes, final OrcFile.Version fileVersion) { @@ -69,6 +71,7 @@ public OrcFileMetadata(Object fileKey, OrcProto.Footer footer, OrcProto.PostScri this.fileStats = footer.getStatisticsList(); this.fileKey = fileKey; this.fileVersion = fileVersion; + this.calendar = footer.getCalendar(); } // FileMetadata @@ -170,4 +173,9 @@ public TypeDescription getSchema() throws FileFormatException { public OrcFile.Version getFileVersion() { return fileVersion; } + + @Override + public CalendarKind getCalendar() { + return calendar; + } } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/filemeta/OrcFileMetadataHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/filemeta/OrcFileMetadataHandler.java index c5757f395d..92442d14bb 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/filemeta/OrcFileMetadataHandler.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/filemeta/OrcFileMetadataHandler.java @@ -51,7 +51,7 @@ public void getFileMetadataByExpr(List<Long> fileIds, byte[] expr, ByteBuffer metadata = metadatas[i].duplicate(); // Duplicate to avoid modification.
SplitInfos result = null; try { - result = getFileFormatProxy().applySargToMetadata(sarg, metadata); + result = getFileFormatProxy().applySargToMetadata(sarg, metadata, conf); } catch (IOException ex) { LOG.error("Failed to apply SARG to metadata", ex); metadatas[i] = null; diff --git a/pom.xml b/pom.xml index 2dd2128e88..89f0dc5638 100644 --- a/pom.xml +++ b/pom.xml @@ -192,7 +192,7 @@ 2.12.1 2.5.0 2.3 - 1.5.8 + 1.5.9 1.10.19 1.7.4 2.0.0-M5 diff --git a/ql/pom.xml b/ql/pom.xml index 3632a5efe4..8b0c02b9b1 100644 --- a/ql/pom.xml +++ b/ql/pom.xml @@ -977,6 +977,7 @@ net.sf.opencsv:opencsv org.apache.hive:hive-spark-client org.apache.hive:hive-storage-api + org.threeten:threeten-extra org.apache.orc:orc-core org.apache.orc:orc-shims org.apache.orc:orc-tools diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java index 6cccd9e8b2..9bb59bc955 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java @@ -137,9 +137,10 @@ public static ColumnVector createColumnVector(TypeInfo typeInfo, case SHORT: case INT: case LONG: - case DATE: case INTERVAL_YEAR_MONTH: return new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + case DATE: + return new DateColumnVector(VectorizedRowBatch.DEFAULT_SIZE); case TIMESTAMP: return new TimestampColumnVector(VectorizedRowBatch.DEFAULT_SIZE); case INTERVAL_DAY_TIME: @@ -574,13 +575,14 @@ public static StandardStructObjectInspector convertToStandardStructObjectInspect return typeInfoList.toArray(new TypeInfo[0]); } - public static ColumnVector makeLikeColumnVector(ColumnVector source - ) throws HiveException{ + public static ColumnVector makeLikeColumnVector(ColumnVector source) throws HiveException{ if (source instanceof Decimal64ColumnVector) { Decimal64ColumnVector dec64ColVector = (Decimal64ColumnVector) source; return new Decimal64ColumnVector(dec64ColVector.vector.length, dec64ColVector.precision, dec64ColVector.scale); + } else if (source instanceof DateColumnVector) { + return new DateColumnVector(((DateColumnVector) source).vector.length); } else if (source instanceof LongColumnVector) { return new LongColumnVector(((LongColumnVector) source).vector.length); } else if (source instanceof DoubleColumnVector) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroContainerOutputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroContainerOutputFormat.java index be7d8b7ca1..fd10e08e45 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroContainerOutputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroContainerOutputFormat.java @@ -31,6 +31,7 @@ import org.apache.avro.file.DataFileWriter; import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.generic.GenericRecord; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.serde2.avro.AvroSerDe; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -80,6 +81,8 @@ // add writer.time.zone property to file metadata dfw.setMeta(AvroSerDe.WRITER_TIME_ZONE, TimeZone.getDefault().toZoneId().toString()); + dfw.setMeta(AvroSerDe.WRITER_PROLEPTIC, String.valueOf( + HiveConf.getBoolVar(jobConf, HiveConf.ConfVars.HIVE_AVRO_PROLEPTIC_GREGORIAN))); dfw.create(schema, path.getFileSystem(jobConf).create(path)); return new AvroGenericRecordWriter(dfw); } diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java index 1927e0e6e2..f27cb230b0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java @@ -64,6 +64,7 @@ final private long start; final private long stop; private ZoneId writerTimezone; + private Boolean writerProleptic; protected JobConf jobConf; final private boolean isEmptyInput; /** @@ -102,6 +103,7 @@ public AvroGenericRecordReader(JobConf job, FileSplit split, Reporter reporter) this.recordReaderID = new UID(); this.writerTimezone = extractWriterTimezoneFromMetadata(job, split, gdr); + this.writerProleptic = extractWriterProlepticFromMetadata(job, split, gdr); } /** @@ -171,6 +173,28 @@ private ZoneId extractWriterTimezoneFromMetadata(JobConf job, FileSplit split, return null; } + private Boolean extractWriterProlepticFromMetadata(JobConf job, FileSplit split, + GenericDatumReader<GenericRecord> gdr) throws IOException { + if (job == null || gdr == null || split == null || split.getPath() == null) { + return null; + } + try { + DataFileReader<GenericRecord> dataFileReader = + new DataFileReader<>(new FsInput(split.getPath(), job), gdr); + if (dataFileReader.getMeta(AvroSerDe.WRITER_PROLEPTIC) != null) { + try { + return Boolean.valueOf(new String(dataFileReader.getMeta(AvroSerDe.WRITER_PROLEPTIC), + StandardCharsets.UTF_8)); + } catch (DateTimeException e) { + throw new RuntimeException("Can't parse writer proleptic property stored in file metadata", e); + } + } + } catch (IOException e) { + // Can't access metadata, carry on. + } + return null; + } + private boolean pathIsInPartition(Path split, Path partitionPath) { boolean schemeless = split.toUri().getScheme() == null; if (schemeless) { @@ -203,7 +227,7 @@ public NullWritable createKey() { @Override public AvroGenericRecordWritable createValue() { - return new AvroGenericRecordWritable(writerTimezone); + return new AvroGenericRecordWritable(writerTimezone, writerProleptic); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ExternalCache.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ExternalCache.java index bb75ebf983..3139b10d03 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ExternalCache.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ExternalCache.java @@ -43,6 +43,8 @@ import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.shims.HadoopShims.HdfsFileStatusWithId; +import org.apache.orc.OrcConf; +import org.apache.orc.OrcProto; import org.apache.orc.impl.OrcTail; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -311,7 +313,7 @@ private static OrcTail createOrcTailFromMs( try { OrcTail orcTail = ReaderImpl.extractFileTail(copy, fs.getLen(), fs.getModificationTime()); // trigger lazy read of metadata to make sure serialized data is not corrupted and readable - orcTail.getStripeStatistics(); + orcTail.getStripeStatistics(false, false); return orcTail; } catch (Exception ex) { byte[] data = new byte[bb.remaining()]; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java index e246ac24a5..349eb254f5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java @@ -68,6 +68,7 @@ public static Reader
createReader(FileSystem fs, public ReaderOptions(Configuration conf) { super(conf); useUTCTimestamp(true); + convertToProlepticGregorian(true); } public ReaderOptions filesystem(FileSystem fs) { @@ -94,6 +95,11 @@ public ReaderOptions useUTCTimestamp(boolean value) { super.useUTCTimestamp(value); return this; } + + public ReaderOptions convertToProlepticGregorian(boolean value) { + super.convertToProlepticGregorian(value); + return this; + } } public static ReaderOptions readerOptions(Configuration conf) { @@ -331,6 +337,11 @@ public WriterOptions useUTCTimestamp(boolean value) { return this; } + public WriterOptions setProlepticGregorian(boolean value) { + super.setProlepticGregorian(value); + return this; + } + ObjectInspector getInspector() { return inspector; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileFormatProxy.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileFormatProxy.java index 11f27dfb44..148b50db94 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileFormatProxy.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileFormatProxy.java @@ -21,15 +21,16 @@ import java.nio.ByteBuffer; import java.util.List; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.metastore.FileFormatProxy; import org.apache.hadoop.hive.metastore.Metastore.SplitInfo; import org.apache.hadoop.hive.metastore.Metastore.SplitInfos; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; +import org.apache.orc.OrcConf; import org.apache.orc.OrcProto; import org.apache.orc.StripeInformation; -import org.apache.orc.StripeStatistics; import org.apache.orc.impl.OrcTail; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -40,15 +41,19 @@ @Override public SplitInfos applySargToMetadata( - SearchArgument sarg, ByteBuffer fileMetadata) throws IOException { + SearchArgument sarg, ByteBuffer fileMetadata, Configuration conf) throws IOException { // TODO: ideally we should store shortened representation of only the necessary fields // in HBase; it will probably require custom SARG application code. OrcTail orcTail = ReaderImpl.extractFileTail(fileMetadata); OrcProto.Footer footer = orcTail.getFooter(); int stripeCount = footer.getStripesCount(); + boolean writerUsedProlepticGregorian = footer.hasCalendar() + ? footer.getCalendar() == OrcProto.CalendarKind.PROLEPTIC_GREGORIAN + : OrcConf.PROLEPTIC_GREGORIAN_DEFAULT.getBoolean(conf); boolean[] result = OrcInputFormat.pickStripesViaTranslatedSarg( sarg, orcTail.getWriterVersion(), - footer.getTypesList(), orcTail.getStripeStatistics(), stripeCount); + footer.getTypesList(), orcTail.getStripeStatistics(writerUsedProlepticGregorian, true), + stripeCount); // For ORC case, send the boundaries of the stripes so we don't have to send the footer. 
SplitInfos.Builder sb = SplitInfos.newBuilder(); List<StripeInformation> stripes = orcTail.getStripes(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 76984abd0a..a069032b57 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -123,6 +123,7 @@ import org.apache.hive.common.util.Ref; import org.apache.orc.ColumnStatistics; import org.apache.orc.FileFormatException; +import org.apache.orc.OrcConf; import org.apache.orc.OrcProto; import org.apache.orc.OrcProto.Footer; import org.apache.orc.OrcUtils; @@ -1732,7 +1733,11 @@ private void populateAndCacheStripeDetails() throws IOException { stripeStats = orcReader.getStripeStatistics(); } else { stripes = orcTail.getStripes(); - stripeStats = orcTail.getStripeStatistics(); + OrcProto.Footer footer = orcTail.getFooter(); + boolean writerUsedProlepticGregorian = footer.hasCalendar() + ? footer.getCalendar() == OrcProto.CalendarKind.PROLEPTIC_GREGORIAN + : OrcConf.PROLEPTIC_GREGORIAN_DEFAULT.getBoolean(context.conf); + stripeStats = orcTail.getStripeStatistics(writerUsedProlepticGregorian, true); } fileTypes = orcTail.getTypes(); TypeDescription fileSchema = OrcUtils.convertTypeFromProtobuf(fileTypes, 0); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java index 889bd586d7..d0a6c6e25a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DateColumnVector; import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; @@ -433,7 +434,7 @@ static DateWritableV2 nextDate(ColumnVector vector, } else { result = (DateWritableV2) previous; } - int date = (int) ((LongColumnVector) vector).vector[row]; + int date = (int) ((DateColumnVector) vector).vector[row]; result.set(date); return result; } else { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java index 4082c61237..58a0c54b7c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DateColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; @@ -204,12 +205,14 @@ static void setColumn(int rowId, ColumnVector column, case TIMESTAMP: { TimestampColumnVector vector = (TimestampColumnVector) column; vector.setIsUTC(true); + vector.setUsingProlepticCalendar(true); vector.set(rowId, ((TimestampObjectInspector) inspector) .getPrimitiveJavaObject(obj).toSqlTimestamp()); break; } case DATE: { - LongColumnVector vector =
(LongColumnVector) column; + DateColumnVector vector = (DateColumnVector) column; + vector.setUsingProlepticCalendar(true); vector.vector[rowId] = ((DateObjectInspector) inspector) .getPrimitiveWritableObject(obj).getDays(); break; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java index 91a02feb20..577051d0aa 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java @@ -54,6 +54,7 @@ protected Path file; protected ProjectionPusher projectionPusher; protected boolean skipTimestampConversion = false; + protected Boolean skipProlepticConversion; protected SerDeStats serDeStats; protected JobConf jobConf; @@ -130,6 +131,13 @@ protected ParquetInputSplit getSplit( if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION)) { skipTimestampConversion = !Strings.nullToEmpty(fileMetaData.getCreatedBy()).startsWith("parquet-mr"); } + skipProlepticConversion = DataWritableReadSupport + .getWriterDateProleptic(fileMetaData.getKeyValueMetaData()); + if (skipProlepticConversion == null) { + skipProlepticConversion = HiveConf.getBoolVar( + conf, HiveConf.ConfVars.HIVE_PARQUET_DATE_PROLEPTIC_GREGORIAN_DEFAULT); + } + split = new ParquetInputSplit(finalPath, splitStart, splitLength, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java index d67b030648..f9a7dbe93c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java @@ -24,6 +24,7 @@ import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport; import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime; import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils; +import org.apache.hadoop.hive.common.type.CalendarUtils; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.io.DateWritableV2; import org.apache.hadoop.hive.serde2.io.DoubleWritable; @@ -689,7 +690,14 @@ PrimitiveConverter getConverter(final PrimitiveType type, final int index, final return new PrimitiveConverter() { @Override public void addInt(final int value) { - parent.set(index, new DateWritableV2(value)); + Map<String, String> metadata = parent.getMetadata(); + Boolean skipProlepticConversion = DataWritableReadSupport.getWriterDateProleptic(metadata); + if (skipProlepticConversion == null) { + skipProlepticConversion = Boolean.parseBoolean( + metadata.get(HiveConf.ConfVars.HIVE_PARQUET_DATE_PROLEPTIC_GREGORIAN_DEFAULT.varname)); + } + parent.set(index, + new DateWritableV2(skipProlepticConversion ?
value : CalendarUtils.convertDateToProleptic(value))); } }; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java index d3245fc155..ba146c57ef 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java @@ -25,6 +25,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.io.IOConstants; import org.apache.hadoop.hive.ql.io.parquet.convert.DataWritableRecordConverter; import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; @@ -284,6 +285,25 @@ public static ZoneId getWriterTimeZoneId(Map<String, String> metadata) { return null; } + /** + * Get the writer proleptic property from the metadata, otherwise return null. + */ + public static Boolean getWriterDateProleptic(Map<String, String> metadata) { + if (metadata == null) { + return null; + } + String value = metadata.get(DataWritableWriteSupport.WRITER_DATE_PROLEPTIC); + try { + if (value != null) { + return Boolean.valueOf(value); + } + } catch (DateTimeException e) { + throw new RuntimeException("Can't parse writer proleptic property stored in file metadata", e); + } + + return null; + } + /** * Return the columns which contains required nested attribute level * E.g., given struct a:<x:int, y:int> while 'x' is required and 'y' is not, the method will return @@ -487,6 +507,22 @@ private static MessageType getRequestedPrunedSchema( + "file footer's writer time zone."); } + String writerProleptic = DataWritableWriteSupport.WRITER_DATE_PROLEPTIC; + if (!metadata.containsKey(writerProleptic)) { + if (keyValueMetaData.containsKey(writerProleptic)) { + metadata.put(writerProleptic, keyValueMetaData.get(writerProleptic)); + } + } else if (!metadata.get(writerProleptic).equals(keyValueMetaData.get(writerProleptic))) { + throw new IllegalStateException("Metadata contains a writer proleptic property value that does not match " + + "file footer's value."); + } + + String prolepticDefault = ConfVars.HIVE_PARQUET_DATE_PROLEPTIC_GREGORIAN_DEFAULT.varname; + if (!metadata.containsKey(prolepticDefault)) { + metadata.put(prolepticDefault, String.valueOf(HiveConf.getBoolVar( + configuration, HiveConf.ConfVars.HIVE_PARQUET_DATE_PROLEPTIC_GREGORIAN_DEFAULT))); + } + return new DataWritableRecordConverter(readContext.getRequestedSchema(), metadata, hiveTypeInfo); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java index f1cce81b98..f9d0a5629b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java @@ -16,6 +16,7 @@ import java.time.ZoneId; import java.time.ZoneOffset; import java.util.Calendar; +import java.util.Date; import java.util.GregorianCalendar; import java.util.TimeZone; import java.util.concurrent.TimeUnit; @@ -40,7 +41,10 @@ private static Calendar getGMTCalendar() { //Calendar.getInstance calculates the current-time needlessly, so cache an instance.
if (parquetGMTCalendar.get() == null) { - parquetGMTCalendar.set(Calendar.getInstance(TimeZone.getTimeZone("GMT"))); + GregorianCalendar calendar = new GregorianCalendar(); + calendar.setTimeZone(TimeZone.getTimeZone("GMT")); + calendar.setGregorianChange(new Date(Long.MIN_VALUE)); + parquetGMTCalendar.set(calendar); } parquetGMTCalendar.get().clear(); return parquetGMTCalendar.get(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/BaseVectorizedColumnReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/BaseVectorizedColumnReader.java index 05d85cae39..8d3cb7c2de 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/BaseVectorizedColumnReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/BaseVectorizedColumnReader.java @@ -55,6 +55,7 @@ protected boolean skipTimestampConversion = false; protected ZoneId writerTimezone = null; + protected boolean skipProlepticConversion = false; /** * Total number of values read. @@ -119,6 +120,7 @@ public BaseVectorizedColumnReader( PageReader pageReader, boolean skipTimestampConversion, ZoneId writerTimezone, + boolean skipProlepticConversion, Type parquetType, TypeInfo hiveType) throws IOException { this.descriptor = descriptor; this.type = parquetType; @@ -126,6 +128,7 @@ public BaseVectorizedColumnReader( this.maxDefLevel = descriptor.getMaxDefinitionLevel(); this.skipTimestampConversion = skipTimestampConversion; this.writerTimezone = writerTimezone; + this.skipProlepticConversion = skipProlepticConversion; this.hiveType = hiveType; DictionaryPage dictionaryPage = pageReader.readDictionaryPage(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java index 5d161596f0..6136ce056d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java @@ -49,9 +49,9 @@ boolean isFirstRow = true; public VectorizedListColumnReader(ColumnDescriptor descriptor, PageReader pageReader, - boolean skipTimestampConversion, ZoneId writerTimezone, Type type, TypeInfo hiveType) - throws IOException { - super(descriptor, pageReader, skipTimestampConversion, writerTimezone, type, hiveType); + boolean skipTimestampConversion, ZoneId writerTimezone, boolean skipProlepticConversion, + Type type, TypeInfo hiveType) throws IOException { + super(descriptor, pageReader, skipTimestampConversion, writerTimezone, skipProlepticConversion, type, hiveType); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java index ea6dfb8a88..2104746365 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java @@ -456,13 +456,13 @@ private void checkEndOfRowGroup() throws IOException { for (int i = 0; i < types.size(); ++i) { columnReaders[i] = buildVectorizedParquetReader(columnTypesList.get(colsToInclude.get(i)), types.get(i), - pages, requestedSchema.getColumns(), skipTimestampConversion, writerTimezone, 0); + pages, requestedSchema.getColumns(), skipTimestampConversion, writerTimezone, skipProlepticConversion, 0); } } } else { for (int i = 0; i < types.size(); ++i) { 
columnReaders[i] = buildVectorizedParquetReader(columnTypesList.get(i), types.get(i), pages, - requestedSchema.getColumns(), skipTimestampConversion, writerTimezone, 0); + requestedSchema.getColumns(), skipTimestampConversion, writerTimezone, skipProlepticConversion, 0); } } @@ -506,6 +506,7 @@ private VectorizedColumnReader buildVectorizedParquetReader( List<ColumnDescriptor> columnDescriptors, boolean skipTimestampConversion, ZoneId writerTimezone, + boolean skipProlepticConversion, int depth) throws IOException { List<ColumnDescriptor> descriptors = getAllColumnDescriptorByType(depth, type, columnDescriptors); @@ -517,8 +518,8 @@ private VectorizedColumnReader buildVectorizedParquetReader( } if (fileSchema.getColumns().contains(descriptors.get(0))) { return new VectorizedPrimitiveColumnReader(descriptors.get(0), - pages.getPageReader(descriptors.get(0)), skipTimestampConversion, writerTimezone, type, - typeInfo); + pages.getPageReader(descriptors.get(0)), skipTimestampConversion, writerTimezone, skipProlepticConversion, + type, typeInfo); } else { // Support for schema evolution return new VectorizedDummyColumnReader(); @@ -531,7 +532,7 @@ private VectorizedColumnReader buildVectorizedParquetReader( for (int i = 0; i < fieldTypes.size(); i++) { VectorizedColumnReader r = buildVectorizedParquetReader(fieldTypes.get(i), types.get(i), pages, descriptors, - skipTimestampConversion, writerTimezone, depth + 1); + skipTimestampConversion, writerTimezone, skipProlepticConversion, depth + 1); if (r != null) { fieldReaders.add(r); } else { @@ -549,9 +550,8 @@ private VectorizedColumnReader buildVectorizedParquetReader( } return new VectorizedListColumnReader(descriptors.get(0), - pages.getPageReader(descriptors.get(0)), skipTimestampConversion, writerTimezone, - getElementType(type), - typeInfo); + pages.getPageReader(descriptors.get(0)), skipTimestampConversion, writerTimezone, skipProlepticConversion, + getElementType(type), typeInfo); case MAP: if (columnDescriptors == null || columnDescriptors.isEmpty()) { throw new RuntimeException( @@ -583,10 +583,10 @@ private VectorizedColumnReader buildVectorizedParquetReader( List<Type> kvTypes = groupType.getFields(); VectorizedListColumnReader keyListColumnReader = new VectorizedListColumnReader( descriptors.get(0), pages.getPageReader(descriptors.get(0)), skipTimestampConversion, - writerTimezone, kvTypes.get(0), typeInfo); + writerTimezone, skipProlepticConversion, kvTypes.get(0), typeInfo); VectorizedListColumnReader valueListColumnReader = new VectorizedListColumnReader( descriptors.get(1), pages.getPageReader(descriptors.get(1)), skipTimestampConversion, - writerTimezone, kvTypes.get(1), typeInfo); + writerTimezone, skipProlepticConversion, kvTypes.get(1), typeInfo); return new VectorizedMapColumnReader(keyListColumnReader, valueListColumnReader); case UNION: default: diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java index 2803baf90c..d29d6bdd15 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java @@ -15,10 +15,12 @@ import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DateColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import
org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.common.type.CalendarUtils; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; @@ -48,10 +50,11 @@ public VectorizedPrimitiveColumnReader( PageReader pageReader, boolean skipTimestampConversion, ZoneId writerTimezone, + boolean skipProlepticConversion, Type type, TypeInfo hiveType) throws IOException { - super(descriptor, pageReader, skipTimestampConversion, writerTimezone, type, hiveType); + super(descriptor, pageReader, skipTimestampConversion, writerTimezone, skipProlepticConversion, type, hiveType); } @Override @@ -101,6 +104,8 @@ private void readBatchHelper( readSmallInts(num, (LongColumnVector) column, rowId); break; case DATE: + readDate(num, (DateColumnVector) column, rowId); + break; case INTERVAL_YEAR_MONTH: case LONG: readLongs(num, (LongColumnVector) column, rowId); @@ -438,7 +443,34 @@ private void readBinaries( } } + private void readDate( + int total, + DateColumnVector c, + int rowId) throws IOException { + c.setUsingProlepticCalendar(true); + int left = total; + while (left > 0) { + readRepetitionAndDefinitionLevels(); + if (definitionLevel >= maxDefLevel) { + c.vector[rowId] = skipProlepticConversion ? + dataColumn.readLong() : CalendarUtils.convertDateToProleptic((int) dataColumn.readLong()); + if (dataColumn.isValid()) { + c.isNull[rowId] = false; + c.isRepeating = c.isRepeating && (c.vector[0] == c.vector[rowId]); + } else { + c.vector[rowId] = 0; + setNullValue(c, rowId); + } + } else { + setNullValue(c, rowId); + } + rowId++; + left--; + } + } + private void readTimestamp(int total, TimestampColumnVector c, int rowId) throws IOException { + c.setUsingProlepticCalendar(true); int left = total; while (left > 0) { readRepetitionAndDefinitionLevels(); @@ -513,6 +545,19 @@ private void decodeDictionaryIds( } break; case DATE: + DateColumnVector dc = (DateColumnVector) column; + dc.setUsingProlepticCalendar(true); + for (int i = rowId; i < rowId + num; ++i) { + dc.vector[i] = + skipProlepticConversion ? 
+ dictionary.readLong((int) dictionaryIds.vector[i]) : + CalendarUtils.convertDateToProleptic((int) dictionary.readLong((int) dictionaryIds.vector[i])); + if (!dictionary.isValid()) { + setNullValue(column, i); + dc.vector[i] = 0; + } + } + break; case INTERVAL_YEAR_MONTH: case LONG: for (int i = rowId; i < rowId + num; ++i) { @@ -594,9 +639,10 @@ private void decodeDictionaryIds( } break; case TIMESTAMP: + TimestampColumnVector tsc = (TimestampColumnVector) column; + tsc.setUsingProlepticCalendar(true); for (int i = rowId; i < rowId + num; ++i) { - ((TimestampColumnVector) column) - .set(i, dictionary.readTimestamp((int) dictionaryIds.vector[i]).toSqlTimestamp()); + tsc.set(i, dictionary.readTimestamp((int) dictionaryIds.vector[i]).toSqlTimestamp()); } break; case INTERVAL_DAY_TIME: diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriteSupport.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriteSupport.java index 8acde81a3d..f4212f4481 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriteSupport.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriteSupport.java @@ -18,6 +18,7 @@ import java.util.TimeZone; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.serde2.io.ParquetHiveRecord; import org.apache.hive.common.util.HiveVersionInfo; @@ -35,9 +36,11 @@ public static final String PARQUET_HIVE_SCHEMA = "parquet.hive.schema"; public static final String WRITER_TIMEZONE = "writer.time.zone"; + public static final String WRITER_DATE_PROLEPTIC = "writer.date.proleptic"; private DataWritableWriter writer; private MessageType schema; + private boolean defaultDateProleptic; public static void setSchema(final MessageType schema, final Configuration configuration) { configuration.set(PARQUET_HIVE_SCHEMA, schema.toString()); @@ -52,12 +55,15 @@ public WriteContext init(final Configuration configuration) { schema = getSchema(configuration); Map<String, String> metaData = new HashMap<>(); metaData.put(WRITER_TIMEZONE, TimeZone.getDefault().toZoneId().toString()); + defaultDateProleptic = HiveConf.getBoolVar( + configuration, HiveConf.ConfVars.HIVE_PARQUET_DATE_PROLEPTIC_GREGORIAN); + metaData.put(WRITER_DATE_PROLEPTIC, String.valueOf(defaultDateProleptic)); return new WriteContext(schema, metaData); } @Override public void prepareForWrite(final RecordConsumer recordConsumer) { - writer = new DataWritableWriter(recordConsumer, schema); + writer = new DataWritableWriter(recordConsumer, schema, defaultDateProleptic); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java index bd519eb66e..1834008c67 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java @@ -18,6 +18,7 @@ import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils; +import org.apache.hadoop.hive.common.type.CalendarUtils; import org.apache.hadoop.hive.serde2.io.DateWritableV2; import org.apache.hadoop.hive.serde2.io.ParquetHiveRecord; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; @@ -64,14 +65,17 @@ private static final Logger LOG =
LoggerFactory.getLogger(DataWritableWriter.class); protected final RecordConsumer recordConsumer; private final GroupType schema; + private final boolean defaultDateProleptic; /* This writer will be created when writing the first row in order to get information about how to inspect the record data. */ private DataWriter messageWriter; - public DataWritableWriter(final RecordConsumer recordConsumer, final GroupType schema) { + public DataWritableWriter(final RecordConsumer recordConsumer, final GroupType schema, + final boolean defaultDateProleptic) { this.recordConsumer = recordConsumer; this.schema = schema; + this.defaultDateProleptic = defaultDateProleptic; } /** @@ -552,7 +556,9 @@ public DateDataWriter(DateObjectInspector inspector) { @Override public void write(Object value) { Date vDate = inspector.getPrimitiveJavaObject(value); - recordConsumer.addInteger(DateWritableV2.dateToDays(vDate)); + recordConsumer.addInteger( + defaultDateProleptic ? DateWritableV2.dateToDays(vDate) : + CalendarUtils.convertDateToHybrid(DateWritableV2.dateToDays(vDate))); } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java index bfabdce736..764c401340 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java @@ -49,6 +49,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; +import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; @@ -204,12 +205,21 @@ private static Object boxLiteral(ExprNodeConstantDesc constantDesc, } return fl.doubleValue(); case TIMESTAMP: - if (lit instanceof org.apache.hadoop.hive.common.type.Timestamp) { - return ((org.apache.hadoop.hive.common.type.Timestamp) lit).toSqlTimestamp(); + final Timestamp ts; + if (lit instanceof Timestamp) { + ts = (Timestamp) lit; + } else if (lit instanceof org.apache.hadoop.hive.common.type.Timestamp) { + ts = ((org.apache.hadoop.hive.common.type.Timestamp) lit) + .toSqlTimestamp(); + } else { + ts = org.apache.hadoop.hive.common.type.Timestamp.valueOf(lit.toString()) + .toSqlTimestamp(); } - return Timestamp.valueOf(lit.toString()); + return ts; case DATE: - return Date.valueOf(lit.toString()); + return new Date( + DateWritable.daysToMillis( + org.apache.hadoop.hive.common.type.Date.valueOf(lit.toString()).toEpochDay())); case DECIMAL: return new HiveDecimalWritable(lit.toString()); case BOOLEAN: diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/FixedBucketPruningOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/FixedBucketPruningOptimizer.java index 5818e6b3c0..4bf05f323b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/FixedBucketPruningOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/FixedBucketPruningOptimizer.java @@ -45,6 +45,7 @@ import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; @@ -251,7 +252,9 @@ private Object convertLiteral(Object o) { // This is a bit hackish to fix mismatch between SARG and Hive types // for Timestamp and Date. TODO: Move those types to storage-api. if (o instanceof java.sql.Date) { - return Date.valueOf(o.toString()); + java.sql.Date sqlDate = (java.sql.Date)o; + return Date.ofEpochDay( + DateWritable.millisToDays(sqlDate.getTime())); } else if (o instanceof java.sql.Timestamp) { java.sql.Timestamp sqlTimestamp = (java.sql.Timestamp)o; return Timestamp.ofEpochMilli(sqlTimestamp.getTime(), sqlTimestamp.getNanos()); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerator.java index ff8884172d..bb149bc93b 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerator.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DateColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -192,10 +193,13 @@ public void assignColumnVectors(VectorizedRowBatch batch, int columnNum, case SHORT: case INT: case LONG: - case DATE: colVector = new LongColumnVector(); break; + case DATE: + colVector = new DateColumnVector(); + break; + case FLOAT: case DOUBLE: colVector = new DoubleColumnVector(); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java index 220431a444..154fe12640 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java @@ -1312,7 +1312,8 @@ public void createOrcDateFile(Path file, int minYear, int maxYear .inspector(inspector) .stripeSize(100000) .bufferSize(10000) - .blockPadding(false)); + .blockPadding(false) + .setProlepticGregorian(true)); OrcStruct row = new OrcStruct(2); for (int year = minYear; year < maxYear; ++year) { for (int ms = 1000; ms < 2000; ++ms) { diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestDataWritableWriter.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestDataWritableWriter.java index b242392a9a..01d9d2e27a 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestDataWritableWriter.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestDataWritableWriter.java @@ -192,7 +192,7 @@ private ParquetHiveRecord getParquetWritable(String columnNames, String columnTy private void writeParquetRecord(String schema, ParquetHiveRecord record) throws SerDeException { MessageType fileSchema = MessageTypeParser.parseMessageType(schema); - DataWritableWriter hiveParquetWriter = new DataWritableWriter(mockRecordConsumer, fileSchema); + DataWritableWriter hiveParquetWriter = new DataWritableWriter(mockRecordConsumer, fileSchema, false); hiveParquetWriter.write(record); } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java 
b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java index 9ea78508ee..bc5e560629 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java @@ -15,9 +15,11 @@ import java.time.ZoneId; import java.util.Calendar; +import java.util.GregorianCalendar; import java.util.TimeZone; import java.util.concurrent.TimeUnit; +import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.common.type.TimestampTZUtil; import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime; @@ -41,12 +43,13 @@ @Test public void testJulianDay() { //check if May 23, 1968 is Julian Day 2440000 - Calendar cal = Calendar.getInstance(); + GregorianCalendar cal = new GregorianCalendar(); + cal.setTimeZone(TimeZone.getTimeZone("GMT")); + cal.setGregorianChange(new java.util.Date(Long.MIN_VALUE)); cal.set(Calendar.YEAR, 1968); cal.set(Calendar.MONTH, Calendar.MAY); cal.set(Calendar.DAY_OF_MONTH, 23); cal.set(Calendar.HOUR_OF_DAY, 0); - cal.setTimeZone(TimeZone.getTimeZone("GMT")); Timestamp ts = Timestamp.ofEpochMilli(cal.getTimeInMillis()); NanoTime nt = NanoTimeUtils.getNanoTime(ts, false); @@ -56,12 +59,13 @@ public void testJulianDay() { Assert.assertEquals(tsFetched, ts); //check if 30 Julian Days between Jan 1, 2005 and Jan 31, 2005. - Calendar cal1 = Calendar.getInstance(); + GregorianCalendar cal1 = new GregorianCalendar(); + cal1.setTimeZone(TimeZone.getTimeZone("GMT")); + cal1.setGregorianChange(new java.util.Date(Long.MIN_VALUE)); cal1.set(Calendar.YEAR, 2005); cal1.set(Calendar.MONTH, Calendar.JANUARY); cal1.set(Calendar.DAY_OF_MONTH, 1); cal1.set(Calendar.HOUR_OF_DAY, 0); - cal1.setTimeZone(TimeZone.getTimeZone("GMT")); Timestamp ts1 = Timestamp.ofEpochMilli(cal1.getTimeInMillis()); NanoTime nt1 = NanoTimeUtils.getNanoTime(ts1, false); @@ -69,12 +73,13 @@ public void testJulianDay() { Timestamp ts1Fetched = NanoTimeUtils.getTimestamp(nt1, false); Assert.assertEquals(ts1Fetched, ts1); - Calendar cal2 = Calendar.getInstance(); + GregorianCalendar cal2 = new GregorianCalendar(); + cal2.setTimeZone(TimeZone.getTimeZone("UTC")); + cal2.setGregorianChange(new java.util.Date(Long.MIN_VALUE)); cal2.set(Calendar.YEAR, 2005); cal2.set(Calendar.MONTH, Calendar.JANUARY); cal2.set(Calendar.DAY_OF_MONTH, 31); cal2.set(Calendar.HOUR_OF_DAY, 0); - cal2.setTimeZone(TimeZone.getTimeZone("UTC")); Timestamp ts2 = Timestamp.ofEpochMilli(cal2.getTimeInMillis()); NanoTime nt2 = NanoTimeUtils.getNanoTime(ts2, false); @@ -86,12 +91,13 @@ public void testJulianDay() { // check if 730517 Julian Days between Jan 1, 0005 and Jan 31, 2005. // This method used to test Julian Days between Jan 1, 2005 BCE and Jan 1, 2005 CE. Since BCE // timestamps are not supported, both dates were changed to CE. 
- cal1 = Calendar.getInstance(); + cal1 = new GregorianCalendar(); + cal1.setTimeZone(TimeZone.getTimeZone("GMT")); + cal1.setGregorianChange(new java.util.Date(Long.MIN_VALUE)); cal1.set(Calendar.YEAR, 0005); cal1.set(Calendar.MONTH, Calendar.JANUARY); cal1.set(Calendar.DAY_OF_MONTH, 1); cal1.set(Calendar.HOUR_OF_DAY, 0); - cal1.setTimeZone(TimeZone.getTimeZone("GMT")); ts1 = Timestamp.ofEpochMilli(cal1.getTimeInMillis()); nt1 = NanoTimeUtils.getNanoTime(ts1, false); @@ -99,20 +105,27 @@ public void testJulianDay() { ts1Fetched = NanoTimeUtils.getTimestamp(nt1, false); Assert.assertEquals(ts1Fetched, ts1); - cal2 = Calendar.getInstance(); + cal2 = new GregorianCalendar(); + cal2.setTimeZone(TimeZone.getTimeZone("UTC")); + cal2.setGregorianChange(new java.util.Date(Long.MIN_VALUE)); cal2.set(Calendar.YEAR, 2005); cal2.set(Calendar.MONTH, Calendar.JANUARY); cal2.set(Calendar.DAY_OF_MONTH, 31); cal2.set(Calendar.HOUR_OF_DAY, 0); - cal2.setTimeZone(TimeZone.getTimeZone("UTC")); ts2 = Timestamp.ofEpochMilli(cal2.getTimeInMillis()); nt2 = NanoTimeUtils.getNanoTime(ts2, false); ts2Fetched = NanoTimeUtils.getTimestamp(nt2, false); Assert.assertEquals(ts2Fetched, ts2); - Assert.assertEquals(nt2.getJulianDay() - nt1.getJulianDay(), 730517); -} + Assert.assertEquals(730517, nt2.getJulianDay() - nt1.getJulianDay()); + + Date d1 = Date.ofEpochMilli(cal1.getTimeInMillis()); + Assert.assertEquals("0005-01-01", d1.toString()); + + Date d2 = Date.ofEpochMilli(cal2.getTimeInMillis()); + Assert.assertEquals("2005-01-31", d2.toString()); + } @Test public void testNanos() { diff --git a/ql/src/test/queries/clientpositive/avro_hybrid_mixed_date.q b/ql/src/test/queries/clientpositive/avro_hybrid_mixed_date.q new file mode 100644 index 0000000000..5d9807b370 --- /dev/null +++ b/ql/src/test/queries/clientpositive/avro_hybrid_mixed_date.q @@ -0,0 +1,22 @@ +create table hybrid_table (d date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro; + +INSERT INTO hybrid_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11'); + +select * from hybrid_table; + +set hive.avro.proleptic.gregorian.default=true; + +select * from hybrid_table; + +drop table hybrid_table; diff --git a/ql/src/test/queries/clientpositive/avro_hybrid_mixed_timestamp.q b/ql/src/test/queries/clientpositive/avro_hybrid_mixed_timestamp.q new file mode 100644 index 0000000000..28fc99c51b --- /dev/null +++ b/ql/src/test/queries/clientpositive/avro_hybrid_mixed_timestamp.q @@ -0,0 +1,22 @@ +create table hybrid_table (d timestamp) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro; + +INSERT INTO hybrid_table VALUES +('2012-02-21 07:08:09.123'), +('2014-02-11 07:08:09.123'), +('1947-02-11 07:08:09.123'), +('8200-02-11 07:08:09.123'), +('1012-02-21 07:15:11.123'), +('1014-02-11 07:15:11.123'), +('0947-02-11 07:15:11.123'), +('0200-02-11 07:15:11.123'); + +select * from hybrid_table; + +set hive.avro.proleptic.gregorian.default=true; + +select * from hybrid_table; + +drop table hybrid_table; diff --git a/ql/src/test/queries/clientpositive/avro_legacy_mixed_date.q b/ql/src/test/queries/clientpositive/avro_legacy_mixed_date.q new file mode 100644 index 0000000000..437e432080 --- /dev/null +++ b/ql/src/test/queries/clientpositive/avro_legacy_mixed_date.q @@ -0,0 +1,14 @@ +create table legacy_table (d date) +ROW FORMAT 
DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro; + +load data local inpath '../../data/files/avro_legacy_mixed_dates.avro' into table legacy_table; + +select * from legacy_table; + +set hive.avro.proleptic.gregorian.default=true; + +select * from legacy_table; + +drop table legacy_table; diff --git a/ql/src/test/queries/clientpositive/avro_legacy_mixed_timestamp.q b/ql/src/test/queries/clientpositive/avro_legacy_mixed_timestamp.q new file mode 100644 index 0000000000..e1e6870eb6 --- /dev/null +++ b/ql/src/test/queries/clientpositive/avro_legacy_mixed_timestamp.q @@ -0,0 +1,14 @@ +create table legacy_table (d timestamp) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro; + +load data local inpath '../../data/files/avro_legacy_mixed_timestamps.avro' into table legacy_table; + +select * from legacy_table; + +set hive.avro.proleptic.gregorian.default=true; + +select * from legacy_table; + +drop table legacy_table; diff --git a/ql/src/test/queries/clientpositive/avro_proleptic_mixed_date.q b/ql/src/test/queries/clientpositive/avro_proleptic_mixed_date.q new file mode 100644 index 0000000000..401f0a6b50 --- /dev/null +++ b/ql/src/test/queries/clientpositive/avro_proleptic_mixed_date.q @@ -0,0 +1,24 @@ +set hive.avro.proleptic.gregorian=true; + +create table hybrid_table (d date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro; + +INSERT INTO hybrid_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11'); + +select * from hybrid_table; + +set hive.avro.proleptic.gregorian.default=true; + +select * from hybrid_table; + +drop table hybrid_table; diff --git a/ql/src/test/queries/clientpositive/avro_proleptic_mixed_timestamp.q b/ql/src/test/queries/clientpositive/avro_proleptic_mixed_timestamp.q new file mode 100644 index 0000000000..5a67ab59d2 --- /dev/null +++ b/ql/src/test/queries/clientpositive/avro_proleptic_mixed_timestamp.q @@ -0,0 +1,24 @@ +set hive.avro.proleptic.gregorian=true; + +create table hybrid_table (d timestamp) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro; + +INSERT INTO hybrid_table VALUES +('2012-02-21 07:08:09.123'), +('2014-02-11 07:08:09.123'), +('1947-02-11 07:08:09.123'), +('8200-02-11 07:08:09.123'), +('1012-02-21 07:15:11.123'), +('1014-02-11 07:15:11.123'), +('0947-02-11 07:15:11.123'), +('0200-02-11 07:15:11.123'); + +select * from hybrid_table; + +set hive.avro.proleptic.gregorian.default=true; + +select * from hybrid_table; + +drop table hybrid_table; diff --git a/ql/src/test/queries/clientpositive/change_allowincompatible_vectorization_false_date.q b/ql/src/test/queries/clientpositive/change_allowincompatible_vectorization_false_date.q index 02d923b322..015b648b4c 100644 --- a/ql/src/test/queries/clientpositive/change_allowincompatible_vectorization_false_date.q +++ b/ql/src/test/queries/clientpositive/change_allowincompatible_vectorization_false_date.q @@ -27,5 +27,11 @@ insert into table change_allowincompatible_vectorization_false_date partition (s select ts from change_allowincompatible_vectorization_false_date where ts='2038-03-22 07:26:48.0' and s='aaa'; +insert into table change_allowincompatible_vectorization_false_date partition (s='aaa') values ('0001-01-01 00:00:00.0'); + 
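
An aside on the probe value '0001-01-01' used in these tests: the hybrid and proleptic calendars disagree by two days in year 1, so a reader that applies the wrong calendar misreports this value immediately. A minimal sketch under assumed UTC semantics (hypothetical class name, not from this patch):

import java.text.SimpleDateFormat;
import java.time.LocalDate;
import java.util.GregorianCalendar;
import java.util.TimeZone;
import java.util.concurrent.TimeUnit;

public class HybridVsProlepticDemo {
  public static void main(String[] args) throws Exception {
    // Hybrid reading: java.util's GregorianCalendar applies Julian rules
    // before the 1582 cutover.
    SimpleDateFormat hybrid = new SimpleDateFormat("yyyy-MM-dd");
    hybrid.setCalendar(new GregorianCalendar(TimeZone.getTimeZone("UTC")));
    long hybridDay = TimeUnit.MILLISECONDS.toDays(
        hybrid.parse("0001-01-01").getTime());

    // Proleptic reading: java.time extends Gregorian rules backward.
    long prolepticDay = LocalDate.parse("0001-01-01").toEpochDay();

    // The two calendars disagree by two days in year 1.
    System.out.println(prolepticDay - hybridDay); // 2
  }
}
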
+select ts from change_allowincompatible_vectorization_false_date where ts='0001-01-01 00:00:00.0' and s='aaa'; + set hive.vectorized.execution.enabled=true; select ts from change_allowincompatible_vectorization_false_date where ts='2038-03-22 07:26:48.0' and s='aaa'; + +select ts from change_allowincompatible_vectorization_false_date where ts='0001-01-01 00:00:00.0' and s='aaa'; diff --git a/ql/src/test/queries/clientpositive/change_allowincompatible_vectorization_false_date2.q b/ql/src/test/queries/clientpositive/change_allowincompatible_vectorization_false_date2.q new file mode 100644 index 0000000000..934f43e6b4 --- /dev/null +++ b/ql/src/test/queries/clientpositive/change_allowincompatible_vectorization_false_date2.q @@ -0,0 +1,21 @@ +--! qt:dataset:alltypesorc + +set hive.vectorized.execution.enabled=false; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.tez.bucket.pruning=true; +set hive.optimize.index.filter=true; +set hive.metastore.disallow.incompatible.col.type.changes=false; + +create table change_allowincompatible_vectorization_false_date (ts date) partitioned by (s string) clustered by (ts) into 32 buckets stored as orc tblproperties ('transactional'='true'); + +alter table change_allowincompatible_vectorization_false_date add partition(s='aaa'); + +alter table change_allowincompatible_vectorization_false_date add partition(s='bbb'); + +insert into table change_allowincompatible_vectorization_false_date partition (s='aaa') values ('0001-01-01 00:00:00.0'); + +select ts from change_allowincompatible_vectorization_false_date where ts='0001-01-01 00:00:00.0' and s='aaa'; + +set hive.vectorized.execution.enabled=true; +select ts from change_allowincompatible_vectorization_false_date where ts='0001-01-01 00:00:00.0' and s='aaa'; diff --git a/ql/src/test/queries/clientpositive/change_allowincompatible_vectorization_false_date3.q b/ql/src/test/queries/clientpositive/change_allowincompatible_vectorization_false_date3.q new file mode 100644 index 0000000000..3b7024eb45 --- /dev/null +++ b/ql/src/test/queries/clientpositive/change_allowincompatible_vectorization_false_date3.q @@ -0,0 +1,21 @@ +--! 
qt:dataset:alltypesorc + +set hive.vectorized.execution.enabled=false; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.tez.bucket.pruning=true; +set hive.optimize.index.filter=true; +set hive.metastore.disallow.incompatible.col.type.changes=false; + +create table change_allowincompatible_vectorization_false_date (ts timestamp) partitioned by (s string) clustered by (ts) into 32 buckets stored as orc tblproperties ('transactional'='true'); + +alter table change_allowincompatible_vectorization_false_date add partition(s='aaa'); + +alter table change_allowincompatible_vectorization_false_date add partition(s='bbb'); + +insert into table change_allowincompatible_vectorization_false_date partition (s='aaa') values ('0001-01-01 00:00:00.0'); + +select ts from change_allowincompatible_vectorization_false_date where ts='0001-01-01 00:00:00.0' and s='aaa'; + +set hive.vectorized.execution.enabled=true; +select ts from change_allowincompatible_vectorization_false_date where ts='0001-01-01 00:00:00.0' and s='aaa'; diff --git a/ql/src/test/queries/clientpositive/orc_hybrid_mixed_date.q b/ql/src/test/queries/clientpositive/orc_hybrid_mixed_date.q new file mode 100644 index 0000000000..bf71ab3b34 --- /dev/null +++ b/ql/src/test/queries/clientpositive/orc_hybrid_mixed_date.q @@ -0,0 +1,20 @@ +create table hybrid_table (d date) +stored as orc; + +INSERT INTO hybrid_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11'); + +select * from hybrid_table; + +set orc.proleptic.gregorian.default=true; + +select * from hybrid_table; + +drop table hybrid_table; diff --git a/ql/src/test/queries/clientpositive/orc_hybrid_mixed_timestamp.q b/ql/src/test/queries/clientpositive/orc_hybrid_mixed_timestamp.q new file mode 100644 index 0000000000..0fd80294fe --- /dev/null +++ b/ql/src/test/queries/clientpositive/orc_hybrid_mixed_timestamp.q @@ -0,0 +1,20 @@ +create table hybrid_table (d timestamp) +stored as orc; + +INSERT INTO hybrid_table VALUES +('2012-02-21 07:08:09.123'), +('2014-02-11 07:08:09.123'), +('1947-02-11 07:08:09.123'), +('8200-02-11 07:08:09.123'), +('1012-02-21 07:15:11.123'), +('1014-02-11 07:15:11.123'), +('0947-02-11 07:15:11.123'), +('0200-02-11 07:15:11.123'); + +select * from hybrid_table; + +set orc.proleptic.gregorian.default=true; + +select * from hybrid_table; + +drop table hybrid_table; diff --git a/ql/src/test/queries/clientpositive/orc_legacy_mixed_date.q b/ql/src/test/queries/clientpositive/orc_legacy_mixed_date.q new file mode 100644 index 0000000000..451c9834f1 --- /dev/null +++ b/ql/src/test/queries/clientpositive/orc_legacy_mixed_date.q @@ -0,0 +1,12 @@ +create table legacy_table (d date) +stored as orc; + +load data local inpath '../../data/files/orc_legacy_mixed_dates.orc' into table legacy_table; + +select * from legacy_table; + +set orc.proleptic.gregorian.default=true; + +select * from legacy_table; + +drop table legacy_table; \ No newline at end of file diff --git a/ql/src/test/queries/clientpositive/orc_legacy_mixed_timestamp.q b/ql/src/test/queries/clientpositive/orc_legacy_mixed_timestamp.q new file mode 100644 index 0000000000..6488f4dc99 --- /dev/null +++ b/ql/src/test/queries/clientpositive/orc_legacy_mixed_timestamp.q @@ -0,0 +1,12 @@ +create table legacy_table (ts timestamp) +stored as orc; + +load data local inpath '../../data/files/orc_legacy_mixed_timestamps.orc' into table legacy_table; + +select * from 
legacy_table; + +set orc.proleptic.gregorian.default=true; + +select * from legacy_table; + +drop table legacy_table; \ No newline at end of file diff --git a/ql/src/test/queries/clientpositive/orc_proleptic_mixed_date.q b/ql/src/test/queries/clientpositive/orc_proleptic_mixed_date.q new file mode 100644 index 0000000000..55aaede8b9 --- /dev/null +++ b/ql/src/test/queries/clientpositive/orc_proleptic_mixed_date.q @@ -0,0 +1,22 @@ +set orc.proleptic.gregorian=true; + +create table hybrid_table (d date) +stored as orc; + +INSERT INTO hybrid_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11'); + +select * from hybrid_table; + +set orc.proleptic.gregorian.default=true; + +select * from hybrid_table; + +drop table hybrid_table; diff --git a/ql/src/test/queries/clientpositive/orc_proleptic_mixed_timestamp.q b/ql/src/test/queries/clientpositive/orc_proleptic_mixed_timestamp.q new file mode 100644 index 0000000000..92d91e42fd --- /dev/null +++ b/ql/src/test/queries/clientpositive/orc_proleptic_mixed_timestamp.q @@ -0,0 +1,22 @@ +set orc.proleptic.gregorian=true; + +create table hybrid_table (d timestamp) +stored as orc; + +INSERT INTO hybrid_table VALUES +('2012-02-21 07:08:09.123'), +('2014-02-11 07:08:09.123'), +('1947-02-11 07:08:09.123'), +('8200-02-11 07:08:09.123'), +('1012-02-21 07:15:11.123'), +('1014-02-11 07:15:11.123'), +('0947-02-11 07:15:11.123'), +('0200-02-11 07:15:11.123'); + +select * from hybrid_table; + +set orc.proleptic.gregorian.default=true; + +select * from hybrid_table; + +drop table hybrid_table; diff --git a/ql/src/test/queries/clientpositive/parquet_hybrid_mixed_date.q b/ql/src/test/queries/clientpositive/parquet_hybrid_mixed_date.q new file mode 100644 index 0000000000..67a0cee90b --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_hybrid_mixed_date.q @@ -0,0 +1,20 @@ +create table hybrid_table (d date) +stored as parquet; + +INSERT INTO hybrid_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11'); + +select * from hybrid_table; + +set hive.parquet.date.proleptic.gregorian.default=true; + +select * from hybrid_table; + +drop table hybrid_table; diff --git a/ql/src/test/queries/clientpositive/parquet_hybrid_mixed_timestamp.q b/ql/src/test/queries/clientpositive/parquet_hybrid_mixed_timestamp.q new file mode 100644 index 0000000000..4c64b7efd2 --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_hybrid_mixed_timestamp.q @@ -0,0 +1,16 @@ +create table hybrid_table (d timestamp) +stored as parquet; + +INSERT INTO hybrid_table VALUES +('2012-02-21 07:08:09.123'), +('2014-02-11 07:08:09.123'), +('1947-02-11 07:08:09.123'), +('8200-02-11 07:08:09.123'), +('1012-02-21 07:15:11.123'), +('1014-02-11 07:15:11.123'), +('0947-02-11 07:15:11.123'), +('0200-02-11 07:15:11.123'); + +select * from hybrid_table; + +drop table hybrid_table; diff --git a/ql/src/test/queries/clientpositive/parquet_legacy_mixed_date.q b/ql/src/test/queries/clientpositive/parquet_legacy_mixed_date.q new file mode 100644 index 0000000000..bf2345c3af --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_legacy_mixed_date.q @@ -0,0 +1,12 @@ +create table legacy_table (d date) +stored as parquet; + +load data local inpath '../../data/files/parquet_legacy_mixed_dates.parq' into table legacy_table; + +select * from legacy_table; + +set hive.parquet.date.proleptic.gregorian.default=true; + +select * 
from legacy_table; + +drop table legacy_table; \ No newline at end of file diff --git a/ql/src/test/queries/clientpositive/parquet_legacy_mixed_timestamp.q b/ql/src/test/queries/clientpositive/parquet_legacy_mixed_timestamp.q new file mode 100644 index 0000000000..280df40076 --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_legacy_mixed_timestamp.q @@ -0,0 +1,8 @@ +create table legacy_table (d timestamp) +stored as parquet; + +load data local inpath '../../data/files/parquet_legacy_mixed_timestamps.parq' into table legacy_table; + +select * from legacy_table; + +drop table legacy_table; \ No newline at end of file diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_date.q b/ql/src/test/queries/clientpositive/parquet_ppd_date.q index 82085beea3..8027e9184b 100644 --- a/ql/src/test/queries/clientpositive/parquet_ppd_date.q +++ b/ql/src/test/queries/clientpositive/parquet_ppd_date.q @@ -103,3 +103,31 @@ select * from newtypestbl_n2 where da between '1970-02-18' and '1970-02-19'; set hive.optimize.index.filter=true; select * from newtypestbl_n2 where da between '1970-02-18' and '1970-02-19'; + +insert overwrite table newtypestbl_n2 select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("999-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1820-02-27" as date) from src src2 limit 10) uniontbl; + +set hive.optimize.index.filter=false; +select * from newtypestbl_n2 where da='999-02-20'; + +set hive.optimize.index.filter=true; +select * from newtypestbl_n2 where da='999-02-20'; + +set hive.optimize.index.filter=false; +select * from newtypestbl_n2 where da=cast('999-02-20' as date); + +set hive.optimize.index.filter=true; +select * from newtypestbl_n2 where da=cast('999-02-20' as date); + +set hive.vectorized.execution.enabled=true; + +set hive.optimize.index.filter=false; +select * from newtypestbl_n2 where da='999-02-20'; + +set hive.optimize.index.filter=true; +select * from newtypestbl_n2 where da='999-02-20'; + +set hive.optimize.index.filter=false; +select * from newtypestbl_n2 where da=cast('999-02-20' as date); + +set hive.optimize.index.filter=true; +select * from newtypestbl_n2 where da=cast('999-02-20' as date); diff --git a/ql/src/test/queries/clientpositive/parquet_proleptic_mixed_date.q b/ql/src/test/queries/clientpositive/parquet_proleptic_mixed_date.q new file mode 100644 index 0000000000..17b5448738 --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_proleptic_mixed_date.q @@ -0,0 +1,22 @@ +set hive.parquet.date.proleptic.gregorian=true; + +create table proleptic_table (d date) +stored as parquet; + +INSERT INTO proleptic_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11'); + +select * from proleptic_table; + +set hive.parquet.date.proleptic.gregorian.default=true; + +select * from proleptic_table; + +drop table proleptic_table; diff --git a/ql/src/test/results/clientpositive/avro_date.q.out b/ql/src/test/results/clientpositive/avro_date.q.out index 32501cf9f1..ff969a1ffe 100644 --- a/ql/src/test/results/clientpositive/avro_date.q.out +++ b/ql/src/test/results/clientpositive/avro_date.q.out @@ -73,6 +73,10 @@ POSTHOOK: Input: default@avro_date@p1=2/p2=2014-09-26 2014-02-11 {"baz":"1981-12-16"} ["2011-09-05"] 2 2014-09-26 1947-02-11 {"baz":"1921-12-16"} ["2011-09-05"] 2 2014-09-26 8200-02-11 {"baz":"6981-12-16"} ["1039-09-05"] 2 2014-09-26 +1411-02-21 
{"bar":"0998-05-07","foo":"0980-12-16"} ["0011-09-04","1411-09-05"] 2 2014-09-26 +1211-02-11 {"baz":"0981-12-16"} ["0011-09-05"] 2 2014-09-26 +0849-02-11 {"baz":"0921-12-16"} ["0011-09-05"] 2 2014-09-26 +0605-02-11 {"baz":"0981-12-16"} ["0039-09-05"] 2 2014-09-26 PREHOOK: query: SELECT d, COUNT(d) FROM avro_date GROUP BY d PREHOOK: type: QUERY PREHOOK: Input: default@avro_date @@ -83,6 +87,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@avro_date POSTHOOK: Input: default@avro_date@p1=2/p2=2014-09-26 #### A masked pattern was here #### +0605-02-11 1 +0849-02-11 1 +1211-02-11 1 +1411-02-21 1 1947-02-11 1 2012-02-21 1 2014-02-11 1 @@ -100,6 +108,10 @@ POSTHOOK: Input: default@avro_date@p1=2/p2=2014-09-26 2012-02-21 {"bar":"1998-05-07","foo":"1980-12-16"} ["2011-09-04","2011-09-05"] 2 2014-09-26 2014-02-11 {"baz":"1981-12-16"} ["2011-09-05"] 2 2014-09-26 8200-02-11 {"baz":"6981-12-16"} ["1039-09-05"] 2 2014-09-26 +1411-02-21 {"bar":"0998-05-07","foo":"0980-12-16"} ["0011-09-04","1411-09-05"] 2 2014-09-26 +1211-02-11 {"baz":"0981-12-16"} ["0011-09-05"] 2 2014-09-26 +0849-02-11 {"baz":"0921-12-16"} ["0011-09-05"] 2 2014-09-26 +0605-02-11 {"baz":"0981-12-16"} ["0039-09-05"] 2 2014-09-26 PREHOOK: query: SELECT * FROM avro_date WHERE d<'2014-12-21' PREHOOK: type: QUERY PREHOOK: Input: default@avro_date @@ -113,6 +125,10 @@ POSTHOOK: Input: default@avro_date@p1=2/p2=2014-09-26 2012-02-21 {"bar":"1998-05-07","foo":"1980-12-16"} ["2011-09-04","2011-09-05"] 2 2014-09-26 2014-02-11 {"baz":"1981-12-16"} ["2011-09-05"] 2 2014-09-26 1947-02-11 {"baz":"1921-12-16"} ["2011-09-05"] 2 2014-09-26 +1411-02-21 {"bar":"0998-05-07","foo":"0980-12-16"} ["0011-09-04","1411-09-05"] 2 2014-09-26 +1211-02-11 {"baz":"0981-12-16"} ["0011-09-05"] 2 2014-09-26 +0849-02-11 {"baz":"0921-12-16"} ["0011-09-05"] 2 2014-09-26 +0605-02-11 {"baz":"0981-12-16"} ["0039-09-05"] 2 2014-09-26 PREHOOK: query: SELECT * FROM avro_date WHERE d>'8000-12-01' PREHOOK: type: QUERY PREHOOK: Input: default@avro_date diff --git a/ql/src/test/results/clientpositive/avro_schema_evolution_native.q.out b/ql/src/test/results/clientpositive/avro_schema_evolution_native.q.out index 3ae8155b9e..cd401becad 100644 --- a/ql/src/test/results/clientpositive/avro_schema_evolution_native.q.out +++ b/ql/src/test/results/clientpositive/avro_schema_evolution_native.q.out @@ -107,7 +107,7 @@ Table Parameters: numPartitions 7 numRows 8 rawDataSize 0 - totalSize 3294 + totalSize 3455 #### A masked pattern was here #### # Storage Information @@ -219,7 +219,7 @@ Table Parameters: numPartitions 7 numRows 8 rawDataSize 0 - totalSize 3294 + totalSize 3455 #### A masked pattern was here #### # Storage Information diff --git a/ql/src/test/results/clientpositive/avro_timestamp.q.out b/ql/src/test/results/clientpositive/avro_timestamp.q.out index ca18fd97f5..0ac216a180 100644 --- a/ql/src/test/results/clientpositive/avro_timestamp.q.out +++ b/ql/src/test/results/clientpositive/avro_timestamp.q.out @@ -73,6 +73,10 @@ POSTHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 2014-02-11 07:08:09.123 {"baz":"1981-12-16 07:08:09.123"} ["2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 1947-02-11 07:08:09.123 {"baz":"1921-12-16 07:08:09.123"} ["2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 8200-02-11 07:08:09.123 {"baz":"6981-12-16 07:08:09.123"} ["1039-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +1412-02-21 07:08:09.123 {"bar":"0998-05-07 07:08:09.123","foo":"0980-12-16 07:08:09.123"} ["0011-09-04 07:08:09.123","0011-09-05 07:08:09.123"] 2 
2014-09-26 07:08:09.123 +1214-02-11 07:08:09.123 {"baz":"0981-12-16 07:08:09.123"} ["0011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +0847-02-11 07:08:09.123 {"baz":"0921-12-16 07:08:09.123"} ["0011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +0600-02-11 07:08:09.123 {"baz":"0981-12-16 07:08:09.123"} ["0039-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 PREHOOK: query: SELECT d, COUNT(d) FROM avro_timestamp GROUP BY d PREHOOK: type: QUERY PREHOOK: Input: default@avro_timestamp @@ -83,6 +87,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@avro_timestamp POSTHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 #### A masked pattern was here #### +0600-02-11 07:08:09.123 1 +0847-02-11 07:08:09.123 1 +1214-02-11 07:08:09.123 1 +1412-02-21 07:08:09.123 1 1947-02-11 07:08:09.123 1 2012-02-21 07:08:09.123 1 2014-02-11 07:08:09.123 1 @@ -100,6 +108,10 @@ POSTHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 2012-02-21 07:08:09.123 {"bar":"1998-05-07 07:08:09.123","foo":"1980-12-16 07:08:09.123"} ["2011-09-04 07:08:09.123","2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 2014-02-11 07:08:09.123 {"baz":"1981-12-16 07:08:09.123"} ["2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 8200-02-11 07:08:09.123 {"baz":"6981-12-16 07:08:09.123"} ["1039-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +1412-02-21 07:08:09.123 {"bar":"0998-05-07 07:08:09.123","foo":"0980-12-16 07:08:09.123"} ["0011-09-04 07:08:09.123","0011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +1214-02-11 07:08:09.123 {"baz":"0981-12-16 07:08:09.123"} ["0011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +0847-02-11 07:08:09.123 {"baz":"0921-12-16 07:08:09.123"} ["0011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +0600-02-11 07:08:09.123 {"baz":"0981-12-16 07:08:09.123"} ["0039-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 PREHOOK: query: SELECT * FROM avro_timestamp WHERE d<'2014-12-21 07:08:09.123' PREHOOK: type: QUERY PREHOOK: Input: default@avro_timestamp @@ -113,6 +125,10 @@ POSTHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 2012-02-21 07:08:09.123 {"bar":"1998-05-07 07:08:09.123","foo":"1980-12-16 07:08:09.123"} ["2011-09-04 07:08:09.123","2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 2014-02-11 07:08:09.123 {"baz":"1981-12-16 07:08:09.123"} ["2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 1947-02-11 07:08:09.123 {"baz":"1921-12-16 07:08:09.123"} ["2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +1412-02-21 07:08:09.123 {"bar":"0998-05-07 07:08:09.123","foo":"0980-12-16 07:08:09.123"} ["0011-09-04 07:08:09.123","0011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +1214-02-11 07:08:09.123 {"baz":"0981-12-16 07:08:09.123"} ["0011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +0847-02-11 07:08:09.123 {"baz":"0921-12-16 07:08:09.123"} ["0011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +0600-02-11 07:08:09.123 {"baz":"0981-12-16 07:08:09.123"} ["0039-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 PREHOOK: query: SELECT * FROM avro_timestamp WHERE d>'8000-12-01 07:08:09.123' PREHOOK: type: QUERY PREHOOK: Input: default@avro_timestamp diff --git a/ql/src/test/results/clientpositive/cbo_ppd_non_deterministic.q.out b/ql/src/test/results/clientpositive/cbo_ppd_non_deterministic.q.out index bd75d7b116..d90ce88bf0 100644 --- a/ql/src/test/results/clientpositive/cbo_ppd_non_deterministic.q.out +++ b/ql/src/test/results/clientpositive/cbo_ppd_non_deterministic.q.out @@ -121,7 +121,7 @@ STAGE PLANS: TableScan alias: testa filterExpr: ((part1 
= 'CA') and (part2 = 'ABC')) (type: boolean) - Statistics: Num rows: 2 Data size: 4876 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 5106 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: rand() (type: double) outputColumnNames: _col0 @@ -177,7 +177,7 @@ STAGE PLANS: TableScan alias: testa filterExpr: ((part1 = 'CA') and (part2 = 'ABC')) (type: boolean) - Statistics: Num rows: 2 Data size: 4876 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 5106 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: rand() (type: double) outputColumnNames: _col0 diff --git a/ql/src/test/results/clientpositive/extrapolate_part_stats_date.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_date.q.out index d8831fba2c..e6710d57b1 100644 --- a/ql/src/test/results/clientpositive/extrapolate_part_stats_date.q.out +++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_date.q.out @@ -171,7 +171,7 @@ STAGE PLANS: serialization.ddl struct date_dim_n1 { date d_date} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 199 + totalSize 201 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -217,7 +217,7 @@ STAGE PLANS: serialization.ddl struct date_dim_n1 { date d_date} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 199 + totalSize 201 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -263,7 +263,7 @@ STAGE PLANS: serialization.ddl struct date_dim_n1 { date d_date} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 199 + totalSize 201 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -309,7 +309,7 @@ STAGE PLANS: serialization.ddl struct date_dim_n1 { date d_date} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 199 + totalSize 201 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde diff --git a/ql/src/test/results/clientpositive/llap/avro_hybrid_mixed_date.q.out b/ql/src/test/results/clientpositive/llap/avro_hybrid_mixed_date.q.out new file mode 100644 index 0000000000..fd74c4e691 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/avro_hybrid_mixed_date.q.out @@ -0,0 +1,79 @@ +PREHOOK: query: create table hybrid_table (d date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: create table hybrid_table (d date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hybrid_table +PREHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11') +POSTHOOK: type: QUERY +POSTHOOK: Input: 
_dummy_database@_dummy_table +POSTHOOK: Output: default@hybrid_table +POSTHOOK: Lineage: hybrid_table.d SCRIPT [] +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-21 +1014-02-11 +0947-02-11 +0200-02-11 +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-21 +1014-02-11 +0947-02-11 +0200-02-11 +PREHOOK: query: drop table hybrid_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@hybrid_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: drop table hybrid_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@hybrid_table +POSTHOOK: Output: default@hybrid_table diff --git a/ql/src/test/results/clientpositive/llap/avro_hybrid_mixed_timestamp.q.out b/ql/src/test/results/clientpositive/llap/avro_hybrid_mixed_timestamp.q.out new file mode 100644 index 0000000000..9861ff16a7 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/avro_hybrid_mixed_timestamp.q.out @@ -0,0 +1,79 @@ +PREHOOK: query: create table hybrid_table (d timestamp) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: create table hybrid_table (d timestamp) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hybrid_table +PREHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21 07:08:09.123'), +('2014-02-11 07:08:09.123'), +('1947-02-11 07:08:09.123'), +('8200-02-11 07:08:09.123'), +('1012-02-21 07:15:11.123'), +('1014-02-11 07:15:11.123'), +('0947-02-11 07:15:11.123'), +('0200-02-11 07:15:11.123') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21 07:08:09.123'), +('2014-02-11 07:08:09.123'), +('1947-02-11 07:08:09.123'), +('8200-02-11 07:08:09.123'), +('1012-02-21 07:15:11.123'), +('1014-02-11 07:15:11.123'), +('0947-02-11 07:15:11.123'), +('0200-02-11 07:15:11.123') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@hybrid_table +POSTHOOK: Lineage: hybrid_table.d SCRIPT [] +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 07:08:09.123 +2014-02-11 07:08:09.123 +1947-02-11 07:08:09.123 +8200-02-11 07:08:09.123 +1012-02-21 07:15:11.123 +1014-02-11 07:15:11.123 +0947-02-11 07:15:11.123 +0200-02-11 07:15:11.123 +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: 
select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 07:08:09.123 +2014-02-11 07:08:09.123 +1947-02-11 07:08:09.123 +8200-02-11 07:08:09.123 +1012-02-21 07:15:11.123 +1014-02-11 07:15:11.123 +0947-02-11 07:15:11.123 +0200-02-11 07:15:11.123 +PREHOOK: query: drop table hybrid_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@hybrid_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: drop table hybrid_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@hybrid_table +POSTHOOK: Output: default@hybrid_table diff --git a/ql/src/test/results/clientpositive/llap/avro_legacy_mixed_date.q.out b/ql/src/test/results/clientpositive/llap/avro_legacy_mixed_date.q.out new file mode 100644 index 0000000000..4aec067201 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/avro_legacy_mixed_date.q.out @@ -0,0 +1,62 @@ +PREHOOK: query: create table legacy_table (d date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@legacy_table +POSTHOOK: query: create table legacy_table (d date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@legacy_table +PREHOOK: query: load data local inpath '../../data/files/avro_legacy_mixed_dates.avro' into table legacy_table +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@legacy_table +POSTHOOK: query: load data local inpath '../../data/files/avro_legacy_mixed_dates.avro' into table legacy_table +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@legacy_table +PREHOOK: query: select * from legacy_table +PREHOOK: type: QUERY +PREHOOK: Input: default@legacy_table +#### A masked pattern was here #### +POSTHOOK: query: select * from legacy_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@legacy_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-21 +1014-02-11 +0947-02-11 +0200-02-11 +PREHOOK: query: select * from legacy_table +PREHOOK: type: QUERY +PREHOOK: Input: default@legacy_table +#### A masked pattern was here #### +POSTHOOK: query: select * from legacy_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@legacy_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-27 +1014-02-17 +0947-02-16 +0200-02-10 +PREHOOK: query: drop table legacy_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@legacy_table +PREHOOK: Output: default@legacy_table +POSTHOOK: query: drop table legacy_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@legacy_table +POSTHOOK: Output: default@legacy_table diff --git a/ql/src/test/results/clientpositive/llap/avro_legacy_mixed_timestamp.q.out b/ql/src/test/results/clientpositive/llap/avro_legacy_mixed_timestamp.q.out new file mode 100644 index 0000000000..27c6f3d462 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/avro_legacy_mixed_timestamp.q.out @@ -0,0 +1,62 @@ +PREHOOK: query: create table legacy_table (d timestamp) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: 
default@legacy_table +POSTHOOK: query: create table legacy_table (d timestamp) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@legacy_table +PREHOOK: query: load data local inpath '../../data/files/avro_legacy_mixed_timestamps.avro' into table legacy_table +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@legacy_table +POSTHOOK: query: load data local inpath '../../data/files/avro_legacy_mixed_timestamps.avro' into table legacy_table +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@legacy_table +PREHOOK: query: select * from legacy_table +PREHOOK: type: QUERY +PREHOOK: Input: default@legacy_table +#### A masked pattern was here #### +POSTHOOK: query: select * from legacy_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@legacy_table +#### A masked pattern was here #### +2012-02-21 07:08:09.123 +2014-02-11 07:08:09.123 +1947-02-11 07:08:09.123 +8200-02-11 07:08:09.123 +1012-02-21 07:15:11.123 +1014-02-11 07:15:11.123 +0947-02-11 07:15:11.123 +0200-02-11 07:15:11.123 +PREHOOK: query: select * from legacy_table +PREHOOK: type: QUERY +PREHOOK: Input: default@legacy_table +#### A masked pattern was here #### +POSTHOOK: query: select * from legacy_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@legacy_table +#### A masked pattern was here #### +2012-02-21 07:08:09.123 +2014-02-11 07:08:09.123 +1947-02-11 07:08:09.123 +8200-02-11 07:08:09.123 +1012-02-27 07:15:11.123 +1014-02-17 07:15:11.123 +0947-02-16 07:15:11.123 +0200-02-10 07:15:11.123 +PREHOOK: query: drop table legacy_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@legacy_table +PREHOOK: Output: default@legacy_table +POSTHOOK: query: drop table legacy_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@legacy_table +POSTHOOK: Output: default@legacy_table diff --git a/ql/src/test/results/clientpositive/llap/avro_proleptic_mixed_date.q.out b/ql/src/test/results/clientpositive/llap/avro_proleptic_mixed_date.q.out new file mode 100644 index 0000000000..fd74c4e691 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/avro_proleptic_mixed_date.q.out @@ -0,0 +1,79 @@ +PREHOOK: query: create table hybrid_table (d date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: create table hybrid_table (d date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hybrid_table +PREHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@hybrid_table +POSTHOOK: Lineage: hybrid_table.d SCRIPT [] +PREHOOK: query: select * from 
hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-21 +1014-02-11 +0947-02-11 +0200-02-11 +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-21 +1014-02-11 +0947-02-11 +0200-02-11 +PREHOOK: query: drop table hybrid_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@hybrid_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: drop table hybrid_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@hybrid_table +POSTHOOK: Output: default@hybrid_table diff --git a/ql/src/test/results/clientpositive/llap/avro_proleptic_mixed_timestamp.q.out b/ql/src/test/results/clientpositive/llap/avro_proleptic_mixed_timestamp.q.out new file mode 100644 index 0000000000..9861ff16a7 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/avro_proleptic_mixed_timestamp.q.out @@ -0,0 +1,79 @@ +PREHOOK: query: create table hybrid_table (d timestamp) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: create table hybrid_table (d timestamp) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' +stored as avro +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hybrid_table +PREHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21 07:08:09.123'), +('2014-02-11 07:08:09.123'), +('1947-02-11 07:08:09.123'), +('8200-02-11 07:08:09.123'), +('1012-02-21 07:15:11.123'), +('1014-02-11 07:15:11.123'), +('0947-02-11 07:15:11.123'), +('0200-02-11 07:15:11.123') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21 07:08:09.123'), +('2014-02-11 07:08:09.123'), +('1947-02-11 07:08:09.123'), +('8200-02-11 07:08:09.123'), +('1012-02-21 07:15:11.123'), +('1014-02-11 07:15:11.123'), +('0947-02-11 07:15:11.123'), +('0200-02-11 07:15:11.123') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@hybrid_table +POSTHOOK: Lineage: hybrid_table.d SCRIPT [] +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 07:08:09.123 +2014-02-11 07:08:09.123 +1947-02-11 07:08:09.123 +8200-02-11 07:08:09.123 +1012-02-21 07:15:11.123 +1014-02-11 07:15:11.123 +0947-02-11 07:15:11.123 +0200-02-11 07:15:11.123 +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 
07:08:09.123 +2014-02-11 07:08:09.123 +1947-02-11 07:08:09.123 +8200-02-11 07:08:09.123 +1012-02-21 07:15:11.123 +1014-02-11 07:15:11.123 +0947-02-11 07:15:11.123 +0200-02-11 07:15:11.123 +PREHOOK: query: drop table hybrid_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@hybrid_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: drop table hybrid_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@hybrid_table +POSTHOOK: Output: default@hybrid_table diff --git a/ql/src/test/results/clientpositive/llap/change_allowincompatible_vectorization_false_date.q.out b/ql/src/test/results/clientpositive/llap/change_allowincompatible_vectorization_false_date.q.out index f3716b1e8c..26b3d3487c 100644 --- a/ql/src/test/results/clientpositive/llap/change_allowincompatible_vectorization_false_date.q.out +++ b/ql/src/test/results/clientpositive/llap/change_allowincompatible_vectorization_false_date.q.out @@ -84,6 +84,26 @@ POSTHOOK: Input: default@change_allowincompatible_vectorization_false_date POSTHOOK: Input: default@change_allowincompatible_vectorization_false_date@s=aaa #### A masked pattern was here #### 2038-03-22 07:26:48 +PREHOOK: query: insert into table change_allowincompatible_vectorization_false_date partition (s='aaa') values ('0001-01-01 00:00:00.0') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@change_allowincompatible_vectorization_false_date@s=aaa +POSTHOOK: query: insert into table change_allowincompatible_vectorization_false_date partition (s='aaa') values ('0001-01-01 00:00:00.0') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@change_allowincompatible_vectorization_false_date@s=aaa +POSTHOOK: Lineage: change_allowincompatible_vectorization_false_date PARTITION(s=aaa).ts SCRIPT [] +PREHOOK: query: select ts from change_allowincompatible_vectorization_false_date where ts='0001-01-01 00:00:00.0' and s='aaa' +PREHOOK: type: QUERY +PREHOOK: Input: default@change_allowincompatible_vectorization_false_date +PREHOOK: Input: default@change_allowincompatible_vectorization_false_date@s=aaa +#### A masked pattern was here #### +POSTHOOK: query: select ts from change_allowincompatible_vectorization_false_date where ts='0001-01-01 00:00:00.0' and s='aaa' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@change_allowincompatible_vectorization_false_date +POSTHOOK: Input: default@change_allowincompatible_vectorization_false_date@s=aaa +#### A masked pattern was here #### +0001-01-01 00:00:00 PREHOOK: query: select ts from change_allowincompatible_vectorization_false_date where ts='2038-03-22 07:26:48.0' and s='aaa' PREHOOK: type: QUERY PREHOOK: Input: default@change_allowincompatible_vectorization_false_date @@ -95,3 +115,14 @@ POSTHOOK: Input: default@change_allowincompatible_vectorization_false_date POSTHOOK: Input: default@change_allowincompatible_vectorization_false_date@s=aaa #### A masked pattern was here #### 2038-03-22 07:26:48 +PREHOOK: query: select ts from change_allowincompatible_vectorization_false_date where ts='0001-01-01 00:00:00.0' and s='aaa' +PREHOOK: type: QUERY +PREHOOK: Input: default@change_allowincompatible_vectorization_false_date +PREHOOK: Input: default@change_allowincompatible_vectorization_false_date@s=aaa +#### A masked pattern was here #### +POSTHOOK: query: select ts from change_allowincompatible_vectorization_false_date where ts='0001-01-01 00:00:00.0' and s='aaa' +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@change_allowincompatible_vectorization_false_date +POSTHOOK: Input: default@change_allowincompatible_vectorization_false_date@s=aaa +#### A masked pattern was here #### +0001-01-01 00:00:00 diff --git a/ql/src/test/results/clientpositive/llap/change_allowincompatible_vectorization_false_date2.q.out b/ql/src/test/results/clientpositive/llap/change_allowincompatible_vectorization_false_date2.q.out new file mode 100644 index 0000000000..eff5a3bd0f --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/change_allowincompatible_vectorization_false_date2.q.out @@ -0,0 +1,53 @@ +PREHOOK: query: create table change_allowincompatible_vectorization_false_date (ts date) partitioned by (s string) clustered by (ts) into 32 buckets stored as orc tblproperties ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@change_allowincompatible_vectorization_false_date +POSTHOOK: query: create table change_allowincompatible_vectorization_false_date (ts date) partitioned by (s string) clustered by (ts) into 32 buckets stored as orc tblproperties ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@change_allowincompatible_vectorization_false_date +PREHOOK: query: alter table change_allowincompatible_vectorization_false_date add partition(s='aaa') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@change_allowincompatible_vectorization_false_date +POSTHOOK: query: alter table change_allowincompatible_vectorization_false_date add partition(s='aaa') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@change_allowincompatible_vectorization_false_date +POSTHOOK: Output: default@change_allowincompatible_vectorization_false_date@s=aaa +PREHOOK: query: alter table change_allowincompatible_vectorization_false_date add partition(s='bbb') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@change_allowincompatible_vectorization_false_date +POSTHOOK: query: alter table change_allowincompatible_vectorization_false_date add partition(s='bbb') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@change_allowincompatible_vectorization_false_date +POSTHOOK: Output: default@change_allowincompatible_vectorization_false_date@s=bbb +PREHOOK: query: insert into table change_allowincompatible_vectorization_false_date partition (s='aaa') values ('0001-01-01 00:00:00.0') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@change_allowincompatible_vectorization_false_date@s=aaa +POSTHOOK: query: insert into table change_allowincompatible_vectorization_false_date partition (s='aaa') values ('0001-01-01 00:00:00.0') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@change_allowincompatible_vectorization_false_date@s=aaa +POSTHOOK: Lineage: change_allowincompatible_vectorization_false_date PARTITION(s=aaa).ts SCRIPT [] +PREHOOK: query: select ts from change_allowincompatible_vectorization_false_date where ts='0001-01-01 00:00:00.0' and s='aaa' +PREHOOK: type: QUERY +PREHOOK: Input: default@change_allowincompatible_vectorization_false_date +PREHOOK: Input: default@change_allowincompatible_vectorization_false_date@s=aaa +#### A masked pattern was here #### +POSTHOOK: query: select ts from change_allowincompatible_vectorization_false_date where ts='0001-01-01 00:00:00.0' and s='aaa' +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@change_allowincompatible_vectorization_false_date +POSTHOOK: Input: default@change_allowincompatible_vectorization_false_date@s=aaa +#### A masked pattern was here #### +0001-01-01 +PREHOOK: query: select ts from change_allowincompatible_vectorization_false_date where ts='0001-01-01 00:00:00.0' and s='aaa' +PREHOOK: type: QUERY +PREHOOK: Input: default@change_allowincompatible_vectorization_false_date +PREHOOK: Input: default@change_allowincompatible_vectorization_false_date@s=aaa +#### A masked pattern was here #### +POSTHOOK: query: select ts from change_allowincompatible_vectorization_false_date where ts='0001-01-01 00:00:00.0' and s='aaa' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@change_allowincompatible_vectorization_false_date +POSTHOOK: Input: default@change_allowincompatible_vectorization_false_date@s=aaa +#### A masked pattern was here #### +0001-01-01 diff --git a/ql/src/test/results/clientpositive/llap/change_allowincompatible_vectorization_false_date3.q.out b/ql/src/test/results/clientpositive/llap/change_allowincompatible_vectorization_false_date3.q.out new file mode 100644 index 0000000000..7cd84a275c --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/change_allowincompatible_vectorization_false_date3.q.out @@ -0,0 +1,53 @@ +PREHOOK: query: create table change_allowincompatible_vectorization_false_date (ts timestamp) partitioned by (s string) clustered by (ts) into 32 buckets stored as orc tblproperties ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@change_allowincompatible_vectorization_false_date +POSTHOOK: query: create table change_allowincompatible_vectorization_false_date (ts timestamp) partitioned by (s string) clustered by (ts) into 32 buckets stored as orc tblproperties ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@change_allowincompatible_vectorization_false_date +PREHOOK: query: alter table change_allowincompatible_vectorization_false_date add partition(s='aaa') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@change_allowincompatible_vectorization_false_date +POSTHOOK: query: alter table change_allowincompatible_vectorization_false_date add partition(s='aaa') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@change_allowincompatible_vectorization_false_date +POSTHOOK: Output: default@change_allowincompatible_vectorization_false_date@s=aaa +PREHOOK: query: alter table change_allowincompatible_vectorization_false_date add partition(s='bbb') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@change_allowincompatible_vectorization_false_date +POSTHOOK: query: alter table change_allowincompatible_vectorization_false_date add partition(s='bbb') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@change_allowincompatible_vectorization_false_date +POSTHOOK: Output: default@change_allowincompatible_vectorization_false_date@s=bbb +PREHOOK: query: insert into table change_allowincompatible_vectorization_false_date partition (s='aaa') values ('0001-01-01 00:00:00.0') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@change_allowincompatible_vectorization_false_date@s=aaa +POSTHOOK: query: insert into table change_allowincompatible_vectorization_false_date partition (s='aaa') values ('0001-01-01 00:00:00.0') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: 
default@change_allowincompatible_vectorization_false_date@s=aaa +POSTHOOK: Lineage: change_allowincompatible_vectorization_false_date PARTITION(s=aaa).ts SCRIPT [] +PREHOOK: query: select ts from change_allowincompatible_vectorization_false_date where ts='0001-01-01 00:00:00.0' and s='aaa' +PREHOOK: type: QUERY +PREHOOK: Input: default@change_allowincompatible_vectorization_false_date +PREHOOK: Input: default@change_allowincompatible_vectorization_false_date@s=aaa +#### A masked pattern was here #### +POSTHOOK: query: select ts from change_allowincompatible_vectorization_false_date where ts='0001-01-01 00:00:00.0' and s='aaa' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@change_allowincompatible_vectorization_false_date +POSTHOOK: Input: default@change_allowincompatible_vectorization_false_date@s=aaa +#### A masked pattern was here #### +0001-01-01 00:00:00 +PREHOOK: query: select ts from change_allowincompatible_vectorization_false_date where ts='0001-01-01 00:00:00.0' and s='aaa' +PREHOOK: type: QUERY +PREHOOK: Input: default@change_allowincompatible_vectorization_false_date +PREHOOK: Input: default@change_allowincompatible_vectorization_false_date@s=aaa +#### A masked pattern was here #### +POSTHOOK: query: select ts from change_allowincompatible_vectorization_false_date where ts='0001-01-01 00:00:00.0' and s='aaa' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@change_allowincompatible_vectorization_false_date +POSTHOOK: Input: default@change_allowincompatible_vectorization_false_date@s=aaa +#### A masked pattern was here #### +0001-01-01 00:00:00 diff --git a/ql/src/test/results/clientpositive/llap/insert_values_orig_table_use_metadata.q.out b/ql/src/test/results/clientpositive/llap/insert_values_orig_table_use_metadata.q.out index 3c76a2c67e..dba4201ad8 100644 --- a/ql/src/test/results/clientpositive/llap/insert_values_orig_table_use_metadata.q.out +++ b/ql/src/test/results/clientpositive/llap/insert_values_orig_table_use_metadata.q.out @@ -344,7 +344,7 @@ Table Parameters: numFiles 1 numRows 2 rawDataSize 0 - totalSize 1652 + totalSize 1654 transactional true transactional_properties default #### A masked pattern was here #### @@ -442,7 +442,7 @@ Table Parameters: numFiles 2 numRows 4 rawDataSize 0 - totalSize 3304 + totalSize 3308 transactional true transactional_properties default #### A masked pattern was here #### @@ -536,7 +536,7 @@ Table Parameters: numFiles 3 numRows 12292 rawDataSize 0 - totalSize 312862 + totalSize 312868 transactional true transactional_properties default #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/llap/orc_analyze.q.out b/ql/src/test/results/clientpositive/llap/orc_analyze.q.out index 04ab995749..804f7ed590 100644 --- a/ql/src/test/results/clientpositive/llap/orc_analyze.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_analyze.q.out @@ -102,7 +102,7 @@ Table Parameters: numFiles 1 numRows 100 rawDataSize 52600 - totalSize 3227 + totalSize 3229 #### A masked pattern was here #### # Storage Information @@ -150,7 +150,7 @@ Table Parameters: numFiles 1 numRows 100 rawDataSize 52600 - totalSize 3227 + totalSize 3229 #### A masked pattern was here #### # Storage Information @@ -237,7 +237,7 @@ Table Parameters: numFiles 1 numRows 100 rawDataSize 52600 - totalSize 3227 + totalSize 3229 #### A masked pattern was here #### # Storage Information @@ -345,7 +345,7 @@ Partition Parameters: numFiles 1 numRows 50 rawDataSize 21950 - totalSize 2129 + totalSize 2131 #### A masked pattern was here #### # Storage 
Information @@ -386,7 +386,7 @@ Partition Parameters: numFiles 1 numRows 50 rawDataSize 22050 - totalSize 2142 + totalSize 2144 #### A masked pattern was here #### # Storage Information @@ -439,7 +439,7 @@ Partition Parameters: numFiles 1 numRows 50 rawDataSize 21950 - totalSize 2129 + totalSize 2131 #### A masked pattern was here #### # Storage Information @@ -480,7 +480,7 @@ Partition Parameters: numFiles 1 numRows 50 rawDataSize 22050 - totalSize 2142 + totalSize 2144 #### A masked pattern was here #### # Storage Information @@ -576,7 +576,7 @@ Partition Parameters: numFiles 1 numRows 50 rawDataSize 21950 - totalSize 2129 + totalSize 2131 #### A masked pattern was here #### # Storage Information @@ -617,7 +617,7 @@ Partition Parameters: numFiles 1 numRows 50 rawDataSize 22050 - totalSize 2142 + totalSize 2144 #### A masked pattern was here #### # Storage Information @@ -731,7 +731,7 @@ Partition Parameters: numFiles 4 numRows 50 rawDataSize 21955 - totalSize 5382 + totalSize 5390 #### A masked pattern was here #### # Storage Information @@ -772,7 +772,7 @@ Partition Parameters: numFiles 4 numRows 50 rawDataSize 22043 - totalSize 5371 + totalSize 5376 #### A masked pattern was here #### # Storage Information @@ -825,7 +825,7 @@ Partition Parameters: numFiles 4 numRows 50 rawDataSize 21955 - totalSize 5382 + totalSize 5390 #### A masked pattern was here #### # Storage Information @@ -866,7 +866,7 @@ Partition Parameters: numFiles 4 numRows 50 rawDataSize 22043 - totalSize 5371 + totalSize 5376 #### A masked pattern was here #### # Storage Information @@ -968,7 +968,7 @@ Partition Parameters: numFiles 4 numRows 50 rawDataSize 21955 - totalSize 5382 + totalSize 5390 #### A masked pattern was here #### # Storage Information @@ -1009,7 +1009,7 @@ Partition Parameters: numFiles 4 numRows 50 rawDataSize 22043 - totalSize 5371 + totalSize 5376 #### A masked pattern was here #### # Storage Information @@ -1117,7 +1117,7 @@ Partition Parameters: numFiles 1 numRows 50 rawDataSize 21950 - totalSize 2129 + totalSize 2131 #### A masked pattern was here #### # Storage Information @@ -1170,7 +1170,7 @@ Partition Parameters: numFiles 1 numRows 50 rawDataSize 21950 - totalSize 2129 + totalSize 2131 #### A masked pattern was here #### # Storage Information diff --git a/ql/src/test/results/clientpositive/llap/orc_hybrid_mixed_date.q.out b/ql/src/test/results/clientpositive/llap/orc_hybrid_mixed_date.q.out new file mode 100644 index 0000000000..dac30ccb18 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/orc_hybrid_mixed_date.q.out @@ -0,0 +1,75 @@ +PREHOOK: query: create table hybrid_table (d date) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: create table hybrid_table (d date) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hybrid_table +PREHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@hybrid_table +POSTHOOK: Lineage: hybrid_table.d 
SCRIPT [] +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-21 +1014-02-11 +0947-02-11 +0200-02-11 +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-21 +1014-02-11 +0947-02-11 +0200-02-11 +PREHOOK: query: drop table hybrid_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@hybrid_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: drop table hybrid_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@hybrid_table +POSTHOOK: Output: default@hybrid_table diff --git a/ql/src/test/results/clientpositive/llap/orc_hybrid_mixed_timestamp.q.out b/ql/src/test/results/clientpositive/llap/orc_hybrid_mixed_timestamp.q.out new file mode 100644 index 0000000000..e0a02317bc --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/orc_hybrid_mixed_timestamp.q.out @@ -0,0 +1,75 @@ +PREHOOK: query: create table hybrid_table (d timestamp) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: create table hybrid_table (d timestamp) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hybrid_table +PREHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21 07:08:09.123'), +('2014-02-11 07:08:09.123'), +('1947-02-11 07:08:09.123'), +('8200-02-11 07:08:09.123'), +('1012-02-21 07:15:11.123'), +('1014-02-11 07:15:11.123'), +('0947-02-11 07:15:11.123'), +('0200-02-11 07:15:11.123') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21 07:08:09.123'), +('2014-02-11 07:08:09.123'), +('1947-02-11 07:08:09.123'), +('8200-02-11 07:08:09.123'), +('1012-02-21 07:15:11.123'), +('1014-02-11 07:15:11.123'), +('0947-02-11 07:15:11.123'), +('0200-02-11 07:15:11.123') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@hybrid_table +POSTHOOK: Lineage: hybrid_table.d SCRIPT [] +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 07:08:09.123 +2014-02-11 07:08:09.123 +1947-02-11 07:08:09.123 +8200-02-11 07:08:09.123 +1012-02-21 07:15:11.123 +1014-02-11 07:15:11.123 +0947-02-11 07:15:11.123 +0200-02-11 07:15:11.123 +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 07:08:09.123 +2014-02-11 07:08:09.123 +1947-02-11 07:08:09.123 +8200-02-11 07:08:09.123 +1012-02-21 07:15:11.123 +1014-02-11 07:15:11.123 +0947-02-11 07:15:11.123 +0200-02-11 07:15:11.123 
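
The orc_legacy_mixed_* results below capture the crux of the calendar change: files written under the hybrid Julian/Gregorian calendar, re-read with a proleptic Gregorian interpretation, shift every pre-1582 value while leaving modern ones alone — 1012-02-21 surfaces as 1012-02-27 (the Julian calendar trails the proleptic Gregorian by 6 days in the 11th century), 0947-02-11 as 0947-02-16 (5 days), and 0200-02-11 as 0200-02-10 (before March of 200 AD the Julian calendar runs a day ahead). A minimal standalone sketch of that arithmetic using only JDK classes; the class name CalendarShiftDemo is illustrative and not part of this patch:

    import java.text.ParseException;
    import java.text.SimpleDateFormat;
    import java.util.Date;
    import java.util.GregorianCalendar;
    import java.util.TimeZone;

    public class CalendarShiftDemo {
      public static void main(String[] args) throws ParseException {
        TimeZone utc = TimeZone.getTimeZone("UTC");

        // Hybrid Julian/Gregorian calendar: the JDK default, which switches
        // from Julian to Gregorian rules on 1582-10-15.
        GregorianCalendar hybrid = new GregorianCalendar(utc);

        // Proleptic Gregorian calendar: extend Gregorian rules back past
        // 1582 by moving the switchover to the earliest possible instant.
        GregorianCalendar proleptic = new GregorianCalendar(utc);
        proleptic.setGregorianChange(new Date(Long.MIN_VALUE));

        SimpleDateFormat hybridFormat = new SimpleDateFormat("yyyy-MM-dd");
        hybridFormat.setCalendar(hybrid);
        SimpleDateFormat prolepticFormat = new SimpleDateFormat("yyyy-MM-dd");
        prolepticFormat.setCalendar(proleptic);

        // Parse each string under hybrid rules (Julian before 1582), then
        // render the same instant under proleptic Gregorian rules.
        for (String date : new String[] {
            "2012-02-21", "1012-02-21", "0947-02-11", "0200-02-11"}) {
          Date instant = hybridFormat.parse(date);
          System.out.println(date + " -> " + prolepticFormat.format(instant));
        }
        // 2012-02-21 -> 2012-02-21  (the calendars agree after 1582)
        // 1012-02-21 -> 1012-02-27  (Julian trails by 6 days in the 11th century)
        // 0947-02-11 -> 0947-02-16  (5 days in the 10th century)
        // 0200-02-11 -> 0200-02-10  (Julian runs 1 day ahead before March 200 AD)
      }
    }

The hybrid_table reads above round-trip unchanged because writer and reader agree on the calendar; only a calendar mismatch between writer and reader produces the shifts shown for legacy_table.
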
+PREHOOK: query: drop table hybrid_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@hybrid_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: drop table hybrid_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@hybrid_table +POSTHOOK: Output: default@hybrid_table diff --git a/ql/src/test/results/clientpositive/llap/orc_legacy_mixed_date.q.out b/ql/src/test/results/clientpositive/llap/orc_legacy_mixed_date.q.out new file mode 100644 index 0000000000..ec4c2193c1 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/orc_legacy_mixed_date.q.out @@ -0,0 +1,58 @@ +PREHOOK: query: create table legacy_table (d date) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@legacy_table +POSTHOOK: query: create table legacy_table (d date) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@legacy_table +PREHOOK: query: load data local inpath '../../data/files/orc_legacy_mixed_dates.orc' into table legacy_table +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@legacy_table +POSTHOOK: query: load data local inpath '../../data/files/orc_legacy_mixed_dates.orc' into table legacy_table +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@legacy_table +PREHOOK: query: select * from legacy_table +PREHOOK: type: QUERY +PREHOOK: Input: default@legacy_table +#### A masked pattern was here #### +POSTHOOK: query: select * from legacy_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@legacy_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-21 +1014-02-11 +0947-02-11 +0200-02-11 +PREHOOK: query: select * from legacy_table +PREHOOK: type: QUERY +PREHOOK: Input: default@legacy_table +#### A masked pattern was here #### +POSTHOOK: query: select * from legacy_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@legacy_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-27 +1014-02-17 +0947-02-16 +0200-02-10 +PREHOOK: query: drop table legacy_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@legacy_table +PREHOOK: Output: default@legacy_table +POSTHOOK: query: drop table legacy_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@legacy_table +POSTHOOK: Output: default@legacy_table diff --git a/ql/src/test/results/clientpositive/llap/orc_legacy_mixed_timestamp.q.out b/ql/src/test/results/clientpositive/llap/orc_legacy_mixed_timestamp.q.out new file mode 100644 index 0000000000..5f9aa55043 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/orc_legacy_mixed_timestamp.q.out @@ -0,0 +1,58 @@ +PREHOOK: query: create table legacy_table (ts timestamp) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@legacy_table +POSTHOOK: query: create table legacy_table (ts timestamp) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@legacy_table +PREHOOK: query: load data local inpath '../../data/files/orc_legacy_mixed_timestamps.orc' into table legacy_table +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@legacy_table +POSTHOOK: query: load data local inpath '../../data/files/orc_legacy_mixed_timestamps.orc' into table legacy_table +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@legacy_table +PREHOOK: query: select * from 
legacy_table +PREHOOK: type: QUERY +PREHOOK: Input: default@legacy_table +#### A masked pattern was here #### +POSTHOOK: query: select * from legacy_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@legacy_table +#### A masked pattern was here #### +2012-02-21 07:08:09.123 +2014-02-11 07:08:09.123 +1947-02-11 07:08:09.123 +8200-02-11 07:08:09.123 +1012-02-21 07:08:09.123 +1014-02-11 07:08:09.123 +0947-02-11 07:08:09.123 +0200-02-11 07:08:09.123 +PREHOOK: query: select * from legacy_table +PREHOOK: type: QUERY +PREHOOK: Input: default@legacy_table +#### A masked pattern was here #### +POSTHOOK: query: select * from legacy_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@legacy_table +#### A masked pattern was here #### +2012-02-21 07:08:09.123 +2014-02-11 07:08:09.123 +1947-02-11 07:08:09.123 +8200-02-11 07:08:09.123 +1012-02-27 07:08:09.123 +1014-02-17 07:08:09.123 +0947-02-16 07:08:09.123 +0200-02-10 07:08:09.123 +PREHOOK: query: drop table legacy_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@legacy_table +PREHOOK: Output: default@legacy_table +POSTHOOK: query: drop table legacy_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@legacy_table +POSTHOOK: Output: default@legacy_table diff --git a/ql/src/test/results/clientpositive/llap/orc_llap_nonvector.q.out b/ql/src/test/results/clientpositive/llap/orc_llap_nonvector.q.out index d656c3ca7e..d76f5d81e1 100644 --- a/ql/src/test/results/clientpositive/llap/orc_llap_nonvector.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_llap_nonvector.q.out @@ -1348,7 +1348,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_llap_nonvector_2 - Statistics: Num rows: 12288 Data size: 4468050 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 4468070 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ROW__ID (type: struct) outputColumnNames: _col0 diff --git a/ql/src/test/results/clientpositive/llap/orc_merge11.q.out b/ql/src/test/results/clientpositive/llap/orc_merge11.q.out index 6a8aae4ae2..c947e8dece 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge11.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge11.q.out @@ -76,6 +76,7 @@ File Version: 0.12 with ORC_517 Rows: 50000 Compression: ZLIB Compression size: 4096 +Calendar: Julian/Gregorian Type: struct Stripe Statistics: @@ -155,7 +156,7 @@ Stripes: Entry 3: count: 10000 hasNull: false min: 1969-12-31 16:04:10.0 max: 1969-12-31 16:04:10.0 positions: 0,506,294,0,232,304 Entry 4: count: 10000 hasNull: false min: 1969-12-31 16:04:10.0 max: 1969-12-31 16:04:10.0 positions: 0,666,54,0,312,64 -File length: 6672 bytes +File length: 6674 bytes Padding length: 0 bytes Padding ratio: 0% ________________________________________________________________________________________________________________________ @@ -167,6 +168,7 @@ File Version: 0.12 with ORC_517 Rows: 50000 Compression: ZLIB Compression size: 4096 +Calendar: Julian/Gregorian Type: struct Stripe Statistics: @@ -246,7 +248,7 @@ Stripes: Entry 3: count: 10000 hasNull: false min: 1969-12-31 16:04:10.0 max: 1969-12-31 16:04:10.0 positions: 0,506,294,0,232,304 Entry 4: count: 10000 hasNull: false min: 1969-12-31 16:04:10.0 max: 1969-12-31 16:04:10.0 positions: 0,666,54,0,312,64 -File length: 6672 bytes +File length: 6674 bytes Padding length: 0 bytes Padding ratio: 0% ________________________________________________________________________________________________________________________ @@ -279,6 +281,7 @@ File Version: 0.12 with ORC_517 
Rows: 100000 Compression: ZLIB Compression size: 4096 +Calendar: Julian/Gregorian Type: struct Stripe Statistics: @@ -423,7 +426,7 @@ Stripes: Entry 3: count: 10000 hasNull: false min: 1969-12-31 16:04:10.0 max: 1969-12-31 16:04:10.0 positions: 0,506,294,0,232,304 Entry 4: count: 10000 hasNull: false min: 1969-12-31 16:04:10.0 max: 1969-12-31 16:04:10.0 positions: 0,666,54,0,312,64 -File length: 12978 bytes +File length: 12980 bytes Padding length: 0 bytes Padding ratio: 0% ________________________________________________________________________________________________________________________ diff --git a/ql/src/test/results/clientpositive/llap/orc_proleptic_mixed_date.q.out b/ql/src/test/results/clientpositive/llap/orc_proleptic_mixed_date.q.out new file mode 100644 index 0000000000..dac30ccb18 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/orc_proleptic_mixed_date.q.out @@ -0,0 +1,75 @@ +PREHOOK: query: create table hybrid_table (d date) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: create table hybrid_table (d date) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hybrid_table +PREHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@hybrid_table +POSTHOOK: Lineage: hybrid_table.d SCRIPT [] +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-21 +1014-02-11 +0947-02-11 +0200-02-11 +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-21 +1014-02-11 +0947-02-11 +0200-02-11 +PREHOOK: query: drop table hybrid_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@hybrid_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: drop table hybrid_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@hybrid_table +POSTHOOK: Output: default@hybrid_table diff --git a/ql/src/test/results/clientpositive/llap/orc_proleptic_mixed_timestamp.q.out b/ql/src/test/results/clientpositive/llap/orc_proleptic_mixed_timestamp.q.out new file mode 100644 index 0000000000..e0a02317bc --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/orc_proleptic_mixed_timestamp.q.out @@ -0,0 +1,75 @@ +PREHOOK: query: create table hybrid_table (d timestamp) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: create table hybrid_table (d timestamp) +stored as orc +POSTHOOK: type: CREATETABLE 
+POSTHOOK: Output: database:default +POSTHOOK: Output: default@hybrid_table +PREHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21 07:08:09.123'), +('2014-02-11 07:08:09.123'), +('1947-02-11 07:08:09.123'), +('8200-02-11 07:08:09.123'), +('1012-02-21 07:15:11.123'), +('1014-02-11 07:15:11.123'), +('0947-02-11 07:15:11.123'), +('0200-02-11 07:15:11.123') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21 07:08:09.123'), +('2014-02-11 07:08:09.123'), +('1947-02-11 07:08:09.123'), +('8200-02-11 07:08:09.123'), +('1012-02-21 07:15:11.123'), +('1014-02-11 07:15:11.123'), +('0947-02-11 07:15:11.123'), +('0200-02-11 07:15:11.123') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@hybrid_table +POSTHOOK: Lineage: hybrid_table.d SCRIPT [] +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 07:08:09.123 +2014-02-11 07:08:09.123 +1947-02-11 07:08:09.123 +8200-02-11 07:08:09.123 +1012-02-21 07:15:11.123 +1014-02-11 07:15:11.123 +0947-02-11 07:15:11.123 +0200-02-11 07:15:11.123 +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 07:08:09.123 +2014-02-11 07:08:09.123 +1947-02-11 07:08:09.123 +8200-02-11 07:08:09.123 +1012-02-21 07:15:11.123 +1014-02-11 07:15:11.123 +0947-02-11 07:15:11.123 +0200-02-11 07:15:11.123 +PREHOOK: query: drop table hybrid_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@hybrid_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: drop table hybrid_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@hybrid_table +POSTHOOK: Output: default@hybrid_table diff --git a/ql/src/test/results/clientpositive/llap/parquet_hybrid_mixed_date.q.out b/ql/src/test/results/clientpositive/llap/parquet_hybrid_mixed_date.q.out new file mode 100644 index 0000000000..2a834e26c9 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/parquet_hybrid_mixed_date.q.out @@ -0,0 +1,75 @@ +PREHOOK: query: create table hybrid_table (d date) +stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: create table hybrid_table (d date) +stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hybrid_table +PREHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@hybrid_table +POSTHOOK: Lineage: hybrid_table.d SCRIPT [] +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: 
Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-21 +1014-02-11 +0947-02-11 +0200-02-11 +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-21 +1014-02-11 +0947-02-11 +0200-02-11 +PREHOOK: query: drop table hybrid_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@hybrid_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: drop table hybrid_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@hybrid_table +POSTHOOK: Output: default@hybrid_table diff --git a/ql/src/test/results/clientpositive/llap/parquet_hybrid_mixed_timestamp.q.out b/ql/src/test/results/clientpositive/llap/parquet_hybrid_mixed_timestamp.q.out new file mode 100644 index 0000000000..51c6e9ac56 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/parquet_hybrid_mixed_timestamp.q.out @@ -0,0 +1,59 @@ +PREHOOK: query: create table hybrid_table (d timestamp) +stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: create table hybrid_table (d timestamp) +stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hybrid_table +PREHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21 07:08:09.123'), +('2014-02-11 07:08:09.123'), +('1947-02-11 07:08:09.123'), +('8200-02-11 07:08:09.123'), +('1012-02-21 07:15:11.123'), +('1014-02-11 07:15:11.123'), +('0947-02-11 07:15:11.123'), +('0200-02-11 07:15:11.123') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: INSERT INTO hybrid_table VALUES +('2012-02-21 07:08:09.123'), +('2014-02-11 07:08:09.123'), +('1947-02-11 07:08:09.123'), +('8200-02-11 07:08:09.123'), +('1012-02-21 07:15:11.123'), +('1014-02-11 07:15:11.123'), +('0947-02-11 07:15:11.123'), +('0200-02-11 07:15:11.123') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@hybrid_table +POSTHOOK: Lineage: hybrid_table.d SCRIPT [] +PREHOOK: query: select * from hybrid_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +POSTHOOK: query: select * from hybrid_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hybrid_table +#### A masked pattern was here #### +2012-02-21 07:08:09.123 +2014-02-11 07:08:09.123 +1947-02-11 07:08:09.123 +8200-02-11 07:08:09.123 +1012-02-21 07:15:11.123 +1014-02-11 07:15:11.123 +0947-02-11 07:15:11.123 +0200-02-11 07:15:11.123 +PREHOOK: query: drop table hybrid_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@hybrid_table +PREHOOK: Output: default@hybrid_table +POSTHOOK: query: drop table hybrid_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@hybrid_table +POSTHOOK: Output: default@hybrid_table diff --git a/ql/src/test/results/clientpositive/llap/parquet_legacy_mixed_date.q.out b/ql/src/test/results/clientpositive/llap/parquet_legacy_mixed_date.q.out new file mode 100644 index 0000000000..b6a0d70d2e --- /dev/null +++ 
b/ql/src/test/results/clientpositive/llap/parquet_legacy_mixed_date.q.out @@ -0,0 +1,58 @@ +PREHOOK: query: create table legacy_table (d date) +stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@legacy_table +POSTHOOK: query: create table legacy_table (d date) +stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@legacy_table +PREHOOK: query: load data local inpath '../../data/files/parquet_legacy_mixed_dates.parq' into table legacy_table +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@legacy_table +POSTHOOK: query: load data local inpath '../../data/files/parquet_legacy_mixed_dates.parq' into table legacy_table +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@legacy_table +PREHOOK: query: select * from legacy_table +PREHOOK: type: QUERY +PREHOOK: Input: default@legacy_table +#### A masked pattern was here #### +POSTHOOK: query: select * from legacy_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@legacy_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-21 +1014-02-11 +0947-02-11 +0200-02-11 +PREHOOK: query: select * from legacy_table +PREHOOK: type: QUERY +PREHOOK: Input: default@legacy_table +#### A masked pattern was here #### +POSTHOOK: query: select * from legacy_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@legacy_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-27 +1014-02-17 +0947-02-16 +0200-02-10 +PREHOOK: query: drop table legacy_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@legacy_table +PREHOOK: Output: default@legacy_table +POSTHOOK: query: drop table legacy_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@legacy_table +POSTHOOK: Output: default@legacy_table diff --git a/ql/src/test/results/clientpositive/llap/parquet_legacy_mixed_timestamp.q.out b/ql/src/test/results/clientpositive/llap/parquet_legacy_mixed_timestamp.q.out new file mode 100644 index 0000000000..1259318d18 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/parquet_legacy_mixed_timestamp.q.out @@ -0,0 +1,42 @@ +PREHOOK: query: create table legacy_table (d timestamp) +stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@legacy_table +POSTHOOK: query: create table legacy_table (d timestamp) +stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@legacy_table +PREHOOK: query: load data local inpath '../../data/files/parquet_legacy_mixed_timestamps.parq' into table legacy_table +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@legacy_table +POSTHOOK: query: load data local inpath '../../data/files/parquet_legacy_mixed_timestamps.parq' into table legacy_table +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@legacy_table +PREHOOK: query: select * from legacy_table +PREHOOK: type: QUERY +PREHOOK: Input: default@legacy_table +#### A masked pattern was here #### +POSTHOOK: query: select * from legacy_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@legacy_table +#### A masked pattern was here #### +2012-02-21 07:08:09.123 +2014-02-11 07:08:09.123 +1947-02-11 07:08:09.123 +8200-02-11 07:08:09.123 +1012-02-21 07:15:11.123 +1014-02-11 07:15:11.123 +0947-02-11 07:15:11.123 +0200-02-11 07:15:11.123 +PREHOOK: query: 
drop table legacy_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@legacy_table +PREHOOK: Output: default@legacy_table +POSTHOOK: query: drop table legacy_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@legacy_table +POSTHOOK: Output: default@legacy_table diff --git a/ql/src/test/results/clientpositive/llap/parquet_proleptic_mixed_date.q.out b/ql/src/test/results/clientpositive/llap/parquet_proleptic_mixed_date.q.out new file mode 100644 index 0000000000..d0f61ea866 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/parquet_proleptic_mixed_date.q.out @@ -0,0 +1,75 @@ +PREHOOK: query: create table proleptic_table (d date) +stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@proleptic_table +POSTHOOK: query: create table proleptic_table (d date) +stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@proleptic_table +PREHOOK: query: INSERT INTO proleptic_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@proleptic_table +POSTHOOK: query: INSERT INTO proleptic_table VALUES +('2012-02-21'), +('2014-02-11'), +('1947-02-11'), +('8200-02-11'), +('1012-02-21'), +('1014-02-11'), +('0947-02-11'), +('0200-02-11') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@proleptic_table +POSTHOOK: Lineage: proleptic_table.d SCRIPT [] +PREHOOK: query: select * from proleptic_table +PREHOOK: type: QUERY +PREHOOK: Input: default@proleptic_table +#### A masked pattern was here #### +POSTHOOK: query: select * from proleptic_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@proleptic_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-21 +1014-02-11 +0947-02-11 +0200-02-11 +PREHOOK: query: select * from proleptic_table +PREHOOK: type: QUERY +PREHOOK: Input: default@proleptic_table +#### A masked pattern was here #### +POSTHOOK: query: select * from proleptic_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@proleptic_table +#### A masked pattern was here #### +2012-02-21 +2014-02-11 +1947-02-11 +8200-02-11 +1012-02-21 +1014-02-11 +0947-02-11 +0200-02-11 +PREHOOK: query: drop table proleptic_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@proleptic_table +PREHOOK: Output: default@proleptic_table +POSTHOOK: query: drop table proleptic_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@proleptic_table +POSTHOOK: Output: default@proleptic_table diff --git a/ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_part_all_primitive.q.out b/ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_part_all_primitive.q.out index 81125cb0d5..80216505c2 100644 --- a/ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_part_all_primitive.q.out +++ b/ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_part_all_primitive.q.out @@ -534,11 +534,11 @@ POSTHOOK: Input: default@part_change_various_various_decimal_to_double_n6 POSTHOOK: Input: default@part_change_various_various_decimal_to_double_n6@part=1 #### A masked pattern was here #### insert_num part c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c11 c12 c13 c14 c15 c16 c17 c18 c19 c20 c21 c22 c23 c24 c25 c26 c27 c28 c29 c30 c31 c32 c33 b -101 1 1.000000000000000000 -128.000000000000000000 NULL 
-2147483648.000000000000000000 NULL NULL NULL 99999999999999999999.999999999999999999 99999999999999999999.999900000000000000 99999999999999999999.999900000000000000 134416464868.970120000000000000 1.0 -128.0 NULL -2.14748365E9 NULL 1.0E20 Infinity Infinity 3.4028236E24 3.4028236E24 1.34416466E11 1.0 -128.0 NULL -2.147483648E9 NULL 1.0E20 Infinity 1.7976931348623157E308 1.7976931348623157E308 1.7976931348623157E308 1.3441646486897012E11 original +101 1 1.000000000000000000 -128.000000000000000000 NULL -2147483648.000000000000000000 NULL NULL NULL 99999999999999999999.999999999999999999 99999999999999999999.999900000000000000 99999999999999999999.999900000000000000 134416464868.970117179000000000 1.0 -128.0 NULL -2.14748365E9 NULL 1.0E20 Infinity Infinity 3.4028236E24 3.4028236E24 1.34416466E11 1.0 -128.0 NULL -2.147483648E9 NULL 1.0E20 Infinity 1.7976931348623157E308 1.7976931348623157E308 1.7976931348623157E308 1.3441646486897012E11 original 102 1 0.000000000000000000 127.000000000000000000 32767.000000000000000000 2147483647.000000000000000000 9223372036854775807.000000000000000000 NULL NULL -99999999999999999999.999999999999999999 -99999999999999999999.999000000000000000 -99999999999999999999.999000000000000000 126117919850.597000000000000000 0.0 127.0 32767.0 2.14748365E9 9.223372E18 -1.0E20 -Infinity -Infinity -3.4028233E23 -3.4028233E23 1.26117921E11 0.0 127.0 32767.0 2.147483647E9 9.223372036854776E18 -1.0E20 -Infinity -1.7976931348623157E308 -1.7976931348623157E308 -1.7976931348623157E308 1.26117919850597E11 original 103 1 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL original -104 1 1.000000000000000000 23.000000000000000000 834.000000000000000000 203332.000000000000000000 888888857923222.000000000000000000 -100.359780000000000000 30.774000000000000000 66475.561431000000000000 66475.561431000000000000 66475.561431000000000000 270887654.000000000000000000 1.0 23.0 834.0 203332.0 8.8888885E14 66475.56 30.774 -100.35978 -100.35978 -100.35978 2.70887648E8 1.0 23.0 834.0 203332.0 8.88888857923222E14 66475.561431 -100.35978 30.774 30.774 30.774 2.70887654E8 original -105 1 0.000000000000000000 -99.000000000000000000 -28300.000000000000000000 -999992.000000000000000000 -222282153733.000000000000000000 NULL 46114.280000000000000000 9250340.750000000000000000 9250340.750000000000000000 9250340.750000000000000000 663178839.720368500000000000 0.0 -99.0 -28300.0 -999992.0 -2.22282154E11 9250341.0 46114.28 NULL NULL NULL 6.6317882E8 0.0 -99.0 -28300.0 -999992.0 -2.22282153733E11 9250340.75 NULL 46114.28 46114.28 46114.28 6.631788397203685E8 original +104 1 1.000000000000000000 23.000000000000000000 834.000000000000000000 203332.000000000000000000 888888857923222.000000000000000000 -100.359779357910160000 30.774000000000000000 66475.561431000000000000 66475.561431000000000000 66475.561431000000000000 270887654.000000000000000000 1.0 23.0 834.0 203332.0 8.8888885E14 66475.56 30.774 -100.35978 -100.35978 -100.35978 2.70887648E8 1.0 23.0 834.0 203332.0 8.88888857923222E14 66475.561431 -100.35977935791016 30.774 30.774 30.774 2.70887654E8 original +105 1 0.000000000000000000 -99.000000000000000000 -28300.000000000000000000 -999992.000000000000000000 -222282153733.000000000000000000 NULL 46114.280000000000000000 9250340.750000000000000000 9250340.750000000000000000 9250340.750000000000000000 663178839.720368540000000000 0.0 -99.0 -28300.0 -999992.0 -2.22282154E11 9250341.0 46114.28 
NULL NULL NULL 6.6317882E8 0.0 -99.0 -28300.0 -999992.0 -2.22282153733E11 9250340.75 NULL 46114.28 46114.28 46114.28 6.631788397203685E8 original 111 1 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 new PREHOOK: query: drop table part_change_various_various_decimal_to_double_n6 PREHOOK: type: DROPTABLE @@ -687,11 +687,11 @@ POSTHOOK: Input: default@part_change_various_various_timestamp_n6 POSTHOOK: Input: default@part_change_various_various_timestamp_n6@part=1 #### A masked pattern was here #### insert_num part c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c11 c12 b -101 1 1970-01-01 00:00:00.001 1969-12-31 23:59:59.872 NULL 1969-12-07 03:28:36.352 NULL NULL NULL NULL 6229-06-28 09:54:28.970117179 6229-06-28 09:54:28.97011 6229-06-28 09:54:28.97011 1950-12-18 08:00:00 original -102 1 1970-01-01 00:00:00 1970-01-01 00:00:00.127 1970-01-01 00:00:32.767 1970-01-25 20:31:23.647 NULL NULL 1970-01-01 00:00:00 NULL 5966-07-09 10:30:50.597 5966-07-09 10:30:50.597 5966-07-09 10:30:50.597 2049-12-18 08:00:00 original +101 1 1970-01-01 00:00:00.001 1969-12-31 23:59:59.872 NULL 1969-12-07 03:28:36.352 NULL NULL NULL NULL 6229-06-28 02:54:28.970117179 6229-06-28 02:54:28.97011 6229-06-28 02:54:28.97011 1950-12-18 00:00:00 original +102 1 1970-01-01 00:00:00 1970-01-01 00:00:00.127 1970-01-01 00:00:32.767 1970-01-25 20:31:23.647 NULL NULL NULL NULL 5966-07-09 03:30:50.597 5966-07-09 03:30:50.597 5966-07-09 03:30:50.597 2049-12-18 00:00:00 original 103 1 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL original -104 1 1970-01-01 00:00:00.001 1970-01-01 00:00:00.023 1970-01-01 00:00:00.834 1970-01-01 00:03:23.332 NULL 1969-12-31 23:58:19.640220643 1970-01-01 00:00:30.774 1970-01-01 18:27:55.561431 1978-08-02 13:34:14 1978-08-02 13:34:14 1978-08-02 13:34:14 2021-09-24 07:00:00 original -105 1 1970-01-01 00:00:00 1969-12-31 23:59:59.901 1969-12-31 23:59:31.7 1969-12-31 23:43:20.008 1962-12-16 06:57:26.267 NULL 1970-01-01 12:48:34.28 1970-04-18 01:32:20.75 1991-01-07 00:20:39.72036854 1991-01-07 00:20:39.72036 1991-01-07 00:20:39.72036 2024-11-11 08:00:00 original +104 1 1970-01-01 00:00:00.001 1970-01-01 00:00:00.023 1970-01-01 00:00:00.834 1970-01-01 00:03:23.332 NULL 1969-12-31 23:58:19.64 1970-01-01 00:00:30.774 1970-01-01 18:27:55.561431 1978-08-02 06:34:14 1978-08-02 06:34:14 1978-08-02 06:34:14 2021-09-24 00:00:00 original +105 1 1970-01-01 00:00:00 1969-12-31 23:59:59.901 1969-12-31 23:59:31.7 1969-12-31 23:43:20.008 1962-12-16 06:57:26.267 NULL 1970-01-01 12:48:34.28 1970-04-18 01:32:20.75 1991-01-06 16:20:39.72036854 1991-01-06 16:20:39.72036 1991-01-06 16:20:39.72036 2024-11-11 00:00:00 original 111 1 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL new PREHOOK: query: drop table part_change_various_various_timestamp_n6 PREHOOK: type: DROPTABLE @@ -824,10 +824,10 @@ POSTHOOK: Input: default@part_change_various_various_date_n6 POSTHOOK: Input: default@part_change_various_various_date_n6@part=1 #### A masked pattern was here #### insert_num part c1 c2 c3 c4 b -101 1 1950-12-18 1950-12-18 1950-12-18 6229-06-27 original -102 1 2049-12-18 2049-12-18 2049-12-18 5966-07-08 original +101 1 
1950-12-18 1950-12-18 1950-12-18 6229-06-28 original +102 1 2049-12-18 2049-12-18 2049-12-18 5966-07-09 original 103 1 NULL NULL NULL NULL original -104 1 2021-09-24 2021-09-24 2021-09-24 1978-08-01 original +104 1 2021-09-24 2021-09-24 2021-09-24 1978-08-02 original 105 1 2024-11-11 2024-11-11 2024-11-11 1991-01-06 original 111 1 1964-01-24 1964-01-24 1964-01-24 1964-01-24 new PREHOOK: query: drop table part_change_various_various_date_n6 diff --git a/ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_part_all_primitive_llap_io.q.out b/ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_part_all_primitive_llap_io.q.out index e4c29f77e4..4fca524918 100644 --- a/ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_part_all_primitive_llap_io.q.out +++ b/ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_part_all_primitive_llap_io.q.out @@ -536,11 +536,11 @@ POSTHOOK: Input: default@part_change_various_various_decimal_to_double_n5 POSTHOOK: Input: default@part_change_various_various_decimal_to_double_n5@part=1 #### A masked pattern was here #### insert_num part c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c11 c12 c13 c14 c15 c16 c17 c18 c19 c20 c21 c22 c23 c24 c25 c26 c27 c28 c29 c30 c31 c32 c33 b -101 1 1.000000000000000000 -128.000000000000000000 NULL -2147483648.000000000000000000 NULL NULL NULL 99999999999999999999.999999999999999999 99999999999999999999.999900000000000000 99999999999999999999.999900000000000000 134416464868.970120000000000000 1.0 -128.0 NULL -2.14748365E9 NULL 1.0E20 Infinity Infinity 3.4028236E24 3.4028236E24 1.34416466E11 1.0 -128.0 NULL -2.147483648E9 NULL 1.0E20 Infinity 1.7976931348623157E308 1.7976931348623157E308 1.7976931348623157E308 1.3441646486897012E11 original +101 1 1.000000000000000000 -128.000000000000000000 NULL -2147483648.000000000000000000 NULL NULL NULL 99999999999999999999.999999999999999999 99999999999999999999.999900000000000000 99999999999999999999.999900000000000000 134416464868.970117179000000000 1.0 -128.0 NULL -2.14748365E9 NULL 1.0E20 Infinity Infinity 3.4028236E24 3.4028236E24 1.34416466E11 1.0 -128.0 NULL -2.147483648E9 NULL 1.0E20 Infinity 1.7976931348623157E308 1.7976931348623157E308 1.7976931348623157E308 1.3441646486897012E11 original 102 1 0.000000000000000000 127.000000000000000000 32767.000000000000000000 2147483647.000000000000000000 9223372036854775807.000000000000000000 NULL NULL -99999999999999999999.999999999999999999 -99999999999999999999.999000000000000000 -99999999999999999999.999000000000000000 126117919850.597000000000000000 0.0 127.0 32767.0 2.14748365E9 9.223372E18 -1.0E20 -Infinity -Infinity -3.4028233E23 -3.4028233E23 1.26117921E11 0.0 127.0 32767.0 2.147483647E9 9.223372036854776E18 -1.0E20 -Infinity -1.7976931348623157E308 -1.7976931348623157E308 -1.7976931348623157E308 1.26117919850597E11 original 103 1 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL original -104 1 1.000000000000000000 23.000000000000000000 834.000000000000000000 203332.000000000000000000 888888857923222.000000000000000000 -100.359780000000000000 30.774000000000000000 66475.561431000000000000 66475.561431000000000000 66475.561431000000000000 270887654.000000000000000000 1.0 23.0 834.0 203332.0 8.8888885E14 66475.56 30.774 -100.35978 -100.35978 -100.35978 2.70887648E8 1.0 23.0 834.0 203332.0 8.88888857923222E14 66475.561431 -100.35978 30.774 30.774 30.774 2.70887654E8 original -105 1 0.000000000000000000 
-99.000000000000000000 -28300.000000000000000000 -999992.000000000000000000 -222282153733.000000000000000000 NULL 46114.280000000000000000 9250340.750000000000000000 9250340.750000000000000000 9250340.750000000000000000 663178839.720368500000000000 0.0 -99.0 -28300.0 -999992.0 -2.22282154E11 9250341.0 46114.28 NULL NULL NULL 6.6317882E8 0.0 -99.0 -28300.0 -999992.0 -2.22282153733E11 9250340.75 NULL 46114.28 46114.28 46114.28 6.631788397203685E8 original +104 1 1.000000000000000000 23.000000000000000000 834.000000000000000000 203332.000000000000000000 888888857923222.000000000000000000 -100.359779357910160000 30.774000000000000000 66475.561431000000000000 66475.561431000000000000 66475.561431000000000000 270887654.000000000000000000 1.0 23.0 834.0 203332.0 8.8888885E14 66475.56 30.774 -100.35978 -100.35978 -100.35978 2.70887648E8 1.0 23.0 834.0 203332.0 8.88888857923222E14 66475.561431 -100.35977935791016 30.774 30.774 30.774 2.70887654E8 original +105 1 0.000000000000000000 -99.000000000000000000 -28300.000000000000000000 -999992.000000000000000000 -222282153733.000000000000000000 NULL 46114.280000000000000000 9250340.750000000000000000 9250340.750000000000000000 9250340.750000000000000000 663178839.720368540000000000 0.0 -99.0 -28300.0 -999992.0 -2.22282154E11 9250341.0 46114.28 NULL NULL NULL 6.6317882E8 0.0 -99.0 -28300.0 -999992.0 -2.22282153733E11 9250340.75 NULL 46114.28 46114.28 46114.28 6.631788397203685E8 original 111 1 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 new PREHOOK: query: drop table part_change_various_various_decimal_to_double_n5 PREHOOK: type: DROPTABLE @@ -690,11 +690,11 @@ POSTHOOK: Input: default@part_change_various_various_timestamp_n5 POSTHOOK: Input: default@part_change_various_various_timestamp_n5@part=1 #### A masked pattern was here #### insert_num part c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c11 c12 b -101 1 1970-01-01 00:00:00.001 1969-12-31 23:59:59.872 NULL 1969-12-07 03:28:36.352 NULL NULL NULL NULL 6229-06-28 09:54:28.970117179 6229-06-28 09:54:28.97011 6229-06-28 09:54:28.97011 1950-12-18 08:00:00 original -102 1 1970-01-01 00:00:00 1970-01-01 00:00:00.127 1970-01-01 00:00:32.767 1970-01-25 20:31:23.647 NULL NULL 1970-01-01 00:00:00 NULL 5966-07-09 10:30:50.597 5966-07-09 10:30:50.597 5966-07-09 10:30:50.597 2049-12-18 08:00:00 original +101 1 1970-01-01 00:00:00.001 1969-12-31 23:59:59.872 NULL 1969-12-07 03:28:36.352 NULL NULL NULL NULL 6229-06-28 02:54:28.970117179 6229-06-28 02:54:28.97011 6229-06-28 02:54:28.97011 1950-12-18 00:00:00 original +102 1 1970-01-01 00:00:00 1970-01-01 00:00:00.127 1970-01-01 00:00:32.767 1970-01-25 20:31:23.647 NULL NULL NULL NULL 5966-07-09 03:30:50.597 5966-07-09 03:30:50.597 5966-07-09 03:30:50.597 2049-12-18 00:00:00 original 103 1 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL original -104 1 1970-01-01 00:00:00.001 1970-01-01 00:00:00.023 1970-01-01 00:00:00.834 1970-01-01 00:03:23.332 NULL 1969-12-31 23:58:19.640220643 1970-01-01 00:00:30.774 1970-01-01 18:27:55.561431 1978-08-02 13:34:14 1978-08-02 13:34:14 1978-08-02 13:34:14 2021-09-24 07:00:00 original -105 1 1970-01-01 00:00:00 1969-12-31 
23:59:59.901 1969-12-31 23:59:31.7 1969-12-31 23:43:20.008 1962-12-16 06:57:26.267 NULL 1970-01-01 12:48:34.28 1970-04-18 01:32:20.75 1991-01-07 00:20:39.72036854 1991-01-07 00:20:39.72036 1991-01-07 00:20:39.72036 2024-11-11 08:00:00 original +104 1 1970-01-01 00:00:00.001 1970-01-01 00:00:00.023 1970-01-01 00:00:00.834 1970-01-01 00:03:23.332 NULL 1969-12-31 23:58:19.64 1970-01-01 00:00:30.774 1970-01-01 18:27:55.561431 1978-08-02 06:34:14 1978-08-02 06:34:14 1978-08-02 06:34:14 2021-09-24 00:00:00 original +105 1 1970-01-01 00:00:00 1969-12-31 23:59:59.901 1969-12-31 23:59:31.7 1969-12-31 23:43:20.008 1962-12-16 06:57:26.267 NULL 1970-01-01 12:48:34.28 1970-04-18 01:32:20.75 1991-01-06 16:20:39.72036854 1991-01-06 16:20:39.72036 1991-01-06 16:20:39.72036 2024-11-11 00:00:00 original 111 1 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL new PREHOOK: query: drop table part_change_various_various_timestamp_n5 PREHOOK: type: DROPTABLE @@ -828,10 +828,10 @@ POSTHOOK: Input: default@part_change_various_various_date_n5 POSTHOOK: Input: default@part_change_various_various_date_n5@part=1 #### A masked pattern was here #### insert_num part c1 c2 c3 c4 b -101 1 1950-12-18 1950-12-18 1950-12-18 6229-06-27 original -102 1 2049-12-18 2049-12-18 2049-12-18 5966-07-08 original +101 1 1950-12-18 1950-12-18 1950-12-18 6229-06-28 original +102 1 2049-12-18 2049-12-18 2049-12-18 5966-07-09 original 103 1 NULL NULL NULL NULL original -104 1 2021-09-24 2021-09-24 2021-09-24 1978-08-01 original +104 1 2021-09-24 2021-09-24 2021-09-24 1978-08-02 original 105 1 2024-11-11 2024-11-11 2024-11-11 1991-01-06 original 111 1 1964-01-24 1964-01-24 1964-01-24 1964-01-24 new PREHOOK: query: drop table part_change_various_various_date_n5 diff --git a/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_primitive.q.out b/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_primitive.q.out index b334b2d0f6..3d6950dd63 100644 --- a/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_primitive.q.out +++ b/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_primitive.q.out @@ -586,11 +586,11 @@ POSTHOOK: Input: default@part_change_various_various_decimal_to_double_n0 POSTHOOK: Input: default@part_change_various_various_decimal_to_double_n0@part=1 #### A masked pattern was here #### insert_num part c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c11 c12 c13 c14 c15 c16 c17 c18 c19 c20 c21 c22 c23 c24 c25 c26 c27 c28 c29 c30 c31 c32 c33 b -101 1 1.000000000000000000 -128.000000000000000000 NULL -2147483648.000000000000000000 NULL NULL NULL 99999999999999999999.999999999999999999 99999999999999999999.999900000000000000 99999999999999999999.999900000000000000 134416464868.970120000000000000 1.0 -128.0 NULL -2.14748365E9 NULL 1.0E20 Infinity Infinity 3.4028236E24 3.4028236E24 1.34416466E11 1.0 -128.0 NULL -2.147483648E9 NULL 1.0E20 Infinity 1.7976931348623157E308 1.7976931348623157E308 1.7976931348623157E308 1.3441646486897012E11 original +101 1 1.000000000000000000 -128.000000000000000000 NULL -2147483648.000000000000000000 NULL NULL NULL 99999999999999999999.999999999999999999 99999999999999999999.999900000000000000 99999999999999999999.999900000000000000 134416464868.970117179000000000 1.0 -128.0 NULL -2.14748365E9 NULL 1.0E20 Infinity Infinity 3.4028236E24 3.4028236E24 1.34416466E11 1.0 -128.0 NULL -2.147483648E9 NULL 1.0E20 Infinity 1.7976931348623157E308 1.7976931348623157E308 1.7976931348623157E308 1.3441646486897012E11 original 102 1 0.000000000000000000 
127.000000000000000000 32767.000000000000000000 2147483647.000000000000000000 9223372036854775807.000000000000000000 NULL NULL -99999999999999999999.999999999999999999 -99999999999999999999.999000000000000000 -99999999999999999999.999000000000000000 126117919850.597000000000000000 0.0 127.0 32767.0 2.14748365E9 9.223372E18 -1.0E20 -Infinity -Infinity -3.4028233E23 -3.4028233E23 1.26117921E11 0.0 127.0 32767.0 2.147483647E9 9.223372036854776E18 -1.0E20 -Infinity -1.7976931348623157E308 -1.7976931348623157E308 -1.7976931348623157E308 1.26117919850597E11 original 103 1 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL original -104 1 1.000000000000000000 23.000000000000000000 834.000000000000000000 203332.000000000000000000 888888857923222.000000000000000000 -100.359780000000000000 30.774000000000000000 66475.561431000000000000 66475.561431000000000000 66475.561431000000000000 270887654.000000000000000000 1.0 23.0 834.0 203332.0 8.8888885E14 66475.56 30.774 -100.35978 -100.35978 -100.35978 2.70887648E8 1.0 23.0 834.0 203332.0 8.88888857923222E14 66475.561431 -100.35978 30.774 30.774 30.774 2.70887654E8 original -105 1 0.000000000000000000 -99.000000000000000000 -28300.000000000000000000 -999992.000000000000000000 -222282153733.000000000000000000 NULL 46114.280000000000000000 9250340.750000000000000000 9250340.750000000000000000 9250340.750000000000000000 663178839.720368500000000000 0.0 -99.0 -28300.0 -999992.0 -2.22282154E11 9250341.0 46114.28 NULL NULL NULL 6.6317882E8 0.0 -99.0 -28300.0 -999992.0 -2.22282153733E11 9250340.75 NULL 46114.28 46114.28 46114.28 6.631788397203685E8 original +104 1 1.000000000000000000 23.000000000000000000 834.000000000000000000 203332.000000000000000000 888888857923222.000000000000000000 -100.359779357910160000 30.774000000000000000 66475.561431000000000000 66475.561431000000000000 66475.561431000000000000 270887654.000000000000000000 1.0 23.0 834.0 203332.0 8.8888885E14 66475.56 30.774 -100.35978 -100.35978 -100.35978 2.70887648E8 1.0 23.0 834.0 203332.0 8.88888857923222E14 66475.561431 -100.35977935791016 30.774 30.774 30.774 2.70887654E8 original +105 1 0.000000000000000000 -99.000000000000000000 -28300.000000000000000000 -999992.000000000000000000 -222282153733.000000000000000000 NULL 46114.280000000000000000 9250340.750000000000000000 9250340.750000000000000000 9250340.750000000000000000 663178839.720368540000000000 0.0 -99.0 -28300.0 -999992.0 -2.22282154E11 9250341.0 46114.28 NULL NULL NULL 6.6317882E8 0.0 -99.0 -28300.0 -999992.0 -2.22282153733E11 9250340.75 NULL 46114.28 46114.28 46114.28 6.631788397203685E8 original 111 1 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 new PREHOOK: query: drop table part_change_various_various_decimal_to_double_n0 PREHOOK: type: DROPTABLE @@ -765,11 +765,11 @@ POSTHOOK: Input: default@part_change_various_various_timestamp_n0 POSTHOOK: Input: default@part_change_various_various_timestamp_n0@part=1 #### A masked pattern was here #### insert_num part c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c11 c12 b -101 1 
1970-01-01 00:00:00.001 1969-12-31 23:59:59.872 NULL 1969-12-07 03:28:36.352 NULL NULL NULL NULL 6229-06-28 09:54:28.970117179 6229-06-28 09:54:28.97011 6229-06-28 09:54:28.97011 1950-12-18 08:00:00 original -102 1 1970-01-01 00:00:00 1970-01-01 00:00:00.127 1970-01-01 00:00:32.767 1970-01-25 20:31:23.647 NULL NULL 1970-01-01 00:00:00 NULL 5966-07-09 10:30:50.597 5966-07-09 10:30:50.597 5966-07-09 10:30:50.597 2049-12-18 08:00:00 original +101 1 1970-01-01 00:00:00.001 1969-12-31 23:59:59.872 NULL 1969-12-07 03:28:36.352 NULL NULL NULL NULL 6229-06-28 02:54:28.970117179 6229-06-28 02:54:28.97011 6229-06-28 02:54:28.97011 1950-12-18 00:00:00 original +102 1 1970-01-01 00:00:00 1970-01-01 00:00:00.127 1970-01-01 00:00:32.767 1970-01-25 20:31:23.647 NULL NULL NULL NULL 5966-07-09 03:30:50.597 5966-07-09 03:30:50.597 5966-07-09 03:30:50.597 2049-12-18 00:00:00 original 103 1 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL original -104 1 1970-01-01 00:00:00.001 1970-01-01 00:00:00.023 1970-01-01 00:00:00.834 1970-01-01 00:03:23.332 NULL 1969-12-31 23:58:19.640220643 1970-01-01 00:00:30.774 1970-01-01 18:27:55.561431 1978-08-02 13:34:14 1978-08-02 13:34:14 1978-08-02 13:34:14 2021-09-24 07:00:00 original -105 1 1970-01-01 00:00:00 1969-12-31 23:59:59.901 1969-12-31 23:59:31.7 1969-12-31 23:43:20.008 1962-12-16 06:57:26.267 NULL 1970-01-01 12:48:34.28 1970-04-18 01:32:20.75 1991-01-07 00:20:39.72036854 1991-01-07 00:20:39.72036 1991-01-07 00:20:39.72036 2024-11-11 08:00:00 original +104 1 1970-01-01 00:00:00.001 1970-01-01 00:00:00.023 1970-01-01 00:00:00.834 1970-01-01 00:03:23.332 NULL 1969-12-31 23:58:19.64 1970-01-01 00:00:30.774 1970-01-01 18:27:55.561431 1978-08-02 06:34:14 1978-08-02 06:34:14 1978-08-02 06:34:14 2021-09-24 00:00:00 original +105 1 1970-01-01 00:00:00 1969-12-31 23:59:59.901 1969-12-31 23:59:31.7 1969-12-31 23:43:20.008 1962-12-16 06:57:26.267 NULL 1970-01-01 12:48:34.28 1970-04-18 01:32:20.75 1991-01-06 16:20:39.72036854 1991-01-06 16:20:39.72036 1991-01-06 16:20:39.72036 2024-11-11 00:00:00 original 111 1 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL new PREHOOK: query: drop table part_change_various_various_timestamp_n0 PREHOOK: type: DROPTABLE @@ -928,10 +928,10 @@ POSTHOOK: Input: default@part_change_various_various_date_n0 POSTHOOK: Input: default@part_change_various_various_date_n0@part=1 #### A masked pattern was here #### insert_num part c1 c2 c3 c4 b -101 1 1950-12-18 1950-12-18 1950-12-18 6229-06-27 original -102 1 2049-12-18 2049-12-18 2049-12-18 5966-07-08 original +101 1 1950-12-18 1950-12-18 1950-12-18 6229-06-28 original +102 1 2049-12-18 2049-12-18 2049-12-18 5966-07-09 original 103 1 NULL NULL NULL NULL original -104 1 2021-09-24 2021-09-24 2021-09-24 1978-08-01 original +104 1 2021-09-24 2021-09-24 2021-09-24 1978-08-02 original 105 1 2024-11-11 2024-11-11 2024-11-11 1991-01-06 original 111 1 1964-01-24 1964-01-24 1964-01-24 1964-01-24 new PREHOOK: query: drop table part_change_various_various_date_n0 diff --git a/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_primitive_llap_io.q.out b/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_primitive_llap_io.q.out index dc401b8986..1c9da6c327 100644 --- a/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_primitive_llap_io.q.out +++ b/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_primitive_llap_io.q.out @@ -588,11 +588,11 @@ POSTHOOK: Input: default@part_change_various_various_decimal_to_double_n4 
POSTHOOK: Input: default@part_change_various_various_decimal_to_double_n4@part=1 #### A masked pattern was here #### insert_num part c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c11 c12 c13 c14 c15 c16 c17 c18 c19 c20 c21 c22 c23 c24 c25 c26 c27 c28 c29 c30 c31 c32 c33 b -101 1 1.000000000000000000 -128.000000000000000000 NULL -2147483648.000000000000000000 NULL NULL NULL 99999999999999999999.999999999999999999 99999999999999999999.999900000000000000 99999999999999999999.999900000000000000 134416464868.970120000000000000 1.0 -128.0 NULL -2.14748365E9 NULL 1.0E20 Infinity Infinity 3.4028236E24 3.4028236E24 1.34416466E11 1.0 -128.0 NULL -2.147483648E9 NULL 1.0E20 Infinity 1.7976931348623157E308 1.7976931348623157E308 1.7976931348623157E308 1.3441646486897012E11 original +101 1 1.000000000000000000 -128.000000000000000000 NULL -2147483648.000000000000000000 NULL NULL NULL 99999999999999999999.999999999999999999 99999999999999999999.999900000000000000 99999999999999999999.999900000000000000 134416464868.970117179000000000 1.0 -128.0 NULL -2.14748365E9 NULL 1.0E20 Infinity Infinity 3.4028236E24 3.4028236E24 1.34416466E11 1.0 -128.0 NULL -2.147483648E9 NULL 1.0E20 Infinity 1.7976931348623157E308 1.7976931348623157E308 1.7976931348623157E308 1.3441646486897012E11 original 102 1 0.000000000000000000 127.000000000000000000 32767.000000000000000000 2147483647.000000000000000000 9223372036854775807.000000000000000000 NULL NULL -99999999999999999999.999999999999999999 -99999999999999999999.999000000000000000 -99999999999999999999.999000000000000000 126117919850.597000000000000000 0.0 127.0 32767.0 2.14748365E9 9.223372E18 -1.0E20 -Infinity -Infinity -3.4028233E23 -3.4028233E23 1.26117921E11 0.0 127.0 32767.0 2.147483647E9 9.223372036854776E18 -1.0E20 -Infinity -1.7976931348623157E308 -1.7976931348623157E308 -1.7976931348623157E308 1.26117919850597E11 original 103 1 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL original -104 1 1.000000000000000000 23.000000000000000000 834.000000000000000000 203332.000000000000000000 888888857923222.000000000000000000 -100.359780000000000000 30.774000000000000000 66475.561431000000000000 66475.561431000000000000 66475.561431000000000000 270887654.000000000000000000 1.0 23.0 834.0 203332.0 8.8888885E14 66475.56 30.774 -100.35978 -100.35978 -100.35978 2.70887648E8 1.0 23.0 834.0 203332.0 8.88888857923222E14 66475.561431 -100.35978 30.774 30.774 30.774 2.70887654E8 original -105 1 0.000000000000000000 -99.000000000000000000 -28300.000000000000000000 -999992.000000000000000000 -222282153733.000000000000000000 NULL 46114.280000000000000000 9250340.750000000000000000 9250340.750000000000000000 9250340.750000000000000000 663178839.720368500000000000 0.0 -99.0 -28300.0 -999992.0 -2.22282154E11 9250341.0 46114.28 NULL NULL NULL 6.6317882E8 0.0 -99.0 -28300.0 -999992.0 -2.22282153733E11 9250340.75 NULL 46114.28 46114.28 46114.28 6.631788397203685E8 original +104 1 1.000000000000000000 23.000000000000000000 834.000000000000000000 203332.000000000000000000 888888857923222.000000000000000000 -100.359779357910160000 30.774000000000000000 66475.561431000000000000 66475.561431000000000000 66475.561431000000000000 270887654.000000000000000000 1.0 23.0 834.0 203332.0 8.8888885E14 66475.56 30.774 -100.35978 -100.35978 -100.35978 2.70887648E8 1.0 23.0 834.0 203332.0 8.88888857923222E14 66475.561431 -100.35977935791016 30.774 30.774 30.774 2.70887654E8 original +105 1 
0.000000000000000000 -99.000000000000000000 -28300.000000000000000000 -999992.000000000000000000 -222282153733.000000000000000000 NULL 46114.280000000000000000 9250340.750000000000000000 9250340.750000000000000000 9250340.750000000000000000 663178839.720368540000000000 0.0 -99.0 -28300.0 -999992.0 -2.22282154E11 9250341.0 46114.28 NULL NULL NULL 6.6317882E8 0.0 -99.0 -28300.0 -999992.0 -2.22282153733E11 9250340.75 NULL 46114.28 46114.28 46114.28 6.631788397203685E8 original 111 1 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -46114.284799488000000000 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 -9.0E-8 new PREHOOK: query: drop table part_change_various_various_decimal_to_double_n4 PREHOOK: type: DROPTABLE @@ -768,11 +768,11 @@ POSTHOOK: Input: default@part_change_various_various_timestamp_n4 POSTHOOK: Input: default@part_change_various_various_timestamp_n4@part=1 #### A masked pattern was here #### insert_num part c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c11 c12 b -101 1 1970-01-01 00:00:00.001 1969-12-31 23:59:59.872 NULL 1969-12-07 03:28:36.352 NULL NULL NULL NULL 6229-06-28 09:54:28.970117179 6229-06-28 09:54:28.97011 6229-06-28 09:54:28.97011 1950-12-18 08:00:00 original -102 1 1970-01-01 00:00:00 1970-01-01 00:00:00.127 1970-01-01 00:00:32.767 1970-01-25 20:31:23.647 NULL NULL 1970-01-01 00:00:00 NULL 5966-07-09 10:30:50.597 5966-07-09 10:30:50.597 5966-07-09 10:30:50.597 2049-12-18 08:00:00 original +101 1 1970-01-01 00:00:00.001 1969-12-31 23:59:59.872 NULL 1969-12-07 03:28:36.352 NULL NULL NULL NULL 6229-06-28 02:54:28.970117179 6229-06-28 02:54:28.97011 6229-06-28 02:54:28.97011 1950-12-18 00:00:00 original +102 1 1970-01-01 00:00:00 1970-01-01 00:00:00.127 1970-01-01 00:00:32.767 1970-01-25 20:31:23.647 NULL NULL NULL NULL 5966-07-09 03:30:50.597 5966-07-09 03:30:50.597 5966-07-09 03:30:50.597 2049-12-18 00:00:00 original 103 1 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL original -104 1 1970-01-01 00:00:00.001 1970-01-01 00:00:00.023 1970-01-01 00:00:00.834 1970-01-01 00:03:23.332 NULL 1969-12-31 23:58:19.640220643 1970-01-01 00:00:30.774 1970-01-01 18:27:55.561431 1978-08-02 13:34:14 1978-08-02 13:34:14 1978-08-02 13:34:14 2021-09-24 07:00:00 original -105 1 1970-01-01 00:00:00 1969-12-31 23:59:59.901 1969-12-31 23:59:31.7 1969-12-31 23:43:20.008 1962-12-16 06:57:26.267 NULL 1970-01-01 12:48:34.28 1970-04-18 01:32:20.75 1991-01-07 00:20:39.72036854 1991-01-07 00:20:39.72036 1991-01-07 00:20:39.72036 2024-11-11 08:00:00 original +104 1 1970-01-01 00:00:00.001 1970-01-01 00:00:00.023 1970-01-01 00:00:00.834 1970-01-01 00:03:23.332 NULL 1969-12-31 23:58:19.64 1970-01-01 00:00:30.774 1970-01-01 18:27:55.561431 1978-08-02 06:34:14 1978-08-02 06:34:14 1978-08-02 06:34:14 2021-09-24 00:00:00 original +105 1 1970-01-01 00:00:00 1969-12-31 23:59:59.901 1969-12-31 23:59:31.7 1969-12-31 23:43:20.008 1962-12-16 06:57:26.267 NULL 1970-01-01 12:48:34.28 1970-04-18 01:32:20.75 1991-01-06 16:20:39.72036854 1991-01-06 16:20:39.72036 1991-01-06 16:20:39.72036 2024-11-11 00:00:00 original 111 1 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL new PREHOOK: query: drop table part_change_various_various_timestamp_n4 PREHOOK: type: DROPTABLE @@ -932,10 
+932,10 @@ POSTHOOK: Input: default@part_change_various_various_date_n4 POSTHOOK: Input: default@part_change_various_various_date_n4@part=1 #### A masked pattern was here #### insert_num part c1 c2 c3 c4 b -101 1 1950-12-18 1950-12-18 1950-12-18 6229-06-27 original -102 1 2049-12-18 2049-12-18 2049-12-18 5966-07-08 original +101 1 1950-12-18 1950-12-18 1950-12-18 6229-06-28 original +102 1 2049-12-18 2049-12-18 2049-12-18 5966-07-09 original 103 1 NULL NULL NULL NULL original -104 1 2021-09-24 2021-09-24 2021-09-24 1978-08-01 original +104 1 2021-09-24 2021-09-24 2021-09-24 1978-08-02 original 105 1 2024-11-11 2024-11-11 2024-11-11 1991-01-06 original 111 1 1964-01-24 1964-01-24 1964-01-24 1964-01-24 new PREHOOK: query: drop table part_change_various_various_date_n4 diff --git a/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out b/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out index cd7dd7096b..cfbbf821e8 100644 --- a/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out @@ -744,7 +744,7 @@ STAGE PLANS: serialization.ddl struct l3_monthly_dw_dimplan { i64 idp_warehouse_id, i64 idp_audit_id, date idp_data_date, i64 l3_snapshot_number, i64 plan_key, i64 project_key, i64 charge_code_key, i64 transclass_key, i64 resource_key, i64 finplan_detail_object_id, i64 project_object_id, i64 txn_class_object_id, i64 charge_code_object_id, i64 resoruce_object_id, varchar(1500) plan_name, varchar(500) plan_code, varchar(50) plan_type, varchar(50) period_type, varchar(3000) plan_description, varchar(50) plan_status, varchar(50) period_start, varchar(50) period_end, varchar(1) plan_of_record, decimal(32,6) percentage, timestamp l3_created_date, varchar(30) bmo_cost_type, varchar(50) bmo_fiscal_year, timestamp clarity_updated_date, i64 is_latest_snapshot, i64 latest_fiscal_budget_plan, varchar(70) plan_category, varchar(250) last_updated_by} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 5242697 + totalSize 5242699 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -767,7 +767,7 @@ STAGE PLANS: serialization.ddl struct l3_monthly_dw_dimplan { i64 idp_warehouse_id, i64 idp_audit_id, date idp_data_date, i64 l3_snapshot_number, i64 plan_key, i64 project_key, i64 charge_code_key, i64 transclass_key, i64 resource_key, i64 finplan_detail_object_id, i64 project_object_id, i64 txn_class_object_id, i64 charge_code_object_id, i64 resoruce_object_id, varchar(1500) plan_name, varchar(500) plan_code, varchar(50) plan_type, varchar(50) period_type, varchar(3000) plan_description, varchar(50) plan_status, varchar(50) period_start, varchar(50) period_end, varchar(1) plan_of_record, decimal(32,6) percentage, timestamp l3_created_date, varchar(30) bmo_cost_type, varchar(50) bmo_fiscal_year, timestamp clarity_updated_date, i64 is_latest_snapshot, i64 latest_fiscal_budget_plan, varchar(70) plan_category, varchar(250) last_updated_by} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 5242697 + totalSize 5242699 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.l3_monthly_dw_dimplan @@ -1259,7 +1259,7 @@ STAGE PLANS: serialization.ddl struct l3_monthly_dw_dimplan { i64 idp_warehouse_id, i64 idp_audit_id, date idp_data_date, i64 l3_snapshot_number, i64 plan_key, i64 project_key, i64 charge_code_key, i64 transclass_key, 
i64 resource_key, i64 finplan_detail_object_id, i64 project_object_id, i64 txn_class_object_id, i64 charge_code_object_id, i64 resoruce_object_id, varchar(1500) plan_name, varchar(500) plan_code, varchar(50) plan_type, varchar(50) period_type, varchar(3000) plan_description, varchar(50) plan_status, varchar(50) period_start, varchar(50) period_end, varchar(1) plan_of_record, decimal(32,6) percentage, timestamp l3_created_date, varchar(30) bmo_cost_type, varchar(50) bmo_fiscal_year, timestamp clarity_updated_date, i64 is_latest_snapshot, i64 latest_fiscal_budget_plan, varchar(70) plan_category, varchar(250) last_updated_by} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 5242697 + totalSize 5242699 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -1282,7 +1282,7 @@ STAGE PLANS: serialization.ddl struct l3_monthly_dw_dimplan { i64 idp_warehouse_id, i64 idp_audit_id, date idp_data_date, i64 l3_snapshot_number, i64 plan_key, i64 project_key, i64 charge_code_key, i64 transclass_key, i64 resource_key, i64 finplan_detail_object_id, i64 project_object_id, i64 txn_class_object_id, i64 charge_code_object_id, i64 resoruce_object_id, varchar(1500) plan_name, varchar(500) plan_code, varchar(50) plan_type, varchar(50) period_type, varchar(3000) plan_description, varchar(50) plan_status, varchar(50) period_start, varchar(50) period_end, varchar(1) plan_of_record, decimal(32,6) percentage, timestamp l3_created_date, varchar(30) bmo_cost_type, varchar(50) bmo_fiscal_year, timestamp clarity_updated_date, i64 is_latest_snapshot, i64 latest_fiscal_budget_plan, varchar(70) plan_category, varchar(250) last_updated_by} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 5242697 + totalSize 5242699 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.l3_monthly_dw_dimplan diff --git a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out index fe80a40620..18916bcb80 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out @@ -3906,7 +3906,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesnullorc - Statistics: Num rows: 12288 Data size: 9450 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 9470 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -3914,7 +3914,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [] - Statistics: Num rows: 12288 Data size: 9450 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 9470 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/orc_file_dump.q.out b/ql/src/test/results/clientpositive/orc_file_dump.q.out index 2af84f8947..c206eba762 100644 --- a/ql/src/test/results/clientpositive/orc_file_dump.q.out +++ b/ql/src/test/results/clientpositive/orc_file_dump.q.out @@ -97,6 +97,7 @@ File Version: 0.12 with ORC_517 Rows: 1049 Compression: ZLIB Compression size: 262144 +Calendar: Julian/Gregorian Type: struct Stripe Statistics: @@ -269,7 +270,7 @@ Stripes: Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 98 loadFactor: 0.0156 
expectedFpp: 5.9604645E-8 Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 102 loadFactor: 0.0163 expectedFpp: 6.9948186E-8 -File length: 32312 bytes +File length: 32313 bytes Padding length: 0 bytes Padding ratio: 0% ________________________________________________________________________________________________________________________ @@ -294,6 +295,7 @@ File Version: 0.12 with ORC_517 Rows: 1049 Compression: ZLIB Compression size: 262144 +Calendar: Julian/Gregorian Type: struct Stripe Statistics: @@ -466,7 +468,7 @@ Stripes: Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 174 loadFactor: 0.0181 expectedFpp: 6.426078E-13 Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 181 loadFactor: 0.0189 expectedFpp: 8.4693775E-13 -File length: 36956 bytes +File length: 36958 bytes Padding length: 0 bytes Padding ratio: 0% ________________________________________________________________________________________________________________________ @@ -503,6 +505,7 @@ File Version: 0.12 with ORC_517 Rows: 1049 Compression: ZLIB Compression size: 262144 +Calendar: Julian/Gregorian Type: struct Stripe Statistics: @@ -675,7 +678,7 @@ Stripes: Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 98 loadFactor: 0.0156 expectedFpp: 5.9604645E-8 Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 102 loadFactor: 0.0163 expectedFpp: 6.9948186E-8 -File length: 32312 bytes +File length: 32313 bytes Padding length: 0 bytes Padding ratio: 0% ________________________________________________________________________________________________________________________ diff --git a/ql/src/test/results/clientpositive/parquet_ppd_date.q.out b/ql/src/test/results/clientpositive/parquet_ppd_date.q.out index d9f6846236..a1966e25f1 100644 --- a/ql/src/test/results/clientpositive/parquet_ppd_date.q.out +++ b/ql/src/test/results/clientpositive/parquet_ppd_date.q.out @@ -431,3 +431,119 @@ POSTHOOK: query: select * from newtypestbl_n2 where da between '1970-02-18' and POSTHOOK: type: QUERY POSTHOOK: Input: default@newtypestbl_n2 #### A masked pattern was here #### +PREHOOK: query: insert overwrite table newtypestbl_n2 select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("999-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1820-02-27" as date) from src src2 limit 10) uniontbl +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@newtypestbl_n2 +POSTHOOK: query: insert overwrite table newtypestbl_n2 select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("999-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1820-02-27" as date) from src src2 limit 10) uniontbl +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@newtypestbl_n2 +POSTHOOK: Lineage: newtypestbl_n2.c EXPRESSION [] +POSTHOOK: Lineage: newtypestbl_n2.d EXPRESSION [] +POSTHOOK: Lineage: newtypestbl_n2.da EXPRESSION [] +POSTHOOK: Lineage: newtypestbl_n2.v EXPRESSION [] +PREHOOK: query: select * from newtypestbl_n2 where da='999-02-20' +PREHOOK: type: QUERY +PREHOOK: Input: default@newtypestbl_n2 +#### A masked pattern was here #### +POSTHOOK: query: select * from newtypestbl_n2 where da='999-02-20' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@newtypestbl_n2 +#### A masked pattern was here #### +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 
+apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +PREHOOK: query: select * from newtypestbl_n2 where da='999-02-20' +PREHOOK: type: QUERY +PREHOOK: Input: default@newtypestbl_n2 +#### A masked pattern was here #### +POSTHOOK: query: select * from newtypestbl_n2 where da='999-02-20' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@newtypestbl_n2 +#### A masked pattern was here #### +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +PREHOOK: query: select * from newtypestbl_n2 where da=cast('999-02-20' as date) +PREHOOK: type: QUERY +PREHOOK: Input: default@newtypestbl_n2 +#### A masked pattern was here #### +POSTHOOK: query: select * from newtypestbl_n2 where da=cast('999-02-20' as date) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@newtypestbl_n2 +#### A masked pattern was here #### +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +PREHOOK: query: select * from newtypestbl_n2 where da=cast('999-02-20' as date) +PREHOOK: type: QUERY +PREHOOK: Input: default@newtypestbl_n2 +#### A masked pattern was here #### +POSTHOOK: query: select * from newtypestbl_n2 where da=cast('999-02-20' as date) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@newtypestbl_n2 +#### A masked pattern was here #### +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +PREHOOK: query: select * from newtypestbl_n2 where da='999-02-20' +PREHOOK: type: QUERY +PREHOOK: Input: default@newtypestbl_n2 +#### A masked pattern was here #### +POSTHOOK: query: select * from newtypestbl_n2 where da='999-02-20' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@newtypestbl_n2 +#### A masked pattern was here #### +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +PREHOOK: query: select * from newtypestbl_n2 where da='999-02-20' +PREHOOK: type: QUERY +PREHOOK: Input: default@newtypestbl_n2 +#### A masked pattern was here #### +POSTHOOK: query: select * from newtypestbl_n2 where da='999-02-20' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@newtypestbl_n2 +#### A masked pattern was here #### +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +PREHOOK: query: select * from newtypestbl_n2 where da=cast('999-02-20' as date) +PREHOOK: type: QUERY +PREHOOK: Input: default@newtypestbl_n2 +#### A masked pattern was here #### +POSTHOOK: query: select * from newtypestbl_n2 where da=cast('999-02-20' as date) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@newtypestbl_n2 +#### A masked pattern was here #### +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +PREHOOK: query: select * from newtypestbl_n2 where da=cast('999-02-20' as date) +PREHOOK: type: QUERY +PREHOOK: Input: default@newtypestbl_n2 +#### A masked pattern was here #### +POSTHOOK: query: select * from newtypestbl_n2 where da=cast('999-02-20' as date) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@newtypestbl_n2 +#### A masked pattern was here #### +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 +apple bee 0.220 0999-02-20 diff --git 
a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java index db8db1c922..27583b82b8 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java @@ -42,12 +42,12 @@ import org.apache.avro.io.BinaryEncoder; import org.apache.avro.io.DecoderFactory; import org.apache.avro.io.EncoderFactory; -import org.apache.avro.UnresolvedUnionException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.common.type.TimestampTZUtil; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.common.type.CalendarUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.common.type.HiveChar; @@ -88,6 +88,11 @@ */ private ZoneId writerTimezone = null; + /** + * Whether the file was written using proleptic Gregorian or hybrid calendar. + */ + private Boolean writerProleptic = null; + private Configuration configuration = null; AvroDeserializer() {} @@ -169,9 +174,10 @@ public Object deserialize(List columnNames, List columnTypes, GenericRecord r = recordWritable.getRecord(); Schema fileSchema = recordWritable.getFileSchema(); writerTimezone = recordWritable.getWriterTimezone(); + writerProleptic = recordWritable.getWriterProleptic(); - UID recordReaderId = recordWritable.getRecordReaderID(); - //If the record reader (from which the record is originated) is already seen and valid, + UID recordReaderId = recordWritable.getRecordReaderID(); + //If the record reader (from which the record is originated) is already seen and valid, //no need to re-encode the record. if(!noEncodingNeeded.contains(recordReaderId)) { SchemaReEncoder reEncoder = null; @@ -311,16 +317,30 @@ private Object deserializePrimitive(Object datum, Schema fileSchema, Schema reco str = datum.toString(); HiveVarchar hvc = new HiveVarchar(str, maxLength); return hvc; - case DATE: + case DATE: { if (recordSchema.getType() != Type.INT) { throw new AvroSerdeException("Unexpected Avro schema for Date TypeInfo: " + recordSchema.getType()); } - return Date.ofEpochMilli(DateWritableV2.daysToMillis((Integer)datum)); - case TIMESTAMP: + final boolean skipProlepticConversion; + if (writerProleptic != null) { + skipProlepticConversion = writerProleptic; + } else { + if (configuration != null) { + skipProlepticConversion = HiveConf.getBoolVar( + configuration, HiveConf.ConfVars.HIVE_AVRO_PROLEPTIC_GREGORIAN_DEFAULT); + } else { + skipProlepticConversion = HiveConf.ConfVars.HIVE_AVRO_PROLEPTIC_GREGORIAN_DEFAULT.defaultBoolVal; + } + } + + return Date.ofEpochMilli(DateWritableV2.daysToMillis( + skipProlepticConversion ? (Integer) datum : CalendarUtils.convertDateToProleptic((Integer) datum))); + } + case TIMESTAMP: { if (recordSchema.getType() != Type.LONG) { throw new AvroSerdeException( - "Unexpected Avro schema for Date TypeInfo: " + recordSchema.getType()); + "Unexpected Avro schema for Date TypeInfo: " + recordSchema.getType()); } // If a time zone is found in file metadata (property name: writer.time.zone), convert the // timestamp to that (writer) time zone in order to emulate time zone agnostic behavior. 
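Both calendar-sensitive branches resolve whether to skip the proleptic conversion in the same three steps: trust the file's own `writer.proleptic` metadata when present, fall back to the session configuration, and finally to the compiled-in default. The TIMESTAMP branch in the next hunk repeats the block and additionally applies the writer-time-zone conversion before the calendar fix-up, so the shift is computed on the zone-adjusted instant. A sketch of that resolution factored into one place (the helper name is hypothetical, not part of the patch):

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;

final class ProlepticResolution {
  /**
   * Mirrors the fallback order in both deserializer branches:
   * file metadata, then session conf, then the built-in default.
   */
  static boolean skipProlepticConversion(Boolean writerProleptic, Configuration conf) {
    if (writerProleptic != null) {
      return writerProleptic;
    }
    if (conf != null) {
      return HiveConf.getBoolVar(conf,
          HiveConf.ConfVars.HIVE_AVRO_PROLEPTIC_GREGORIAN_DEFAULT);
    }
    return HiveConf.ConfVars.HIVE_AVRO_PROLEPTIC_GREGORIAN_DEFAULT.defaultBoolVal;
  }
}
```

Inlining the block twice, as the patch does, is behaviorally identical; the helper only makes the precedence easier to see.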
@@ -328,23 +348,40 @@ private Object deserializePrimitive(Object datum, Schema fileSchema, Schema reco // to the server's (reader) time zone for backwards compatibility reasons - unless the // session level configuration hive.avro.timestamp.skip.conversion is set to true, in which // case we assume it was written by a time zone agnostic writer, so we don't convert it. - boolean skipConversion; + final boolean skipUTCConversion; if (configuration != null) { - skipConversion = HiveConf.getBoolVar( + skipUTCConversion = HiveConf.getBoolVar( configuration, HiveConf.ConfVars.HIVE_AVRO_TIMESTAMP_SKIP_CONVERSION); } else { - skipConversion = HiveConf.ConfVars.HIVE_AVRO_TIMESTAMP_SKIP_CONVERSION.defaultBoolVal; + skipUTCConversion = HiveConf.ConfVars.HIVE_AVRO_TIMESTAMP_SKIP_CONVERSION.defaultBoolVal; } ZoneId convertToTimeZone; if (writerTimezone != null) { convertToTimeZone = writerTimezone; - } else if (skipConversion) { + } else if (skipUTCConversion) { convertToTimeZone = ZoneOffset.UTC; } else { convertToTimeZone = TimeZone.getDefault().toZoneId(); } - Timestamp timestamp = Timestamp.ofEpochMilli((Long)datum); - return TimestampTZUtil.convertTimestampToZone(timestamp, ZoneOffset.UTC, convertToTimeZone); + final boolean skipProlepticConversion; + if (writerProleptic != null) { + skipProlepticConversion = writerProleptic; + } else { + if (configuration != null) { + skipProlepticConversion = HiveConf.getBoolVar( + configuration, HiveConf.ConfVars.HIVE_AVRO_PROLEPTIC_GREGORIAN_DEFAULT); + } else { + skipProlepticConversion = HiveConf.ConfVars.HIVE_AVRO_PROLEPTIC_GREGORIAN_DEFAULT.defaultBoolVal; + } + } + Timestamp timestamp = TimestampTZUtil.convertTimestampToZone( + Timestamp.ofEpochMilli((Long) datum), ZoneOffset.UTC, convertToTimeZone); + if (!skipProlepticConversion) { + timestamp = Timestamp.ofEpochMilli( + CalendarUtils.convertTimeToProleptic(timestamp.toEpochMilli())); + } + return timestamp; + } default: return datum; } diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroGenericRecordWritable.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroGenericRecordWritable.java index 095197c2ed..92b81a73f0 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroGenericRecordWritable.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroGenericRecordWritable.java @@ -52,6 +52,8 @@ // Time zone file was written in, from metadata private ZoneId writerTimezone = null; + private Boolean writerProleptic = null; + /** * Unique Id determine which record reader created this record */ @@ -78,8 +80,9 @@ public AvroGenericRecordWritable(GenericRecord record) { this.record = record; } - public AvroGenericRecordWritable(ZoneId writerTimezone) { + public AvroGenericRecordWritable(ZoneId writerTimezone, Boolean writerProleptic) { this.writerTimezone = writerTimezone; + this.writerProleptic = writerProleptic; } @Override @@ -153,4 +156,8 @@ public void setFileSchema(Schema originalSchema) { public ZoneId getWriterTimezone() { return writerTimezone; } + + public Boolean getWriterProleptic() { + return writerProleptic; + } } diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java index 905e19b72a..ff4a197d90 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java @@ -60,6 +60,7 @@ public static final String DATE_TYPE_NAME = "date"; public static final String TIMESTAMP_TYPE_NAME = 
"timestamp-millis"; public static final String WRITER_TIME_ZONE = "writer.time.zone"; + public static final String WRITER_PROLEPTIC = "writer.proleptic"; public static final String AVRO_PROP_LOGICAL_TYPE = "logicalType"; public static final String AVRO_PROP_PRECISION = "precision"; public static final String AVRO_PROP_SCALE = "scale"; @@ -148,7 +149,7 @@ public void initialize(Configuration configuration, Properties properties) throw } if(!badSchema) { - this.avroSerializer = new AvroSerializer(); + this.avroSerializer = new AvroSerializer(configuration); this.avroDeserializer = new AvroDeserializer(configuration); } } diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java index 4331c11398..490434d2f1 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java @@ -31,13 +31,16 @@ import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericData.Fixed; import org.apache.avro.generic.GenericEnumSymbol; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.common.type.TimestampTZUtil; -import org.apache.hadoop.hive.serde2.io.DateWritableV2; +import org.apache.hadoop.hive.common.type.CalendarUtils; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -56,13 +59,22 @@ import org.apache.hadoop.io.Writable; class AvroSerializer { + /** * The Schema to use when serializing Map keys. * Since we're sharing this across Serializer instances, it must be immutable; * any properties need to be added in a static initializer. */ private static final Schema STRING_SCHEMA = Schema.create(Schema.Type.STRING); - AvroGenericRecordWritable cache = new AvroGenericRecordWritable(); + private AvroGenericRecordWritable cache = new AvroGenericRecordWritable(); + private boolean defaultProleptic; + + AvroSerializer() {} + + AvroSerializer(Configuration configuration) { + this.defaultProleptic = HiveConf.getBoolVar( + configuration, ConfVars.HIVE_AVRO_PROLEPTIC_GREGORIAN); + } // Hive is pretty simple (read: stupid) in writing out values via the serializer. // We're just going to go through, matching indices. Hive formats normally @@ -210,12 +222,15 @@ private Object serializePrimitive(TypeInfo typeInfo, PrimitiveObjectInspector fi return vc.getValue(); case DATE: Date date = ((DateObjectInspector)fieldOI).getPrimitiveJavaObject(structFieldData); - return DateWritableV2.dateToDays(date); + return defaultProleptic ? date.toEpochDay() : + CalendarUtils.convertDateToHybrid(date.toEpochDay()); case TIMESTAMP: Timestamp timestamp = ((TimestampObjectInspector) fieldOI).getPrimitiveJavaObject(structFieldData); + long millis = defaultProleptic ? 
timestamp.toEpochMilli() : + CalendarUtils.convertTimeToHybrid(timestamp.toEpochMilli()); timestamp = TimestampTZUtil.convertTimestampToZone( - timestamp, TimeZone.getDefault().toZoneId(), ZoneOffset.UTC); + Timestamp.ofEpochMilli(millis), TimeZone.getDefault().toZoneId(), ZoneOffset.UTC); return timestamp.toEpochMilli(); case UNKNOWN: throw new AvroSerdeException("Received UNKNOWN primitive category."); diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java b/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java index 1cd03f7368..514bca7d1b 100644 --- a/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java +++ b/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java @@ -293,7 +293,7 @@ public void canDeserializeTimestamps() throws SerDeException, IOException { record.put("timestampField", 1546387200999L); assertTrue(GENERIC_DATA.validate(readerSchema, record)); - AvroGenericRecordWritable agrw = new AvroGenericRecordWritable(ZoneId.of("America/New_York")); + AvroGenericRecordWritable agrw = new AvroGenericRecordWritable(ZoneId.of("America/New_York"), false); agrw.setRecord(record); agrw.setFileSchema(readerSchema); agrw.setRecordReaderID(new UID()); diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/FileFormatProxy.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/FileFormatProxy.java index d403af1e81..e696591ab6 100644 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/FileFormatProxy.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/FileFormatProxy.java @@ -21,6 +21,7 @@ import java.nio.ByteBuffer; import java.util.List; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.metastore.Metastore.SplitInfos; @@ -37,7 +38,8 @@ * @param fileMetadata File metadata from metastore cache. * @return The result to return to client for this file, or null if file is eliminated. */ - SplitInfos applySargToMetadata(SearchArgument sarg, ByteBuffer fileMetadata) throws IOException; + SplitInfos applySargToMetadata(SearchArgument sarg, ByteBuffer fileMetadata, + Configuration conf) throws IOException; /** * @param fs The filesystem of the file. 
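The write side mirrors the read side: AvroSerDe now hands the Configuration to AvroSerializer, which converts proleptic epoch days and millis back to the hybrid calendar unless `ConfVars.HIVE_AVRO_PROLEPTIC_GREGORIAN` opts the writer into proleptic output, and the `writer.proleptic` file property (`WRITER_PROLEPTIC`) records the choice so readers do not have to guess. The Configuration parameter added to `FileFormatProxy.applySargToMetadata` presumably serves the same end, letting metastore-side ORC predicate evaluation see the calendar-related settings. Because both directions hold the formatted yyyy-MM-dd string constant, they invert each other; a sketch (class name hypothetical, and the round trip assumes a date whose string form exists in both calendars, i.e. anything outside the ten days dropped in October 1582):

```java
import org.apache.hadoop.hive.common.type.CalendarUtils;

public class HybridRoundTrip {
  public static void main(String[] args) {
    int prolepticDay = -250000; // an arbitrary epoch day centuries before 1582
    int hybridDay = CalendarUtils.convertDateToHybrid(prolepticDay);
    // The same yyyy-MM-dd string maps to instants several days apart.
    System.out.println(hybridDay - prolepticDay);
    // Writing with convertDateToHybrid and reading back with
    // convertDateToProleptic should restore the original value.
    System.out.println(CalendarUtils.convertDateToProleptic(hybridDay) == prolepticDay); // true
  }
}
```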
diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/FileMetadataHandler.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/FileMetadataHandler.java index ff30260266..73917fc542 100644 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/FileMetadataHandler.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/FileMetadataHandler.java @@ -38,7 +38,7 @@ public abstract class FileMetadataHandler { protected static final Logger LOG = LoggerFactory.getLogger(FileMetadataHandler.class); - private Configuration conf; + protected Configuration conf; private PartitionExpressionProxy expressionProxy; private FileFormatProxy fileFormatProxy; private MetadataStore store; diff --git a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestStructColumnVector.java b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestStructColumnVector.java index d1a546f229..a65e84223b 100644 --- a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestStructColumnVector.java +++ b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestStructColumnVector.java @@ -106,6 +106,7 @@ public void testStringify() throws IOException { LongColumnVector x1 = new LongColumnVector(); TimestampColumnVector x2 = new TimestampColumnVector(); x2.setIsUTC(true); + x2.setUsingProlepticCalendar(true); StructColumnVector x = new StructColumnVector(1024, x1, x2); BytesColumnVector y = new BytesColumnVector(); batch.cols[0] = x; @@ -140,6 +141,7 @@ public void testStringify2() throws IOException { LongColumnVector x1 = new LongColumnVector(); TimestampColumnVector x2 = new TimestampColumnVector(); x2.setIsUTC(true); + x2.setUsingProlepticCalendar(true); StructColumnVector x = new StructColumnVector(1024, x1, x2); BytesColumnVector y = new BytesColumnVector(); batch.cols[0] = x;
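The storage-api test change is the vectorized face of the same feature: TimestampColumnVector carries a `usingProlepticCalendar` flag next to the existing `isUTC` flag, and the stringification exercised by `testStringify` depends on both. A minimal usage sketch (class name hypothetical):

```java
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;

public class VectorCalendarFlags {
  public static void main(String[] args) {
    TimestampColumnVector tcv = new TimestampColumnVector(1024);
    tcv.setIsUTC(true);                  // interpret values as UTC instants...
    tcv.setUsingProlepticCalendar(true); // ...in the proleptic Gregorian calendar
    tcv.set(0, java.sql.Timestamp.valueOf("1582-10-04 00:00:00"));
  }
}
```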
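Finally, the ORC golden updates earlier in this section (the new `Calendar: Julian/Gregorian` line in orc_file_dump.q.out, the one-to-two-byte totalSize and file-length bumps from the extra footer field, and the Parquet tests pinning `999-02-20` to its normalized `0999-02-20` form) all check the same contract from the file-format side. A sketch of reading that footer field back, assuming the ORC 1.6 reader API (the method and path shown are assumptions, not taken from this patch):

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;

public class PrintWriterCalendar {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Reader#writerUsedProlepticGregorian is assumed from ORC 1.6;
    // the path is a placeholder.
    Reader reader = OrcFile.createReader(new Path("/tmp/example.orc"),
        OrcFile.readerOptions(conf));
    System.out.println("Calendar: " + (reader.writerUsedProlepticGregorian()
        ? "Proleptic Gregorian" : "Julian/Gregorian"));
  }
}
```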