diff --git data/files/parquet_types.txt data/files/parquet_types.txt
index 0be390b..9d81c3c 100644
--- data/files/parquet_types.txt
+++ data/files/parquet_types.txt
@@ -1,21 +1,21 @@
-100|1|1|1.0|0.0|abc
-101|2|2|1.1|0.3|def
-102|3|3|1.2|0.6|ghi
-103|1|4|1.3|0.9|jkl
-104|2|5|1.4|1.2|mno
-105|3|1|1.0|1.5|pqr
-106|1|2|1.1|1.8|stu
-107|2|3|1.2|2.1|vwx
-108|3|4|1.3|2.4|yza
-109|1|5|1.4|2.7|bcd
-110|2|1|1.0|3.0|efg
-111|3|2|1.1|3.3|hij
-112|1|3|1.2|3.6|klm
-113|2|4|1.3|3.9|nop
-114|3|5|1.4|4.2|qrs
-115|1|1|1.0|4.5|tuv
-116|2|2|1.1|4.8|wxy
-117|3|3|1.2|5.1|zab
-118|1|4|1.3|5.4|cde
-119|2|5|1.4|5.7|fgh
-120|3|1|1.0|6.0|ijk
+100|1|1|1.0|0.0|abc|2011-01-01 01:01:01.111111111
+101|2|2|1.1|0.3|def|2012-02-02 02:02:02.222222222
+102|3|3|1.2|0.6|ghi|2013-03-03 03:03:03.333333333
+103|1|4|1.3|0.9|jkl|2014-04-04 04:04:04.444444444
+104|2|5|1.4|1.2|mno|2015-05-05 05:05:05.555555555
+105|3|1|1.0|1.5|pqr|2016-06-06 06:06:06.666666666
+106|1|2|1.1|1.8|stu|2017-07-07 07:07:07.777777777
+107|2|3|1.2|2.1|vwx|2018-08-08 08:08:08.888888888
+108|3|4|1.3|2.4|yza|2019-09-09 09:09:09.999999999
+109|1|5|1.4|2.7|bcd|2020-10-10 10:10:10.101010101
+110|2|1|1.0|3.0|efg|2021-11-11 11:11:11.111111111
+111|3|2|1.1|3.3|hij|2022-12-12 12:12:12.121212121
+112|1|3|1.2|3.6|klm|2023-01-02 13:13:13.131313131
+113|2|4|1.3|3.9|nop|2024-02-02 14:14:14.141414141
+114|3|5|1.4|4.2|qrs|2025-03-03 15:15:15.151515151
+115|1|1|1.0|4.5|tuv|2026-04-04 16:16:16.161616161
+116|2|2|1.1|4.8|wxy|2027-05-05 17:17:17.171717171
+117|3|3|1.2|5.1|zab|2028-06-06 18:18:18.181818181
+118|1|4|1.3|5.4|cde|2029-07-07 19:19:19.191919191
+119|2|5|1.4|5.7|fgh|2030-08-08 20:20:20.202020202
+120|3|1|1.0|6.0|ijk|2031-09-09 21:21:21.212121212
diff --git pom.xml pom.xml
index 2b91846..8e6e87f 100644
--- pom.xml
+++ pom.xml
@@ -124,6 +124,7 @@
     1.14
     0.9.94
     1.1
+    <jodd.version>3.5.2</jodd.version>
     20090211
     4.10
     2.22
diff --git ql/pom.xml ql/pom.xml
index 13c477a..e7c4b63 100644
--- ql/pom.xml
+++ ql/pom.xml
@@ -166,6 +166,11 @@
       <version>${jackson.version}</version>
     </dependency>
+    <dependency>
+      <groupId>org.jodd</groupId>
+      <artifactId>jodd-core</artifactId>
+      <version>${jodd.version}</version>
+    </dependency>
     <dependency>
       <groupId>org.codehaus.jackson</groupId>
       <artifactId>jackson-mapper-asl</artifactId>
       <version>${jackson.version}</version>
@@ -497,6 +502,7 @@
                   <include>com.twitter:parquet-hadoop-bundle</include>
                   <include>org.apache.thrift:libthrift</include>
                   <include>commons-lang:commons-lang</include>
+                  <include>org.jodd:jodd-core</include>
                   <include>org.json:json</include>
                   <include>org.apache.avro:avro</include>
                   <include>org.apache.avro:avro-mapred</include>
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
index 218c007..73cf0f5 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
@@ -14,20 +14,23 @@ package org.apache.hadoop.hive.ql.io.parquet.convert;
 
 import java.math.BigDecimal;
-
+import java.sql.Timestamp;
 import java.util.ArrayList;
 
+import org.apache.hadoop.hive.ql.io.parquet.utils.NanoTimeUtils;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
 import org.apache.hadoop.io.BooleanWritable;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.FloatWritable;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
-
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
+
 import parquet.column.Dictionary;
+import parquet.example.data.simple.NanoTime;
 import parquet.io.api.Binary;
 import parquet.io.api.Converter;
 import parquet.io.api.PrimitiveConverter;
@@ -43,6 +46,7 @@ EDOUBLE_CONVERTER(Double.TYPE) {
     @Override
+
     Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) {
       return new PrimitiveConverter() {
         @Override
@@ -128,6 +132,19 @@ protected HiveDecimalWritable convert(Binary binary) {
         }
       };
     }
+  },
+  ETIMESTAMP_CONVERTER(TimestampWritable.class) {
+    @Override
+    Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) {
+      return new BinaryConverter<TimestampWritable>(type, parent, index) {
+        @Override
+        protected TimestampWritable convert(Binary binary) {
+          NanoTime nt = NanoTime.fromBinary(binary);
+          Timestamp ts = NanoTimeUtils.getTimestamp(nt);
+          return new TimestampWritable(ts);
+        }
+      };
+    }
   };
 
   final Class<?> _type;
@@ -143,6 +160,10 @@ private ETypeConverter(final Class<?> type) {
 
   abstract Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent);
 
   public static Converter getNewConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) {
+    if (type.isPrimitive() && (type.asPrimitiveType().getPrimitiveTypeName().equals(PrimitiveType.PrimitiveTypeName.INT96))) {
+      //TODO- clean up once parquet supports a Timestamp type annotation.
+      return ETypeConverter.ETIMESTAMP_CONVERTER.getConverter(type, index, parent);
+    }
    if (OriginalType.DECIMAL == type.getOriginalType()) {
      return EDECIMAL_CONVERTER.getConverter(type, index, parent);
    } else if (OriginalType.UTF8 == type.getOriginalType()) {
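
Reader side in brief: ETIMESTAMP_CONVERTER is selected for every INT96 column, since Parquet has no timestamp annotation yet (hence the TODO above). A minimal standalone sketch of that decode path, not part of the patch; it assumes parquet-mr's NanoTime and the NanoTimeUtils class added later in this patch:

  // Sketch mirroring ETIMESTAMP_CONVERTER.convert() above.
  import java.sql.Timestamp;
  import org.apache.hadoop.hive.ql.io.parquet.utils.NanoTimeUtils;
  import org.apache.hadoop.hive.serde2.io.TimestampWritable;
  import parquet.example.data.simple.NanoTime;
  import parquet.io.api.Binary;

  public class Int96ReadSketch {
    public static TimestampWritable decode(Binary binary) {
      NanoTime nt = NanoTime.fromBinary(binary);     // 12-byte INT96 -> (Julian day, nanos of day)
      Timestamp ts = NanoTimeUtils.getTimestamp(nt); // interpreted in GMT
      return new TimestampWritable(ts);
    }
  }
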
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
index 29f7e11..99901f0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
@@ -78,7 +78,7 @@ private static Type convertType(final String name, final TypeInfo typeInfo, fina
       // TODO : binaryTypeInfo is a byte array. Need to map it
       throw new UnsupportedOperationException("Binary type not implemented");
     } else if (typeInfo.equals(TypeInfoFactory.timestampTypeInfo)) {
-      throw new UnsupportedOperationException("Timestamp type not implemented");
+      return new PrimitiveType(repetition, PrimitiveTypeName.INT96, name);
     } else if (typeInfo.equals(TypeInfoFactory.voidTypeInfo)) {
       throw new UnsupportedOperationException("Void type not implemented");
     } else if (typeInfo instanceof DecimalTypeInfo) {
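
With convertType() now returning an INT96 primitive, a Hive timestamp column maps to a plain int96 field in the Parquet schema. A hypothetical illustration (the class and main() are mine, not the patch's; it uses the same parquet.schema constructor the change above calls):

  import parquet.schema.MessageType;
  import parquet.schema.PrimitiveType;
  import parquet.schema.PrimitiveType.PrimitiveTypeName;
  import parquet.schema.Type.Repetition;

  public class TimestampSchemaSketch {
    public static void main(String[] args) {
      // What convertType() returns for an optional Hive timestamp column named "t".
      PrimitiveType t = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.INT96, "t");
      MessageType schema = new MessageType("hive_schema", t);
      // Prints the schema; the timestamp column renders as: optional int96 t;
      System.out.println(schema);
    }
  }
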
See HIVE-6384"); } else if (typeInfo.getTypeName().toLowerCase().startsWith(serdeConstants.DECIMAL_TYPE_NAME)) { diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java index 4cad1cb..e3e327c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -46,6 +47,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; @@ -276,6 +278,8 @@ private Writable createPrimitive(final Object obj, final PrimitiveObjectInspecto } System.arraycopy(src, 0, tgt, bytes - src.length, src.length); // Padding leading zeroes/ones. return new BytesWritable(tgt); + case TIMESTAMP: + return new TimestampWritable(((TimestampObjectInspector) inspector).getPrimitiveJavaObject(obj)); default: throw new SerDeException("Unknown primitive : " + inspector.getPrimitiveCategory()); } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/utils/NanoTimeUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/utils/NanoTimeUtils.java new file mode 100644 index 0000000..06987ad --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/utils/NanoTimeUtils.java @@ -0,0 +1,87 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet.utils; + +import java.sql.Date; +import java.sql.Timestamp; +import java.util.Calendar; +import java.util.TimeZone; + +import jodd.datetime.JDateTime; +import jodd.datetime.TimeUtil; +import parquet.example.data.simple.NanoTime; + +/** + * Utilities for converting from java.sql.Timestamp to parquet timestamp. + * This utilizes the Jodd library. + */ +public class NanoTimeUtils { + static final long NANOS_PER_SECOND = 1000000000; + static final long SECONDS_PER_MINUTE = 60; + static final long MINUTES_PER_HOUR = 60; + + private static final ThreadLocal parquetTsCalendar = new ThreadLocal(); + + private static Calendar getCalendar() { + //Calendar.getInstance calculates the current-time needlessly, so cache an instance. 
+ if (parquetTsCalendar.get() == null) { + parquetTsCalendar.set(Calendar.getInstance(TimeZone.getTimeZone("GMT"))); + } + return parquetTsCalendar.get(); + } + + public static NanoTime getNanoTime(Timestamp ts) { + + Calendar calendar = getCalendar(); + calendar.setTime(ts); + JDateTime jDateTime = new JDateTime(calendar.get(Calendar.YEAR), + calendar.get(Calendar.MONTH) + 1, //java calendar index starting at 1. + calendar.get(Calendar.DAY_OF_MONTH)); + int days = jDateTime.getJulianDayNumber(); + + long hour = calendar.get(Calendar.HOUR_OF_DAY); + long minute = calendar.get(Calendar.MINUTE); + long second = calendar.get(Calendar.SECOND); + long nanos = ts.getNanos(); + long nanosOfDay = nanos + NANOS_PER_SECOND * second + NANOS_PER_SECOND * SECONDS_PER_MINUTE * minute + + NANOS_PER_SECOND * SECONDS_PER_MINUTE * MINUTES_PER_HOUR * hour; + return new NanoTime(days, nanosOfDay); + } + + public static Timestamp getTimestamp(NanoTime nt) { + int julianDay = nt.getJulianDay(); + long nanosOfDay = nt.getTimeOfDayNanos(); + + JDateTime jDateTime = new JDateTime((double) julianDay); + Calendar calendar = getCalendar(); + calendar.set(Calendar.YEAR, jDateTime.getYear()); + calendar.set(Calendar.MONTH, jDateTime.getMonth() - 1); //java calender index starting at 1. + calendar.set(Calendar.DAY_OF_MONTH, jDateTime.getDay()); + + long remainder = nanosOfDay; + int hour = (int) (remainder / (NANOS_PER_SECOND * SECONDS_PER_MINUTE * MINUTES_PER_HOUR)); + remainder = remainder % (NANOS_PER_SECOND * SECONDS_PER_MINUTE * MINUTES_PER_HOUR); + int minutes = (int) (remainder / (NANOS_PER_SECOND * SECONDS_PER_MINUTE)); + remainder = remainder % (NANOS_PER_SECOND * SECONDS_PER_MINUTE); + int seconds = (int) (remainder / (NANOS_PER_SECOND)); + long nanos = remainder % NANOS_PER_SECOND; + + calendar.set(Calendar.HOUR_OF_DAY, hour); + calendar.set(Calendar.MINUTE, minutes); + calendar.set(Calendar.SECOND, seconds); + Timestamp ts = new Timestamp(calendar.getTimeInMillis()); + ts.setNanos((int) nanos); + return ts; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java index 6169353..8bb9cb1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java @@ -13,10 +13,14 @@ */ package org.apache.hadoop.hive.ql.io.parquet.write; +import java.sql.Timestamp; + +import org.apache.hadoop.hive.ql.io.parquet.utils.NanoTimeUtils; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.io.ArrayWritable; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.BytesWritable; @@ -25,6 +29,7 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Writable; +import parquet.example.data.simple.NanoTime; import parquet.io.ParquetEncodingException; import parquet.io.api.Binary; import parquet.io.api.RecordConsumer; @@ -149,6 +154,10 @@ private void writePrimitive(final Writable value) { throw new UnsupportedOperationException("HiveDecimalWritable writing not implemented"); } else if (value instanceof BytesWritable) { recordConsumer.addBinary((Binary.fromByteArray(((BytesWritable) value).getBytes()))); + } else if (value 
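
getNanoTime() and getTimestamp() are inverses, and both pin the calendar to GMT so the stored value is independent of the writer's local timezone. A minimal round-trip sketch (not part of the patch; it has the same shape as verifyTsString in the test added below):

  import java.sql.Timestamp;
  import org.apache.hadoop.hive.ql.io.parquet.utils.NanoTimeUtils;
  import parquet.example.data.simple.NanoTime;

  public class NanoTimeRoundTrip {
    public static void main(String[] args) {
      Timestamp ts = Timestamp.valueOf("2011-01-01 01:01:01.111111111");
      NanoTime nt = NanoTimeUtils.getNanoTime(ts);     // -> (Julian day, nanos within the day)
      Timestamp back = NanoTimeUtils.getTimestamp(nt); // -> java.sql.Timestamp again
      if (!ts.equals(back)) {
        throw new IllegalStateException("round trip should be lossless");
      }
      System.out.println(nt.getJulianDay() + " days, " + nt.getTimeOfDayNanos() + " nanos");
    }
  }
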
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
index 6169353..8bb9cb1 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
@@ -13,10 +13,14 @@
  */
 package org.apache.hadoop.hive.ql.io.parquet.write;
 
+import java.sql.Timestamp;
+
+import org.apache.hadoop.hive.ql.io.parquet.utils.NanoTimeUtils;
 import org.apache.hadoop.hive.serde2.io.ByteWritable;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
 import org.apache.hadoop.io.ArrayWritable;
 import org.apache.hadoop.io.BooleanWritable;
 import org.apache.hadoop.io.BytesWritable;
@@ -25,6 +29,7 @@
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Writable;
 
+import parquet.example.data.simple.NanoTime;
 import parquet.io.ParquetEncodingException;
 import parquet.io.api.Binary;
 import parquet.io.api.RecordConsumer;
@@ -149,6 +154,10 @@ private void writePrimitive(final Writable value) {
       throw new UnsupportedOperationException("HiveDecimalWritable writing not implemented");
     } else if (value instanceof BytesWritable) {
       recordConsumer.addBinary((Binary.fromByteArray(((BytesWritable) value).getBytes())));
+    } else if (value instanceof TimestampWritable) {
+      Timestamp ts = ((TimestampWritable) value).getTimestamp();
+      NanoTime nt = NanoTimeUtils.getNanoTime(ts);
+      nt.writeValue(recordConsumer);
     } else {
       throw new IllegalArgumentException("Unknown value type: " + value + " " + value.getClass());
     }
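
nt.writeValue(recordConsumer) emits the timestamp as a single 12-byte INT96 value. To my understanding of parquet-mr's NanoTime (worth verifying against the version in use), the packing is little-endian nanos-of-day followed by the Julian day, the layout Impala also uses. A sketch of that assumed packing:

  import java.nio.ByteBuffer;
  import java.nio.ByteOrder;

  public class Int96LayoutSketch {
    // Assumed layout of NanoTime.toBinary(); verify against the parquet-mr sources.
    public static byte[] pack(int julianDay, long nanosOfDay) {
      ByteBuffer buf = ByteBuffer.allocate(12).order(ByteOrder.LITTLE_ENDIAN);
      buf.putLong(nanosOfDay); // bytes 0-7: nanoseconds within the day
      buf.putInt(julianDay);   // bytes 8-11: Julian day number
      return buf.array();
    }
  }
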
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java
new file mode 100644
index 0000000..f56a643
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java
@@ -0,0 +1,201 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.parquet.serde;
+
+import java.sql.Timestamp;
+import java.util.Calendar;
+import java.util.Date;
+import java.util.TimeZone;
+
+import junit.framework.Assert;
+import junit.framework.TestCase;
+
+import org.apache.hadoop.hive.ql.io.parquet.utils.NanoTimeUtils;
+
+import parquet.example.data.simple.NanoTime;
+
+/**
+ * Tests the utilities used for the Parquet timestamp conversion.
+ */
+public class TestParquetTimestampUtils extends TestCase {
+
+  public void testJulianDay() {
+    //check that May 23, 1968 is Julian day 2440000.
+    Calendar cal = Calendar.getInstance();
+    cal.set(Calendar.YEAR, 1968);
+    cal.set(Calendar.MONTH, Calendar.MAY);
+    cal.set(Calendar.DAY_OF_MONTH, 23);
+    cal.set(Calendar.HOUR_OF_DAY, 0);
+    cal.setTimeZone(TimeZone.getTimeZone("GMT"));
+
+    Timestamp ts = new Timestamp(cal.getTimeInMillis());
+    NanoTime nt = NanoTimeUtils.getNanoTime(ts);
+    Assert.assertEquals(nt.getJulianDay(), 2440000);
+
+    Timestamp tsFetched = NanoTimeUtils.getTimestamp(nt);
+    Assert.assertEquals(tsFetched, ts);
+
+    //check that there are 30 Julian days between Jan 1, 2005 and Jan 31, 2005.
+    Calendar cal1 = Calendar.getInstance();
+    cal1.set(Calendar.YEAR, 2005);
+    cal1.set(Calendar.MONTH, Calendar.JANUARY);
+    cal1.set(Calendar.DAY_OF_MONTH, 1);
+    cal1.set(Calendar.HOUR_OF_DAY, 0);
+    cal1.setTimeZone(TimeZone.getTimeZone("GMT"));
+
+    Timestamp ts1 = new Timestamp(cal1.getTimeInMillis());
+    NanoTime nt1 = NanoTimeUtils.getNanoTime(ts1);
+
+    Timestamp ts1Fetched = NanoTimeUtils.getTimestamp(nt1);
+    Assert.assertEquals(ts1Fetched, ts1);
+
+    Calendar cal2 = Calendar.getInstance();
+    cal2.set(Calendar.YEAR, 2005);
+    cal2.set(Calendar.MONTH, Calendar.JANUARY);
+    cal2.set(Calendar.DAY_OF_MONTH, 31);
+    cal2.set(Calendar.HOUR_OF_DAY, 0);
+    cal2.setTimeZone(TimeZone.getTimeZone("UTC"));
+
+    Timestamp ts2 = new Timestamp(cal2.getTimeInMillis());
+    NanoTime nt2 = NanoTimeUtils.getNanoTime(ts2);
+
+    Timestamp ts2Fetched = NanoTimeUtils.getTimestamp(nt2);
+    Assert.assertEquals(ts2Fetched, ts2);
+    Assert.assertEquals(nt2.getJulianDay() - nt1.getJulianDay(), 30);
+  }
+
+  public void testNanos() {
+    //case 1: 01:01:01.000000001
+    Calendar cal = Calendar.getInstance();
+    cal.set(Calendar.YEAR, 1968);
+    cal.set(Calendar.MONTH, Calendar.MAY);
+    cal.set(Calendar.DAY_OF_MONTH, 23);
+    cal.set(Calendar.HOUR_OF_DAY, 1);
+    cal.set(Calendar.MINUTE, 1);
+    cal.set(Calendar.SECOND, 1);
+    cal.setTimeZone(TimeZone.getTimeZone("GMT"));
+    Timestamp ts = new Timestamp(cal.getTimeInMillis());
+    ts.setNanos(1);
+
+    //(1*60*60 + 1*60 + 1) * 1e9 + 1 = 3661000000001
+    NanoTime nt = NanoTimeUtils.getNanoTime(ts);
+    Assert.assertEquals(nt.getTimeOfDayNanos(), 3661000000001L);
+
+    //case 2: 23:59:59.999999999
+    cal = Calendar.getInstance();
+    cal.set(Calendar.YEAR, 1968);
+    cal.set(Calendar.MONTH, Calendar.MAY);
+    cal.set(Calendar.DAY_OF_MONTH, 23);
+    cal.set(Calendar.HOUR_OF_DAY, 23);
+    cal.set(Calendar.MINUTE, 59);
+    cal.set(Calendar.SECOND, 59);
+    cal.setTimeZone(TimeZone.getTimeZone("GMT"));
+    ts = new Timestamp(cal.getTimeInMillis());
+    ts.setNanos(999999999);
+
+    //(23*60*60 + 59*60 + 59) * 1e9 + 999999999 = 86399999999999
+    nt = NanoTimeUtils.getNanoTime(ts);
+    Assert.assertEquals(nt.getTimeOfDayNanos(), 86399999999999L);
+
+    //case 3: verify the difference.
+    Calendar cal2 = Calendar.getInstance();
+    cal2.set(Calendar.YEAR, 1968);
+    cal2.set(Calendar.MONTH, Calendar.MAY);
+    cal2.set(Calendar.DAY_OF_MONTH, 23);
+    cal2.set(Calendar.HOUR_OF_DAY, 0);
+    cal2.set(Calendar.MINUTE, 10);
+    cal2.set(Calendar.SECOND, 0);
+    cal2.setTimeZone(TimeZone.getTimeZone("GMT"));
+    Timestamp ts2 = new Timestamp(cal2.getTimeInMillis());
+    ts2.setNanos(10);
+
+    Calendar cal1 = Calendar.getInstance();
+    cal1.set(Calendar.YEAR, 1968);
+    cal1.set(Calendar.MONTH, Calendar.MAY);
+    cal1.set(Calendar.DAY_OF_MONTH, 23);
+    cal1.set(Calendar.HOUR_OF_DAY, 0);
+    cal1.set(Calendar.MINUTE, 0);
+    cal1.set(Calendar.SECOND, 0);
+    cal1.setTimeZone(TimeZone.getTimeZone("GMT"));
+    Timestamp ts1 = new Timestamp(cal1.getTimeInMillis());
+    ts1.setNanos(1);
+
+    NanoTime n2 = NanoTimeUtils.getNanoTime(ts2);
+    NanoTime n1 = NanoTimeUtils.getNanoTime(ts1);
+
+    Assert.assertEquals(n2.getTimeOfDayNanos() - n1.getTimeOfDayNanos(), 600000000009L);
+  }
+
+  public void testTimezone() {
+    Calendar cal = Calendar.getInstance();
+    cal.set(Calendar.YEAR, 1968);
+    cal.set(Calendar.MONTH, Calendar.MAY);
+    cal.set(Calendar.DAY_OF_MONTH, 23);
+    if ((TimeZone.getTimeZone("US/Pacific").inDaylightTime(new Date()))) {
+      cal.set(Calendar.HOUR_OF_DAY, 18);
+    } else {
+      cal.set(Calendar.HOUR_OF_DAY, 17);
+    }
+    cal.set(Calendar.MINUTE, 1);
+    cal.set(Calendar.SECOND, 1);
+    cal.setTimeZone(TimeZone.getTimeZone("US/Pacific"));
+    Timestamp ts = new Timestamp(cal.getTimeInMillis());
+    ts.setNanos(1);
+
+    //18:00 PDT (during daylight saving) and 17:00 PST (otherwise) are both 01:00 GMT, so:
+    //(1*60*60 + 1*60 + 1) * 1e9 + 1 = 3661000000001
+    NanoTime nt = NanoTimeUtils.getNanoTime(ts);
+    Assert.assertEquals(nt.getTimeOfDayNanos(), 3661000000001L);
+
+    //in both cases, this will be the next day in GMT
+    Assert.assertEquals(nt.getJulianDay(), 2440001);
+  }
+
+  public void testValues() {
+    //exercise a broad range of timestamps close to the present.
+    verifyTsString("2011-01-01 01:01:01.111111111");
+    verifyTsString("2012-02-02 02:02:02.222222222");
+    verifyTsString("2013-03-03 03:03:03.333333333");
+    verifyTsString("2014-04-04 04:04:04.444444444");
+    verifyTsString("2015-05-05 05:05:05.555555555");
+    verifyTsString("2016-06-06 06:06:06.666666666");
+    verifyTsString("2017-07-07 07:07:07.777777777");
+    verifyTsString("2018-08-08 08:08:08.888888888");
+    verifyTsString("2019-09-09 09:09:09.999999999");
+    verifyTsString("2020-10-10 10:10:10.101010101");
+    verifyTsString("2021-11-11 11:11:11.111111111");
+    verifyTsString("2022-12-12 12:12:12.121212121");
+    verifyTsString("2023-01-02 13:13:13.131313131");
+    verifyTsString("2024-02-02 14:14:14.141414141");
+    verifyTsString("2025-03-03 15:15:15.151515151");
+    verifyTsString("2026-04-04 16:16:16.161616161");
+    verifyTsString("2027-05-05 17:17:17.171717171");
+    verifyTsString("2028-06-06 18:18:18.181818181");
+    verifyTsString("2029-07-07 19:19:19.191919191");
+    verifyTsString("2030-08-08 20:20:20.202020202");
+    verifyTsString("2031-09-09 21:21:21.212121212");
+
+    //test some extreme cases.
+ verifyTsString("9999-09-09 09:09:09.999999999"); + verifyTsString("0001-01-01 00:00:00.0"); + } + + private void verifyTsString(String tsString) { + Timestamp ts = Timestamp.valueOf(tsString); + NanoTime nt = NanoTimeUtils.getNanoTime(ts); + Timestamp tsFetched = NanoTimeUtils.getTimestamp(nt); + Assert.assertEquals(tsString, tsFetched.toString()); + } +} diff --git ql/src/test/queries/clientnegative/parquet_timestamp.q ql/src/test/queries/clientnegative/parquet_timestamp.q deleted file mode 100644 index 4ef36fa..0000000 --- ql/src/test/queries/clientnegative/parquet_timestamp.q +++ /dev/null @@ -1,3 +0,0 @@ -drop table if exists parquet_timestamp; - -create table parquet_timestamp (t timestamp) stored as parquet; diff --git ql/src/test/queries/clientpositive/parquet_types.q ql/src/test/queries/clientpositive/parquet_types.q index 5d6333c..cb0dcfd 100644 --- ql/src/test/queries/clientpositive/parquet_types.q +++ ql/src/test/queries/clientpositive/parquet_types.q @@ -7,7 +7,8 @@ CREATE TABLE parquet_types_staging ( csmallint smallint, cfloat float, cdouble double, - cstring1 string + cstring1 string, + t timestamp ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'; @@ -17,7 +18,8 @@ CREATE TABLE parquet_types ( csmallint smallint, cfloat float, cdouble double, - cstring1 string + cstring1 string, + t timestamp ) STORED AS PARQUET; LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging; diff --git ql/src/test/results/clientpositive/parquet_types.q.out ql/src/test/results/clientpositive/parquet_types.q.out index c23f7f1..dc6dc73 100644 --- ql/src/test/results/clientpositive/parquet_types.q.out +++ ql/src/test/results/clientpositive/parquet_types.q.out @@ -12,7 +12,8 @@ PREHOOK: query: CREATE TABLE parquet_types_staging ( csmallint smallint, cfloat float, cdouble double, - cstring1 string + cstring1 string, + t timestamp ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' PREHOOK: type: CREATETABLE @@ -23,7 +24,8 @@ POSTHOOK: query: CREATE TABLE parquet_types_staging ( csmallint smallint, cfloat float, cdouble double, - cstring1 string + cstring1 string, + t timestamp ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' POSTHOOK: type: CREATETABLE @@ -35,7 +37,8 @@ PREHOOK: query: CREATE TABLE parquet_types ( csmallint smallint, cfloat float, cdouble double, - cstring1 string + cstring1 string, + t timestamp ) STORED AS PARQUET PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -45,7 +48,8 @@ POSTHOOK: query: CREATE TABLE parquet_types ( csmallint smallint, cfloat float, cdouble double, - cstring1 string + cstring1 string, + t timestamp ) STORED AS PARQUET POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default @@ -72,6 +76,7 @@ POSTHOOK: Lineage: parquet_types.cint SIMPLE [(parquet_types_staging)parquet_typ POSTHOOK: Lineage: parquet_types.csmallint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:csmallint, type:smallint, comment:null), ] POSTHOOK: Lineage: parquet_types.cstring1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cstring1, type:string, comment:null), ] POSTHOOK: Lineage: parquet_types.ctinyint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] +POSTHOOK: Lineage: parquet_types.t SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:t, type:timestamp, comment:null), ] PREHOOK: query: SELECT * FROM parquet_types PREHOOK: type: QUERY PREHOOK: Input: default@parquet_types @@ -80,27 +85,27 @@ POSTHOOK: query: 
diff --git ql/src/test/results/clientpositive/parquet_types.q.out ql/src/test/results/clientpositive/parquet_types.q.out
index c23f7f1..dc6dc73 100644
--- ql/src/test/results/clientpositive/parquet_types.q.out
+++ ql/src/test/results/clientpositive/parquet_types.q.out
@@ -12,7 +12,8 @@ PREHOOK: query: CREATE TABLE parquet_types_staging (
   csmallint smallint,
   cfloat float,
   cdouble double,
-  cstring1 string
+  cstring1 string,
+  t timestamp
 ) ROW FORMAT DELIMITED
 FIELDS TERMINATED BY '|'
 PREHOOK: type: CREATETABLE
@@ -23,7 +24,8 @@ POSTHOOK: query: CREATE TABLE parquet_types_staging (
   csmallint smallint,
   cfloat float,
   cdouble double,
-  cstring1 string
+  cstring1 string,
+  t timestamp
 ) ROW FORMAT DELIMITED
 FIELDS TERMINATED BY '|'
 POSTHOOK: type: CREATETABLE
@@ -35,7 +37,8 @@ PREHOOK: query: CREATE TABLE parquet_types (
   csmallint smallint,
   cfloat float,
   cdouble double,
-  cstring1 string
+  cstring1 string,
+  t timestamp
 ) STORED AS PARQUET
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
@@ -45,7 +48,8 @@ POSTHOOK: query: CREATE TABLE parquet_types (
   csmallint smallint,
   cfloat float,
   cdouble double,
-  cstring1 string
+  cstring1 string,
+  t timestamp
 ) STORED AS PARQUET
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
@@ -72,6 +76,7 @@ POSTHOOK: Lineage: parquet_types.cint SIMPLE [(parquet_types_staging)parquet_typ
 POSTHOOK: Lineage: parquet_types.csmallint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:csmallint, type:smallint, comment:null), ]
 POSTHOOK: Lineage: parquet_types.cstring1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cstring1, type:string, comment:null), ]
 POSTHOOK: Lineage: parquet_types.ctinyint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: parquet_types.t SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:t, type:timestamp, comment:null), ]
 PREHOOK: query: SELECT * FROM parquet_types
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_types
 #### A masked pattern was here ####
@@ -80,27 +85,27 @@ POSTHOOK: query: SELECT * FROM parquet_types
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_types
 #### A masked pattern was here ####
-100 1 1 1.0 0.0 abc
-101 2 2 1.1 0.3 def
-102 3 3 1.2 0.6 ghi
-103 1 4 1.3 0.9 jkl
-104 2 5 1.4 1.2 mno
-105 3 1 1.0 1.5 pqr
-106 1 2 1.1 1.8 stu
-107 2 3 1.2 2.1 vwx
-108 3 4 1.3 2.4 yza
-109 1 5 1.4 2.7 bcd
-110 2 1 1.0 3.0 efg
-111 3 2 1.1 3.3 hij
-112 1 3 1.2 3.6 klm
-113 2 4 1.3 3.9 nop
-114 3 5 1.4 4.2 qrs
-115 1 1 1.0 4.5 tuv
-116 2 2 1.1 4.8 wxy
-117 3 3 1.2 5.1 zab
-118 1 4 1.3 5.4 cde
-119 2 5 1.4 5.7 fgh
-120 3 1 1.0 6.0 ijk
+100 1 1 1.0 0.0 abc 2011-01-01 01:01:01.111111111
+101 2 2 1.1 0.3 def 2012-02-02 02:02:02.222222222
+102 3 3 1.2 0.6 ghi 2013-03-03 03:03:03.333333333
+103 1 4 1.3 0.9 jkl 2014-04-04 04:04:04.444444444
+104 2 5 1.4 1.2 mno 2015-05-05 05:05:05.555555555
+105 3 1 1.0 1.5 pqr 2016-06-06 06:06:06.666666666
+106 1 2 1.1 1.8 stu 2017-07-07 07:07:07.777777777
+107 2 3 1.2 2.1 vwx 2018-08-08 08:08:08.888888888
+108 3 4 1.3 2.4 yza 2019-09-09 09:09:09.999999999
+109 1 5 1.4 2.7 bcd 2020-10-10 10:10:10.101010101
+110 2 1 1.0 3.0 efg 2021-11-11 11:11:11.111111111
+111 3 2 1.1 3.3 hij 2022-12-12 12:12:12.121212121
+112 1 3 1.2 3.6 klm 2023-01-02 13:13:13.131313131
+113 2 4 1.3 3.9 nop 2024-02-02 14:14:14.141414141
+114 3 5 1.4 4.2 qrs 2025-03-03 15:15:15.151515151
+115 1 1 1.0 4.5 tuv 2026-04-04 16:16:16.161616161
+116 2 2 1.1 4.8 wxy 2027-05-05 17:17:17.171717171
+117 3 3 1.2 5.1 zab 2028-06-06 18:18:18.181818181
+118 1 4 1.3 5.4 cde 2029-07-07 19:19:19.191919191
+119 2 5 1.4 5.7 fgh 2030-08-08 20:20:20.202020202
+120 3 1 1.0 6.0 ijk 2031-09-09 21:21:21.212121212
 PREHOOK: query: SELECT ctinyint, MAX(cint), MIN(csmallint),