diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
index 3fc0129..29ba234 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
@@ -135,7 +135,7 @@ protected HiveDecimalWritable convert(Binary binary) {
       };
     }
   },
-  ETIMESTAMP_CONVERTER(TimestampWritable.class) {
+  ETIMESTAMP_INT96_CONVERTER(TimestampWritable.class) {
     @Override
     PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent) {
       return new BinaryConverter(type, parent, index) {
@@ -152,6 +152,18 @@ protected TimestampWritable convert(Binary binary) {
       };
     }
   },
+  // Converter for TIMESTAMP_MILLIS values: each long is wrapped as a Timestamp (epoch milliseconds).
+  ETIMESTAMP_CONVERTER(TimestampWritable.class) {
+    @Override
+    PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent) {
+      return new PrimitiveConverter() {
+        @Override
+        public void addLong(final long value) {
+          parent.set(index, new TimestampWritable(new Timestamp(value)));
+        }
+      };
+    }
+  },
   EDATE_CONVERTER(DateWritable.class) {
     @Override
     PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent) {
@@ -179,15 +191,26 @@ private ETypeConverter(final Class type) {
 
   public static PrimitiveConverter getNewConverter(final PrimitiveType type, final int index, final ConverterParent parent) {
     if (type.isPrimitive() && (type.asPrimitiveType().getPrimitiveTypeName().equals(PrimitiveType.PrimitiveTypeName.INT96))) {
-      //TODO- cleanup once parquet support Timestamp type annotation.
-      return ETypeConverter.ETIMESTAMP_CONVERTER.getConverter(type, index, parent);
-    }
-    if (OriginalType.DECIMAL == type.getOriginalType()) {
-      return EDECIMAL_CONVERTER.getConverter(type, index, parent);
-    } else if (OriginalType.UTF8 == type.getOriginalType()) {
-      return ESTRING_CONVERTER.getConverter(type, index, parent);
-    } else if (OriginalType.DATE == type.getOriginalType()) {
-      return EDATE_CONVERTER.getConverter(type, index, parent);
+      return ETypeConverter.ETIMESTAMP_INT96_CONVERTER.getConverter(type, index, parent);
+    }
+
+    // getOriginalType() may be null (no annotation on the type); switching on a null
+    // enum throws NullPointerException, so test for null before dispatching.
+    final OriginalType originalType = type.getOriginalType();
+    if (originalType != null) {
+      switch (originalType) {
+        case TIMESTAMP_MILLIS:
+          return ETypeConverter.ETIMESTAMP_CONVERTER.getConverter(type, index, parent);
+        case DECIMAL:
+          return EDECIMAL_CONVERTER.getConverter(type, index, parent);
+        case UTF8:
+          return ESTRING_CONVERTER.getConverter(type, index, parent);
+        case DATE:
+          return EDATE_CONVERTER.getConverter(type, index, parent);
+        default:
+          // Any other annotation: leave the switch and continue below.
+          break;
+      }
     }
 
     Class javaType = type.getPrimitiveTypeName().javaType;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
index 43c772f..abe2e0a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
@@ -79,7 +79,7 @@ private static Type convertType(final String name, final TypeInfo typeInfo,
     } else if (typeInfo.equals(TypeInfoFactory.binaryTypeInfo)) {
       return Types.primitive(PrimitiveTypeName.BINARY, repetition).named(name);
     } else if (typeInfo.equals(TypeInfoFactory.timestampTypeInfo)) {
-      return Types.primitive(PrimitiveTypeName.INT96, repetition).named(name);
+      return Types.primitive(PrimitiveTypeName.INT64, repetition).as(OriginalType.TIMESTAMP_MILLIS).named(name);
     } else if (typeInfo.equals(TypeInfoFactory.voidTypeInfo)) {
       throw new UnsupportedOperationException("Void type not implemented");
     } else if
(typeInfo.getTypeName().toLowerCase().startsWith( diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java index 5bcb270..0bd3279 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java @@ -294,7 +294,7 @@ private void writePrimitive(final Object value, final PrimitiveObjectInspector i break; case TIMESTAMP: Timestamp ts = ((TimestampObjectInspector) inspector).getPrimitiveJavaObject(value); - recordConsumer.addBinary(NanoTimeUtils.getNanoTime(ts, false).toBinary()); + recordConsumer.addLong(ts.getTime()); break; case DECIMAL: HiveDecimal vDecimal = ((HiveDecimal)inspector.getPrimitiveJavaObject(value)); diff --git a/ql/src/test/results/clientpositive/parquet_types.q.out b/ql/src/test/results/clientpositive/parquet_types.q.out index ad743ef..5ca91f2 100644 --- a/ql/src/test/results/clientpositive/parquet_types.q.out +++ b/ql/src/test/results/clientpositive/parquet_types.q.out @@ -162,28 +162,28 @@ hex(cbinary), m1, l1, st1, d FROM parquet_types POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_types #### A masked pattern was here #### -100 1 1 1.0 0.0 abc 2011-01-01 01:01:01.111111111 a a B4F3CAFDBEDD {"k1":"v1"} [101,200] {"c1":10,"c2":"a"} 2011-01-01 -101 2 2 1.1 0.3 def 2012-02-02 02:02:02.222222222 ab ab 68692CCAC0BDE7 {"k2":"v2"} [102,200] {"c1":10,"c2":"d"} 2012-02-02 -102 3 3 1.2 0.6 ghi 2013-03-03 03:03:03.333333333 abc abc B4F3CAFDBEDD {"k3":"v3"} [103,200] {"c1":10,"c2":"g"} 2013-03-03 -103 1 4 1.3 0.9 jkl 2014-04-04 04:04:04.444444444 abcd abcd 68692CCAC0BDE7 {"k4":"v4"} [104,200] {"c1":10,"c2":"j"} 2014-04-04 -104 2 5 1.4 1.2 mno 2015-05-05 05:05:05.555555555 abcde abcde B4F3CAFDBEDD {"k5":"v5"} [105,200] {"c1":10,"c2":"m"} 2015-05-05 -105 3 1 1.0 1.5 pqr 2016-06-06 06:06:06.666666666 abcde abcdef 68692CCAC0BDE7 
{"k6":"v6"} [106,200] {"c1":10,"c2":"p"} 2016-06-06 -106 1 2 1.1 1.8 stu 2017-07-07 07:07:07.777777777 abcde abcdefg B4F3CAFDBEDD {"k7":"v7"} [107,200] {"c1":10,"c2":"s"} 2017-07-07 -107 2 3 1.2 2.1 vwx 2018-08-08 08:08:08.888888888 bcdef abcdefgh 68692CCAC0BDE7 {"k8":"v8"} [108,200] {"c1":10,"c2":"v"} 2018-08-08 -108 3 4 1.3 2.4 yza 2019-09-09 09:09:09.999999999 cdefg B4F3CAFDBE 68656C6C6F {"k9":"v9"} [109,200] {"c1":10,"c2":"y"} 2019-09-09 -109 1 5 1.4 2.7 bcd 2020-10-10 10:10:10.101010101 klmno abcdedef 68692CCAC0BDE7 {"k10":"v10"} [110,200] {"c1":10,"c2":"b"} 2020-10-10 -110 2 1 1.0 3.0 efg 2021-11-11 11:11:11.111111111 pqrst abcdede B4F3CAFDBEDD {"k11":"v11"} [111,200] {"c1":10,"c2":"e"} 2021-11-11 -111 3 2 1.1 3.3 hij 2022-12-12 12:12:12.121212121 nopqr abcded 68692CCAC0BDE7 {"k12":"v12"} [112,200] {"c1":10,"c2":"h"} 2022-12-12 -112 1 3 1.2 3.6 klm 2023-01-02 13:13:13.131313131 opqrs abcdd B4F3CAFDBEDD {"k13":"v13"} [113,200] {"c1":10,"c2":"k"} 2023-01-02 -113 2 4 1.3 3.9 nop 2024-02-02 14:14:14.141414141 pqrst abc 68692CCAC0BDE7 {"k14":"v14"} [114,200] {"c1":10,"c2":"n"} 2024-02-02 -114 3 5 1.4 4.2 qrs 2025-03-03 15:15:15.151515151 qrstu b B4F3CAFDBEDD {"k15":"v15"} [115,200] {"c1":10,"c2":"q"} 2025-03-03 -115 1 1 1.0 4.5 qrs 2026-04-04 16:16:16.161616161 rstuv abcded 68692CCAC0BDE7 {"k16":"v16"} [116,200] {"c1":10,"c2":"q"} 2026-04-04 -116 2 2 1.1 4.8 wxy 2027-05-05 17:17:17.171717171 stuvw abcded B4F3CAFDBEDD {"k17":"v17"} [117,200] {"c1":10,"c2":"w"} 2027-05-05 -117 3 3 1.2 5.1 zab 2028-06-06 18:18:18.181818181 tuvwx abcded 68692CCAC0BDE7 {"k18":"v18"} [118,200] {"c1":10,"c2":"z"} 2028-06-06 -118 1 4 1.3 5.4 cde 2029-07-07 19:19:19.191919191 uvwzy abcdede B4F3CAFDBEDD {"k19":"v19"} [119,200] {"c1":10,"c2":"c"} 2029-07-07 -119 2 5 1.4 5.7 fgh 2030-08-08 20:20:20.202020202 vwxyz abcdede 68692CCAC0BDE7 {"k20":"v20"} [120,200] {"c1":10,"c2":"f"} 2030-08-08 -120 3 1 1.0 6.0 ijk 2031-09-09 21:21:21.212121212 wxyza abcde B4F3CAFDBEDD {"k21":"v21"} [121,200] 
{"c1":10,"c2":"i"} 2031-09-09 -121 1 2 1.1 6.3 lmn 2032-10-10 22:22:22.222222222 bcdef abcde {"k22":"v22"} [122,200] {"c1":10,"c2":"l"} 2032-10-10 +100 1 1 1.0 0.0 abc 2011-01-01 01:01:01.111 a a B4F3CAFDBEDD {"k1":"v1"} [101,200] {"c1":10,"c2":"a"} 2011-01-01 +101 2 2 1.1 0.3 def 2012-02-02 02:02:02.222 ab ab 68692CCAC0BDE7 {"k2":"v2"} [102,200] {"c1":10,"c2":"d"} 2012-02-02 +102 3 3 1.2 0.6 ghi 2013-03-03 03:03:03.333 abc abc B4F3CAFDBEDD {"k3":"v3"} [103,200] {"c1":10,"c2":"g"} 2013-03-03 +103 1 4 1.3 0.9 jkl 2014-04-04 04:04:04.444 abcd abcd 68692CCAC0BDE7 {"k4":"v4"} [104,200] {"c1":10,"c2":"j"} 2014-04-04 +104 2 5 1.4 1.2 mno 2015-05-05 05:05:05.555 abcde abcde B4F3CAFDBEDD {"k5":"v5"} [105,200] {"c1":10,"c2":"m"} 2015-05-05 +105 3 1 1.0 1.5 pqr 2016-06-06 06:06:06.666 abcde abcdef 68692CCAC0BDE7 {"k6":"v6"} [106,200] {"c1":10,"c2":"p"} 2016-06-06 +106 1 2 1.1 1.8 stu 2017-07-07 07:07:07.777 abcde abcdefg B4F3CAFDBEDD {"k7":"v7"} [107,200] {"c1":10,"c2":"s"} 2017-07-07 +107 2 3 1.2 2.1 vwx 2018-08-08 08:08:08.888 bcdef abcdefgh 68692CCAC0BDE7 {"k8":"v8"} [108,200] {"c1":10,"c2":"v"} 2018-08-08 +108 3 4 1.3 2.4 yza 2019-09-09 09:09:09.999 cdefg B4F3CAFDBE 68656C6C6F {"k9":"v9"} [109,200] {"c1":10,"c2":"y"} 2019-09-09 +109 1 5 1.4 2.7 bcd 2020-10-10 10:10:10.101 klmno abcdedef 68692CCAC0BDE7 {"k10":"v10"} [110,200] {"c1":10,"c2":"b"} 2020-10-10 +110 2 1 1.0 3.0 efg 2021-11-11 11:11:11.111 pqrst abcdede B4F3CAFDBEDD {"k11":"v11"} [111,200] {"c1":10,"c2":"e"} 2021-11-11 +111 3 2 1.1 3.3 hij 2022-12-12 12:12:12.121 nopqr abcded 68692CCAC0BDE7 {"k12":"v12"} [112,200] {"c1":10,"c2":"h"} 2022-12-12 +112 1 3 1.2 3.6 klm 2023-01-02 13:13:13.131 opqrs abcdd B4F3CAFDBEDD {"k13":"v13"} [113,200] {"c1":10,"c2":"k"} 2023-01-02 +113 2 4 1.3 3.9 nop 2024-02-02 14:14:14.141 pqrst abc 68692CCAC0BDE7 {"k14":"v14"} [114,200] {"c1":10,"c2":"n"} 2024-02-02 +114 3 5 1.4 4.2 qrs 2025-03-03 15:15:15.151 qrstu b B4F3CAFDBEDD {"k15":"v15"} [115,200] {"c1":10,"c2":"q"} 2025-03-03 +115 1 
1 1.0 4.5 qrs 2026-04-04 16:16:16.161 rstuv abcded 68692CCAC0BDE7 {"k16":"v16"} [116,200] {"c1":10,"c2":"q"} 2026-04-04 +116 2 2 1.1 4.8 wxy 2027-05-05 17:17:17.171 stuvw abcded B4F3CAFDBEDD {"k17":"v17"} [117,200] {"c1":10,"c2":"w"} 2027-05-05 +117 3 3 1.2 5.1 zab 2028-06-06 18:18:18.181 tuvwx abcded 68692CCAC0BDE7 {"k18":"v18"} [118,200] {"c1":10,"c2":"z"} 2028-06-06 +118 1 4 1.3 5.4 cde 2029-07-07 19:19:19.191 uvwzy abcdede B4F3CAFDBEDD {"k19":"v19"} [119,200] {"c1":10,"c2":"c"} 2029-07-07 +119 2 5 1.4 5.7 fgh 2030-08-08 20:20:20.202 vwxyz abcdede 68692CCAC0BDE7 {"k20":"v20"} [120,200] {"c1":10,"c2":"f"} 2030-08-08 +120 3 1 1.0 6.0 ijk 2031-09-09 21:21:21.212 wxyza abcde B4F3CAFDBEDD {"k21":"v21"} [121,200] {"c1":10,"c2":"i"} 2031-09-09 +121 1 2 1.1 6.3 lmn 2032-10-10 22:22:22.222 bcdef abcde {"k22":"v22"} [122,200] {"c1":10,"c2":"l"} 2032-10-10 PREHOOK: query: SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar) FROM parquet_types PREHOOK: type: QUERY PREHOOK: Input: default@parquet_types @@ -343,28 +343,28 @@ POSTHOOK: query: SELECT t, count(*) FROM parquet_types GROUP BY t ORDER BY t POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_types #### A masked pattern was here #### -2011-01-01 01:01:01.111111111 1 -2012-02-02 02:02:02.222222222 1 -2013-03-03 03:03:03.333333333 1 -2014-04-04 04:04:04.444444444 1 -2015-05-05 05:05:05.555555555 1 -2016-06-06 06:06:06.666666666 1 -2017-07-07 07:07:07.777777777 1 -2018-08-08 08:08:08.888888888 1 -2019-09-09 09:09:09.999999999 1 -2020-10-10 10:10:10.101010101 1 -2021-11-11 11:11:11.111111111 1 -2022-12-12 12:12:12.121212121 1 -2023-01-02 13:13:13.131313131 1 -2024-02-02 14:14:14.141414141 1 -2025-03-03 15:15:15.151515151 1 -2026-04-04 16:16:16.161616161 1 -2027-05-05 17:17:17.171717171 1 -2028-06-06 18:18:18.181818181 1 -2029-07-07 19:19:19.191919191 1 -2030-08-08 20:20:20.202020202 1 -2031-09-09 21:21:21.212121212 1 -2032-10-10 22:22:22.222222222 1 +2011-01-01 01:01:01.111 1 +2012-02-02 02:02:02.222 1 +2013-03-03 
03:03:03.333 1 +2014-04-04 04:04:04.444 1 +2015-05-05 05:05:05.555 1 +2016-06-06 06:06:06.666 1 +2017-07-07 07:07:07.777 1 +2018-08-08 08:08:08.888 1 +2019-09-09 09:09:09.999 1 +2020-10-10 10:10:10.101 1 +2021-11-11 11:11:11.111 1 +2022-12-12 12:12:12.121 1 +2023-01-02 13:13:13.131 1 +2024-02-02 14:14:14.141 1 +2025-03-03 15:15:15.151 1 +2026-04-04 16:16:16.161 1 +2027-05-05 17:17:17.171 1 +2028-06-06 18:18:18.181 1 +2029-07-07 19:19:19.191 1 +2030-08-08 20:20:20.202 1 +2031-09-09 21:21:21.212 1 +2032-10-10 22:22:22.222 1 PREHOOK: query: SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY cbinary PREHOOK: type: QUERY PREHOOK: Input: default@parquet_types