diff --git data/files/parquet_types.txt data/files/parquet_types.txt index 750626e1d4e3a010f9d231fb01d754c88a12289a..d34206232aef5a1361eefd3d0bd5a37d32e0db47 100644 --- data/files/parquet_types.txt +++ data/files/parquet_types.txt @@ -1,21 +1,21 @@ -100|1|1|1.0|0.0|abc|2011-01-01 01:01:01.111111111|a |a -101|2|2|1.1|0.3|def|2012-02-02 02:02:02.222222222|ab |ab -102|3|3|1.2|0.6|ghi|2013-03-03 03:03:03.333333333|abc|abc -103|1|4|1.3|0.9|jkl|2014-04-04 04:04:04.444444444|abcd|abcd -104|2|5|1.4|1.2|mno|2015-05-05 05:05:05.555555555|abcde|abcde -105|3|1|1.0|1.5|pqr|2016-06-06 06:06:06.666666666|abcdef|abcdef -106|1|2|1.1|1.8|stu|2017-07-07 07:07:07.777777777|abcdefg|abcdefg -107|2|3|1.2|2.1|vwx|2018-08-08 08:08:08.888888888|bcdefg|abcdefgh -108|3|4|1.3|2.4|yza|2019-09-09 09:09:09.999999999|cdefg|abcdefghijklmnop -109|1|5|1.4|2.7|bcd|2020-10-10 10:10:10.101010101|klmno|abcdedef -110|2|1|1.0|3.0|efg|2021-11-11 11:11:11.111111111|pqrst|abcdede -111|3|2|1.1|3.3|hij|2022-12-12 12:12:12.121212121|nopqr|abcded -112|1|3|1.2|3.6|klm|2023-01-02 13:13:13.131313131|opqrs|abcdd -113|2|4|1.3|3.9|nop|2024-02-02 14:14:14.141414141|pqrst|abc -114|3|5|1.4|4.2|qrs|2025-03-03 15:15:15.151515151|qrstu|b -115|1|1|1.0|4.5|tuv|2026-04-04 16:16:16.161616161|rstuv|abcded -116|2|2|1.1|4.8|wxy|2027-05-05 17:17:17.171717171|stuvw|abcded -117|3|3|1.2|5.1|zab|2028-06-06 18:18:18.181818181|tuvwx|abcded -118|1|4|1.3|5.4|cde|2029-07-07 19:19:19.191919191|uvwzy|abcdede -119|2|5|1.4|5.7|fgh|2030-08-08 20:20:20.202020202|vwxyz|abcdede -120|3|1|1.0|6.0|ijk|2031-09-09 21:21:21.212121212|wxyza|abcde +100|1|1|1.0|0.0|abc|2011-01-01 01:01:01.111111111|a |a |k1:v1|101,200|10,abc +101|2|2|1.1|0.3|def|2012-02-02 02:02:02.222222222|ab |ab |k2:v2|102,200|10,def +102|3|3|1.2|0.6|ghi|2013-03-03 03:03:03.333333333|abc|abc|k3:v3|103,200|10,ghi +103|1|4|1.3|0.9|jkl|2014-04-04 04:04:04.444444444|abcd|abcd|k4:v4|104,200|10,jkl +104|2|5|1.4|1.2|mno|2015-05-05 05:05:05.555555555|abcde|abcde|k5:v5|105,200|10,mno +105|3|1|1.0|1.5|pqr|2016-06-06 06:06:06.666666666|abcdef|abcdef|k6:v6|106,200|10,pqr +106|1|2|1.1|1.8|stu|2017-07-07 07:07:07.777777777|abcdefg|abcdefg|k7:v7|107,200|10,stu +107|2|3|1.2|2.1|vwx|2018-08-08 08:08:08.888888888|bcdefg|abcdefgh|k8:v8|108,200|10,vwx +108|3|4|1.3|2.4|yza|2019-09-09 09:09:09.999999999|cdefg|abcdefghijklmnop|k9:v9|109,200|10,yza +109|1|5|1.4|2.7|bcd|2020-10-10 10:10:10.101010101|klmno|abcdedef|k10:v10|110,200|10,bcd +110|2|1|1.0|3.0|efg|2021-11-11 11:11:11.111111111|pqrst|abcdede|k11:v11|111,200|10,efg +111|3|2|1.1|3.3|hij|2022-12-12 12:12:12.121212121|nopqr|abcded|k12:v12|112,200|10,hij +112|1|3|1.2|3.6|klm|2023-01-02 13:13:13.131313131|opqrs|abcdd|k13:v13|113,200|10,klm +113|2|4|1.3|3.9|nop|2024-02-02 14:14:14.141414141|pqrst|abc|k14:v14|114,200|10,nop +114|3|5|1.4|4.2|qrs|2025-03-03 15:15:15.151515151|qrstu|b|k15:v15|115,200|10,qrs +115|1|1|1.0|4.5|qrs|2026-04-04 16:16:16.161616161|rstuv|abcded|k16:v16|116,200|10,qrs +116|2|2|1.1|4.8|wxy|2027-05-05 17:17:17.171717171|stuvw|abcded|k17:v17|117,200|10,wxy +117|3|3|1.2|5.1|zab|2028-06-06 18:18:18.181818181|tuvwx|abcded|k18:v18|118,200|10,zab +118|1|4|1.3|5.4|cde|2029-07-07 19:19:19.191919191|uvwzy|abcdede|k19:v19|119,200|10,cde +119|2|5|1.4|5.7|fgh|2030-08-08 20:20:20.202020202|vwxyz|abcdede|k20:v20|120,200|10,fgh +120|3|1|1.0|6.0|ijk|2031-09-09 21:21:21.212121212|wxyza|abcde|k21:v21|121,200|10,ijk diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ArrayWritableGroupConverter.java 
ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ArrayWritableGroupConverter.java index c5d80f22b82e57c5acf8286d879a248a233aa051..582a5dfdaccaa25d46bfb515248eeb4bb84bedc5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ArrayWritableGroupConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ArrayWritableGroupConverter.java @@ -13,9 +13,6 @@ */ package org.apache.hadoop.hive.ql.io.parquet.convert; -import java.util.List; - -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.io.ArrayWritable; import org.apache.hadoop.io.Writable; @@ -33,7 +30,7 @@ private Writable[] mapPairContainer; public ArrayWritableGroupConverter(final GroupType groupType, final HiveGroupConverter parent, - final int index, List hiveSchemaTypeInfos) { + final int index) { this.parent = parent; this.index = index; int count = groupType.getFieldCount(); @@ -43,8 +40,7 @@ public ArrayWritableGroupConverter(final GroupType groupType, final HiveGroupCon isMap = count == 2; converters = new Converter[count]; for (int i = 0; i < count; i++) { - converters[i] = getConverterFromDescription(groupType.getType(i), i, this, - hiveSchemaTypeInfos); + converters[i] = getConverterFromDescription(groupType.getType(i), i, this); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java index 48e4a133d1b30ef43a53e1a6c19b68682e86835f..0e310fbfb748d5409ff3c0d8cd8327bec9988ecf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java @@ -16,7 +16,6 @@ import java.util.ArrayList; import java.util.List; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.io.ArrayWritable; import org.apache.hadoop.io.Writable; @@ -37,21 +36,19 @@ private final Object[] currentArr; private Writable[] rootMap; - public DataWritableGroupConverter(final GroupType requestedSchema, final GroupType tableSchema, - final List hiveSchemaTypeInfos) { - this(requestedSchema, null, 0, tableSchema, hiveSchemaTypeInfos); + public DataWritableGroupConverter(final GroupType requestedSchema, final GroupType tableSchema) { + this(requestedSchema, null, 0, tableSchema); final int fieldCount = tableSchema.getFieldCount(); this.rootMap = new Writable[fieldCount]; } public DataWritableGroupConverter(final GroupType groupType, final HiveGroupConverter parent, - final int index, final List hiveSchemaTypeInfos) { - this(groupType, parent, index, groupType, hiveSchemaTypeInfos); + final int index) { + this(groupType, parent, index, groupType); } public DataWritableGroupConverter(final GroupType selectedGroupType, - final HiveGroupConverter parent, final int index, final GroupType containingGroupType, - final List hiveSchemaTypeInfos) { + final HiveGroupConverter parent, final int index, final GroupType containingGroupType) { this.parent = parent; this.index = index; final int totalFieldCount = containingGroupType.getFieldCount(); @@ -65,8 +62,7 @@ public DataWritableGroupConverter(final GroupType selectedGroupType, Type subtype = selectedFields.get(i); if (containingGroupType.getFields().contains(subtype)) { converters[i] = getConverterFromDescription(subtype, - containingGroupType.getFieldIndex(subtype.getName()), this, - hiveSchemaTypeInfos); + containingGroupType.getFieldIndex(subtype.getName()), this); } else { 
throw new IllegalStateException("Group type [" + containingGroupType + "] does not contain requested field: " + subtype); diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java index 0971a68e151cb1a0469671f119b479719f36fa6a..5a4613673676a06041b91ac8d0419f5672d6722d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java @@ -31,10 +31,8 @@ private final DataWritableGroupConverter root; - public DataWritableRecordConverter(final GroupType requestedSchema, final GroupType tableSchema, - final List hiveColumnTypeInfos) { - this.root = new DataWritableGroupConverter(requestedSchema, tableSchema, - hiveColumnTypeInfos); + public DataWritableRecordConverter(final GroupType requestedSchema, final GroupType tableSchema) { + this.root = new DataWritableGroupConverter(requestedSchema, tableSchema); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java index e6fb5ae137a1c91953c2458897d98d109586e9d6..bce640082d38d538ae5701e410950d924a517bec 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java @@ -16,19 +16,12 @@ import java.math.BigDecimal; import java.sql.Timestamp; import java.util.ArrayList; -import java.util.List; -import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime; import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils; -import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.io.DoubleWritable; -import org.apache.hadoop.hive.serde2.io.HiveCharWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.FloatWritable; @@ -152,32 +145,6 @@ protected TimestampWritable convert(Binary binary) { } }; } - }, - ECHAR_CONVERTER(HiveCharWritable.class) { - @Override - Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) { - return new BinaryConverter(type, parent, index) { - @Override - protected HiveCharWritable convert(Binary binary) { - HiveChar hiveChar = new HiveChar(); - hiveChar.setValue(binary.toStringUsingUTF8()); - return new HiveCharWritable(hiveChar); - } - }; - } - }, - EVARCHAR_CONVERTER(HiveVarcharWritable.class) { - @Override - Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) { - return new BinaryConverter(type, parent, index) { - @Override - protected HiveVarcharWritable convert(Binary binary) { - HiveVarchar hiveVarchar = new HiveVarchar(); - hiveVarchar.setValue(binary.toStringUsingUTF8()); - return new HiveVarcharWritable(hiveVarchar); - } - }; - } }; final Class _type; @@ -193,7 +160,7 @@ private ETypeConverter(final Class type) { abstract Converter getConverter(final PrimitiveType type, final int index, final 
HiveGroupConverter parent); public static Converter getNewConverter(final PrimitiveType type, final int index, - final HiveGroupConverter parent, List hiveSchemaTypeInfos) { + final HiveGroupConverter parent) { if (type.isPrimitive() && (type.asPrimitiveType().getPrimitiveTypeName().equals(PrimitiveType.PrimitiveTypeName.INT96))) { //TODO- cleanup once parquet support Timestamp type annotation. return ETypeConverter.ETIMESTAMP_CONVERTER.getConverter(type, index, parent); @@ -201,15 +168,7 @@ public static Converter getNewConverter(final PrimitiveType type, final int inde if (OriginalType.DECIMAL == type.getOriginalType()) { return EDECIMAL_CONVERTER.getConverter(type, index, parent); } else if (OriginalType.UTF8 == type.getOriginalType()) { - if (hiveSchemaTypeInfos.get(index).getTypeName() - .startsWith(serdeConstants.CHAR_TYPE_NAME)) { - return ECHAR_CONVERTER.getConverter(type, index, parent); - } else if (hiveSchemaTypeInfos.get(index).getTypeName() - .startsWith(serdeConstants.VARCHAR_TYPE_NAME)) { - return EVARCHAR_CONVERTER.getConverter(type, index, parent); - } else if (type.isPrimitive()) { - return ESTRING_CONVERTER.getConverter(type, index, parent); - } + return ESTRING_CONVERTER.getConverter(type, index, parent); } Class javaType = type.getPrimitiveTypeName().javaType; diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java index a364729505eaa7b0b0c9b0c326a8a6398b8b3dbe..78bdf628c7c9d2d8953aef5d8d65fc66f0eb7d00 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java @@ -13,9 +13,6 @@ */ package org.apache.hadoop.hive.ql.io.parquet.convert; -import java.util.List; - -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.io.Writable; import parquet.io.api.Converter; @@ -26,20 +23,17 @@ public abstract class HiveGroupConverter extends GroupConverter { protected static Converter getConverterFromDescription(final Type type, final int index, - final HiveGroupConverter parent, List hiveSchemaTypeInfos) { + final HiveGroupConverter parent) { if (type == null) { return null; } if (type.isPrimitive()) { - return ETypeConverter.getNewConverter(type.asPrimitiveType(), index, parent, - hiveSchemaTypeInfos); + return ETypeConverter.getNewConverter(type.asPrimitiveType(), index, parent); } else { if (type.asGroupType().getRepetition() == Repetition.REPEATED) { - return new ArrayWritableGroupConverter(type.asGroupType(), parent, index, - hiveSchemaTypeInfos); + return new ArrayWritableGroupConverter(type.asGroupType(), parent, index); } else { - return new DataWritableGroupConverter(type.asGroupType(), parent, index, - hiveSchemaTypeInfos); + return new DataWritableGroupConverter(type.asGroupType(), parent, index); } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java index 5e5df57216d453c643925d3eb0abf593c6d32e2e..2ad7330365b8327e6f1b78ad5b9760e252d1339b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java @@ -14,7 +14,6 @@ package org.apache.hadoop.hive.ql.io.parquet.read; import java.util.ArrayList; -import java.util.Arrays; import java.util.HashMap; import java.util.List; 
import java.util.Map; @@ -24,8 +23,6 @@ import org.apache.hadoop.hive.ql.io.parquet.convert.DataWritableRecordConverter; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.ArrayWritable; import org.apache.hadoop.util.StringUtils; @@ -56,7 +53,7 @@ * From a string which columns names (including hive column), return a list * of string columns * - * @param comma separated list of columns + * @param columns comma separated list of columns * @return list with virtual columns removed */ private static List getColumns(final String columns) { @@ -64,27 +61,6 @@ removeVirtualColumns(StringUtils.getStringCollection(columns)); } - private static List getColumnTypes(Configuration configuration) { - - List columnNames; - String columnNamesProperty = configuration.get(IOConstants.COLUMNS); - if (columnNamesProperty.length() == 0) { - columnNames = new ArrayList(); - } else { - columnNames = Arrays.asList(columnNamesProperty.split(",")); - } - List columnTypes; - String columnTypesProperty = configuration.get(IOConstants.COLUMNS_TYPES); - if (columnTypesProperty.length() == 0) { - columnTypes = new ArrayList(); - } else { - columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypesProperty); - } - - columnTypes = VirtualColumn.removeVirtualColumnTypes(columnNames, columnTypes); - return columnTypes; - } - /** * * It creates the readContext for Parquet side with the requested schema during the init phase. @@ -173,8 +149,7 @@ } final MessageType tableSchema = resolveSchemaAccess(MessageTypeParser. parseMessageType(metadata.get(HIVE_SCHEMA_KEY)), fileSchema, configuration); - return new DataWritableRecordConverter(readContext.getRequestedSchema(), tableSchema, - getColumnTypes(configuration)); + return new DataWritableRecordConverter(readContext.getRequestedSchema(), tableSchema); } /** @@ -194,4 +169,4 @@ private MessageType resolveSchemaAccess(MessageType requestedSchema, MessageType } return requestedSchema; } -} +} \ No newline at end of file diff --git ql/src/test/queries/clientpositive/parquet_types.q ql/src/test/queries/clientpositive/parquet_types.q index 86af5af40bbb95472d7ef5df6519469cba9a129d..22585c3ce3fa3db3c4b4726982df8bb7922a3553 100644 --- ql/src/test/queries/clientpositive/parquet_types.q +++ ql/src/test/queries/clientpositive/parquet_types.q @@ -10,9 +10,14 @@ CREATE TABLE parquet_types_staging ( cstring1 string, t timestamp, cchar char(5), - cvarchar varchar(10) + cvarchar varchar(10), + m1 map, + l1 array, + st1 struct ) ROW FORMAT DELIMITED -FIELDS TERMINATED BY '|'; +FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' +MAP KEYS TERMINATED BY ':'; CREATE TABLE parquet_types ( cint int, @@ -23,7 +28,10 @@ CREATE TABLE parquet_types ( cstring1 string, t timestamp, cchar char(5), - cvarchar varchar(10) + cvarchar varchar(10), + m1 map, + l1 array, + st1 struct ) STORED AS PARQUET; LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging; @@ -32,6 +40,8 @@ INSERT OVERWRITE TABLE parquet_types SELECT * FROM parquet_types_staging; SELECT * FROM parquet_types; +SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar) FROM parquet_types; + SELECT ctinyint, MAX(cint), MIN(csmallint), diff --git ql/src/test/results/clientpositive/parquet_types.q.out ql/src/test/results/clientpositive/parquet_types.q.out index 
803a826ba0c386af784dd24c0455ac1939af380b..275897cb6b59f175ac316bb551db64e7f6af2df3 100644 --- ql/src/test/results/clientpositive/parquet_types.q.out +++ ql/src/test/results/clientpositive/parquet_types.q.out @@ -15,9 +15,14 @@ PREHOOK: query: CREATE TABLE parquet_types_staging ( cstring1 string, t timestamp, cchar char(5), - cvarchar varchar(10) + cvarchar varchar(10), + m1 map, + l1 array, + st1 struct ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' +MAP KEYS TERMINATED BY ':' PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@parquet_types_staging @@ -30,9 +35,14 @@ POSTHOOK: query: CREATE TABLE parquet_types_staging ( cstring1 string, t timestamp, cchar char(5), - cvarchar varchar(10) + cvarchar varchar(10), + m1 map, + l1 array, + st1 struct ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' +MAP KEYS TERMINATED BY ':' POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@parquet_types_staging @@ -45,7 +55,10 @@ PREHOOK: query: CREATE TABLE parquet_types ( cstring1 string, t timestamp, cchar char(5), - cvarchar varchar(10) + cvarchar varchar(10), + m1 map, + l1 array, + st1 struct ) STORED AS PARQUET PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -59,7 +72,10 @@ POSTHOOK: query: CREATE TABLE parquet_types ( cstring1 string, t timestamp, cchar char(5), - cvarchar varchar(10) + cvarchar varchar(10), + m1 map, + l1 array, + st1 struct ) STORED AS PARQUET POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default @@ -88,6 +104,9 @@ POSTHOOK: Lineage: parquet_types.csmallint SIMPLE [(parquet_types_staging)parque POSTHOOK: Lineage: parquet_types.cstring1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cstring1, type:string, comment:null), ] POSTHOOK: Lineage: parquet_types.ctinyint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] POSTHOOK: Lineage: parquet_types.cvarchar SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cvarchar, type:varchar(10), comment:null), ] +POSTHOOK: Lineage: parquet_types.l1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: parquet_types.m1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: parquet_types.st1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:st1, type:struct, comment:null), ] POSTHOOK: Lineage: parquet_types.t SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:t, type:timestamp, comment:null), ] PREHOOK: query: SELECT * FROM parquet_types PREHOOK: type: QUERY @@ -97,27 +116,56 @@ POSTHOOK: query: SELECT * FROM parquet_types POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_types #### A masked pattern was here #### -100 1 1 1.0 0.0 abc 2011-01-01 01:01:01.111111111 a a -101 2 2 1.1 0.3 def 2012-02-02 02:02:02.222222222 ab ab -102 3 3 1.2 0.6 ghi 2013-03-03 03:03:03.333333333 abc abc -103 1 4 1.3 0.9 jkl 2014-04-04 04:04:04.444444444 abcd abcd -104 2 5 1.4 1.2 mno 2015-05-05 05:05:05.555555555 abcde abcde -105 3 1 1.0 1.5 pqr 2016-06-06 06:06:06.666666666 abcde abcdef -106 1 2 1.1 1.8 stu 2017-07-07 07:07:07.777777777 abcde abcdefg -107 2 3 1.2 2.1 vwx 2018-08-08 08:08:08.888888888 bcdef abcdefgh -108 3 4 1.3 2.4 yza 2019-09-09 09:09:09.999999999 cdefg abcdefghij -109 1 5 1.4 2.7 bcd 2020-10-10 
10:10:10.101010101 klmno abcdedef -110 2 1 1.0 3.0 efg 2021-11-11 11:11:11.111111111 pqrst abcdede -111 3 2 1.1 3.3 hij 2022-12-12 12:12:12.121212121 nopqr abcded -112 1 3 1.2 3.6 klm 2023-01-02 13:13:13.131313131 opqrs abcdd -113 2 4 1.3 3.9 nop 2024-02-02 14:14:14.141414141 pqrst abc -114 3 5 1.4 4.2 qrs 2025-03-03 15:15:15.151515151 qrstu b -115 1 1 1.0 4.5 tuv 2026-04-04 16:16:16.161616161 rstuv abcded -116 2 2 1.1 4.8 wxy 2027-05-05 17:17:17.171717171 stuvw abcded -117 3 3 1.2 5.1 zab 2028-06-06 18:18:18.181818181 tuvwx abcded -118 1 4 1.3 5.4 cde 2029-07-07 19:19:19.191919191 uvwzy abcdede -119 2 5 1.4 5.7 fgh 2030-08-08 20:20:20.202020202 vwxyz abcdede -120 3 1 1.0 6.0 ijk 2031-09-09 21:21:21.212121212 wxyza abcde +100 1 1 1.0 0.0 abc 2011-01-01 01:01:01.111111111 a a {"k1":"v1"} [101,200] {"c1":10,"c2":"a"} +101 2 2 1.1 0.3 def 2012-02-02 02:02:02.222222222 ab ab {"k2":"v2"} [102,200] {"c1":10,"c2":"d"} +102 3 3 1.2 0.6 ghi 2013-03-03 03:03:03.333333333 abc abc {"k3":"v3"} [103,200] {"c1":10,"c2":"g"} +103 1 4 1.3 0.9 jkl 2014-04-04 04:04:04.444444444 abcd abcd {"k4":"v4"} [104,200] {"c1":10,"c2":"j"} +104 2 5 1.4 1.2 mno 2015-05-05 05:05:05.555555555 abcde abcde {"k5":"v5"} [105,200] {"c1":10,"c2":"m"} +105 3 1 1.0 1.5 pqr 2016-06-06 06:06:06.666666666 abcde abcdef {"k6":"v6"} [106,200] {"c1":10,"c2":"p"} +106 1 2 1.1 1.8 stu 2017-07-07 07:07:07.777777777 abcde abcdefg {"k7":"v7"} [107,200] {"c1":10,"c2":"s"} +107 2 3 1.2 2.1 vwx 2018-08-08 08:08:08.888888888 bcdef abcdefgh {"k8":"v8"} [108,200] {"c1":10,"c2":"v"} +108 3 4 1.3 2.4 yza 2019-09-09 09:09:09.999999999 cdefg abcdefghij {"k9":"v9"} [109,200] {"c1":10,"c2":"y"} +109 1 5 1.4 2.7 bcd 2020-10-10 10:10:10.101010101 klmno abcdedef {"k10":"v10"} [110,200] {"c1":10,"c2":"b"} +110 2 1 1.0 3.0 efg 2021-11-11 11:11:11.111111111 pqrst abcdede {"k11":"v11"} [111,200] {"c1":10,"c2":"e"} +111 3 2 1.1 3.3 hij 2022-12-12 12:12:12.121212121 nopqr abcded {"k12":"v12"} [112,200] {"c1":10,"c2":"h"} +112 1 3 1.2 3.6 klm 2023-01-02 13:13:13.131313131 opqrs abcdd {"k13":"v13"} [113,200] {"c1":10,"c2":"k"} +113 2 4 1.3 3.9 nop 2024-02-02 14:14:14.141414141 pqrst abc {"k14":"v14"} [114,200] {"c1":10,"c2":"n"} +114 3 5 1.4 4.2 qrs 2025-03-03 15:15:15.151515151 qrstu b {"k15":"v15"} [115,200] {"c1":10,"c2":"q"} +115 1 1 1.0 4.5 qrs 2026-04-04 16:16:16.161616161 rstuv abcded {"k16":"v16"} [116,200] {"c1":10,"c2":"q"} +116 2 2 1.1 4.8 wxy 2027-05-05 17:17:17.171717171 stuvw abcded {"k17":"v17"} [117,200] {"c1":10,"c2":"w"} +117 3 3 1.2 5.1 zab 2028-06-06 18:18:18.181818181 tuvwx abcded {"k18":"v18"} [118,200] {"c1":10,"c2":"z"} +118 1 4 1.3 5.4 cde 2029-07-07 19:19:19.191919191 uvwzy abcdede {"k19":"v19"} [119,200] {"c1":10,"c2":"c"} +119 2 5 1.4 5.7 fgh 2030-08-08 20:20:20.202020202 vwxyz abcdede {"k20":"v20"} [120,200] {"c1":10,"c2":"f"} +120 3 1 1.0 6.0 ijk 2031-09-09 21:21:21.212121212 wxyza abcde {"k21":"v21"} [121,200] {"c1":10,"c2":"i"} +PREHOOK: query: SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar) FROM parquet_types +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_types +#### A masked pattern was here #### +POSTHOOK: query: SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar) FROM parquet_types +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_types +#### A masked pattern was here #### +a 1 a 3 +ab 2 ab 3 +abc 3 abc 3 +abcd 4 abcd 4 +abcde 5 abcde 5 +abcde 5 abcdef 6 +abcde 5 abcdefg 7 +bcdef 5 abcdefgh 8 +cdefg 5 abcdefghij 10 +klmno 5 abcdedef 8 +pqrst 5 abcdede 7 +nopqr 5 abcded 6 +opqrs 5 abcdd 5 +pqrst 5 abc 3 
+qrstu 5 b 1 +rstuv 5 abcded 6 +stuvw 5 abcded 6 +tuvwx 5 abcded 6 +uvwzy 5 abcdede 7 +vwxyz 5 abcdede 7 +wxyza 5 abcde 5 PREHOOK: query: SELECT ctinyint, MAX(cint), MIN(csmallint), diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveCharObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveCharObjectInspector.java index d16e313b43999c5a67e5f30a75d6401058bdd993..2baceedbbac98ec77cfda12fa24bf08c98dc56e4 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveCharObjectInspector.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveCharObjectInspector.java @@ -21,6 +21,11 @@ import org.apache.hadoop.hive.serde2.io.HiveCharWritable; import org.apache.hadoop.hive.serde2.typeinfo.BaseCharUtils; import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.io.Text; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; public class WritableHiveCharObjectInspector extends AbstractPrimitiveWritableObjectInspector implements SettableHiveCharObjectInspector { @@ -39,6 +44,12 @@ public HiveChar getPrimitiveJavaObject(Object o) { if (o == null) { return null; } + + if (o instanceof Text) { + String str = ((Text)o).toString(); + return new HiveChar(str, ((CharTypeInfo)typeInfo).getLength()); + } + HiveCharWritable writable = ((HiveCharWritable) o); if (doesWritableMatchTypeParams(writable)) { return writable.getHiveChar(); @@ -53,6 +64,14 @@ public HiveCharWritable getPrimitiveWritableObject(Object o) { if (o == null) { return null; } + + if (o instanceof Text) { + String str = ((Text)o).toString(); + HiveCharWritable hcw = new HiveCharWritable(); + hcw.set(str, ((CharTypeInfo)typeInfo).getLength()); + return hcw; + } + HiveCharWritable writable = ((HiveCharWritable) o); if (doesWritableMatchTypeParams((HiveCharWritable) o)) { return writable; diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveVarcharObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveVarcharObjectInspector.java index 28c9080660b9d4c19789ece1754ef4ecec27f2e7..e723878bf18a2a73a419b9e1bbce400e155d0937 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveVarcharObjectInspector.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveVarcharObjectInspector.java @@ -19,10 +19,15 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.BaseCharUtils; +import org.apache.hadoop.io.Text; +import org.apache.hive.common.util.HiveStringUtils; public class WritableHiveVarcharObjectInspector extends AbstractPrimitiveWritableObjectInspector implements SettableHiveVarcharObjectInspector { @@ -43,6 +48,12 @@ public HiveVarchar getPrimitiveJavaObject(Object o) { if (o == null) { return null; } + + if (o instanceof Text) { + String str = ((Text)o).toString(); + return new HiveVarchar(str, ((VarcharTypeInfo)typeInfo).getLength()); + } + 
HiveVarcharWritable writable = ((HiveVarcharWritable)o); if (doesWritableMatchTypeParams(writable)) { return writable.getHiveVarchar(); @@ -57,6 +68,14 @@ public HiveVarcharWritable getPrimitiveWritableObject(Object o) { if (o == null) { return null; } + + if (o instanceof Text) { + String str = ((Text)o).toString(); + HiveVarcharWritable hcw = new HiveVarcharWritable(); + hcw.set(str, ((VarcharTypeInfo)typeInfo).getLength()); + return hcw; + } + HiveVarcharWritable writable = ((HiveVarcharWritable)o); if (doesWritableMatchTypeParams((HiveVarcharWritable)o)) { return writable;
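
The net effect of the converter changes above is that the Parquet read path no longer needs Hive TypeInfo threaded through every converter constructor: ESTRING_CONVERTER hands back a Text for every UTF8 binary, and length enforcement for char/varchar columns moves into WritableHiveCharObjectInspector and WritableHiveVarcharObjectInspector, which now accept a Text value directly. Below is a minimal, self-contained sketch of that varchar read path using the classes as they appear in this diff; the driver class name and the sample input string are hypothetical illustrations, not part of the patch.

import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import org.apache.hadoop.io.Text;

// Hypothetical driver, only to illustrate the new Text-handling branch.
public class ParquetVarcharReadSketch {
  public static void main(String[] args) {
    // varchar(10), matching the cvarchar column declared in parquet_types.q
    VarcharTypeInfo typeInfo = TypeInfoFactory.getVarcharTypeInfo(10);

    // Inspector over the writable form of that type; with this patch it also
    // handles a plain Text instance produced by the Parquet string converter.
    WritableHiveVarcharObjectInspector oi = new WritableHiveVarcharObjectInspector(typeInfo);

    // What ESTRING_CONVERTER now yields for any UTF8 binary, including
    // columns that the Hive schema declares as char/varchar.
    Text fromParquet = new Text("abcdefghijklmnop");

    // The new "instanceof Text" branch wraps the string and enforces the
    // declared maximum length.
    HiveVarchar value = oi.getPrimitiveJavaObject(fromParquet);
    System.out.println(value.getValue());          // abcdefghij
    System.out.println(value.getValue().length()); // 10
  }
}

The same pattern applies to char(n) via WritableHiveCharObjectInspector, which enforces the fixed length when given a Text, so the CHAR/VARCHAR-specific converters (and the hiveSchemaTypeInfos parameter they required) can be removed from ETypeConverter and the group converters, as shown in the hunks above.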