diff --git a/data/files/sample2.json b/data/files/sample2.json
new file mode 100644
index 0000000..4e1802f
--- /dev/null
+++ b/data/files/sample2.json
@@ -0,0 +1,2 @@
+{"id": 1, "reports": [2,3], "address": {"country": 1, "state": 1}}
+{"id": 2, "reports": [], "address": {"country": 1, "state": 2}}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java
index 6091882..ae545b8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java
@@ -169,6 +169,13 @@ public StructField getStructFieldRef(final String name) {
       return new ArrayList(Arrays.asList(arrWritable));
     }
 
+    //since setStructFieldData and create return a list, getStructFieldData should be able to
+    //handle list data. This is required when table serde is ParquetHiveSerDe and partition serde
+    //is something else.
+    if (data instanceof List) {
+      return ((List) data);
+    }
+
     throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
   }
 
diff --git a/ql/src/test/queries/clientpositive/parquet_mixed_partition_formats2.q b/ql/src/test/queries/clientpositive/parquet_mixed_partition_formats2.q
new file mode 100644
index 0000000..e0b21d1
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/parquet_mixed_partition_formats2.q
@@ -0,0 +1,31 @@
+add jar ${system:maven.local.repository}/org/apache/hive/hcatalog/hive-hcatalog-core/${system:hive.version}/hive-hcatalog-core-${system:hive.version}.jar;
+
+CREATE TABLE parquet_table_json_partition (
+id bigint COMMENT 'from deserializer',
+address struct<country:int,state:int> COMMENT 'from deserializer',
+reports array<int> COMMENT 'from deserializer')
+PARTITIONED BY (
+ts string)
+ROW FORMAT SERDE
+'org.apache.hive.hcatalog.data.JsonSerDe'
+STORED AS INPUTFORMAT
+'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
+
+LOAD DATA LOCAL INPATH '../../data/files/sample2.json' INTO TABLE parquet_table_json_partition PARTITION(ts='20150101');
+
+SELECT * FROM parquet_table_json_partition LIMIT 100;
+
+ALTER TABLE parquet_table_json_partition
+ SET FILEFORMAT INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+ OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+ SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe';
+
+SELECT * FROM parquet_table_json_partition LIMIT 100;
+
+CREATE TABLE new_table AS SELECT * FROM parquet_table_json_partition LIMIT 100;
+
+SELECT * FROM new_table;
+
+
diff --git a/ql/src/test/results/clientpositive/parquet_mixed_partition_formats2.q.out b/ql/src/test/results/clientpositive/parquet_mixed_partition_formats2.q.out
new file mode 100644
index 0000000..c4d7197
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_mixed_partition_formats2.q.out
@@ -0,0 +1,99 @@
+PREHOOK: query: CREATE TABLE parquet_table_json_partition (
+id bigint COMMENT 'from deserializer',
+address struct<country:int,state:int> COMMENT 'from deserializer',
+reports array<int> COMMENT 'from deserializer')
+PARTITIONED BY (
+ts string)
+ROW FORMAT SERDE
+'org.apache.hive.hcatalog.data.JsonSerDe'
+STORED AS INPUTFORMAT
+'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet_table_json_partition
+POSTHOOK: query: CREATE TABLE parquet_table_json_partition (
+id bigint COMMENT 'from deserializer',
+address struct<country:int,state:int> COMMENT 'from deserializer',
+reports array<int> COMMENT 'from deserializer')
+PARTITIONED BY (
+ts string)
+ROW FORMAT SERDE
+'org.apache.hive.hcatalog.data.JsonSerDe'
+STORED AS INPUTFORMAT
+'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_table_json_partition
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/sample2.json' INTO TABLE parquet_table_json_partition PARTITION(ts='20150101')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@parquet_table_json_partition
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/sample2.json' INTO TABLE parquet_table_json_partition PARTITION(ts='20150101')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@parquet_table_json_partition
+POSTHOOK: Output: default@parquet_table_json_partition@ts=20150101
+PREHOOK: query: SELECT * FROM parquet_table_json_partition LIMIT 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_table_json_partition
+PREHOOK: Input: default@parquet_table_json_partition@ts=20150101
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parquet_table_json_partition LIMIT 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_table_json_partition
+POSTHOOK: Input: default@parquet_table_json_partition@ts=20150101
+#### A masked pattern was here ####
+1	{"country":1,"state":1}	[2,3]	20150101
+2	{"country":1,"state":2}	[]	20150101
+PREHOOK: query: ALTER TABLE parquet_table_json_partition
+ SET FILEFORMAT INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+ OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+ SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+PREHOOK: type: ALTERTABLE_FILEFORMAT
+PREHOOK: Input: default@parquet_table_json_partition
+PREHOOK: Output: default@parquet_table_json_partition
+POSTHOOK: query: ALTER TABLE parquet_table_json_partition
+ SET FILEFORMAT INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+ OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+ SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+POSTHOOK: type: ALTERTABLE_FILEFORMAT
+POSTHOOK: Input: default@parquet_table_json_partition
+POSTHOOK: Output: default@parquet_table_json_partition
+PREHOOK: query: SELECT * FROM parquet_table_json_partition LIMIT 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_table_json_partition
+PREHOOK: Input: default@parquet_table_json_partition@ts=20150101
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parquet_table_json_partition LIMIT 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_table_json_partition
+POSTHOOK: Input: default@parquet_table_json_partition@ts=20150101
+#### A masked pattern was here ####
+1	{"country":1,"state":1}	[2,3]	20150101
+2	{"country":1,"state":2}	[]	20150101
+PREHOOK: query: CREATE TABLE new_table AS SELECT * FROM parquet_table_json_partition LIMIT 100
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@parquet_table_json_partition
+PREHOOK: Input: default@parquet_table_json_partition@ts=20150101
+PREHOOK: Output: database:default
+PREHOOK: Output: default@new_table
+POSTHOOK: query: CREATE TABLE new_table AS SELECT * FROM parquet_table_json_partition LIMIT 100
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@parquet_table_json_partition
+POSTHOOK: Input: default@parquet_table_json_partition@ts=20150101
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@new_table
+PREHOOK: query: SELECT * FROM new_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@new_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM new_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@new_table
+#### A masked pattern was here ####
+2	{"country":1,"state":2}	[]	20150101
+1	{"country":1,"state":1}	[2,3]	20150101
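
Why the new instanceof List branch is needed: when ALTER TABLE ... SET FILEFORMAT switches the table to ParquetHiveSerDe but an existing partition keeps its original serde (JsonSerDe here), Hive converts each partition row to the table schema through the Parquet ObjectInspector. That conversion builds rows with the inspector's own create() and setStructFieldData(), both of which hand back a java.util.List rather than the ArrayWritable the inspector receives on the normal Parquet read path, so without the new branch every such row failed with "Cannot inspect java.util.ArrayList". The sketch below illustrates the two input shapes. It is a minimal standalone mock, not the Hive class: StructDataSketch and FakeArrayWritable are invented stand-ins so it runs without Hadoop or Hive on the classpath.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class StructDataSketch {

  // Made-up stand-in for org.apache.hadoop.io.ArrayWritable, kept to the one
  // method the sketch needs.
  static class FakeArrayWritable {
    private final Object[] values;
    FakeArrayWritable(Object... values) { this.values = values; }
    Object[] get() { return values; }
  }

  // Mirrors the patched getStructFieldsDataAsList: accept both the wrapped
  // array produced by the Parquet read path and the plain List produced by
  // the inspector's own create()/setStructFieldData() on the conversion path.
  @SuppressWarnings("unchecked")
  static List<Object> getStructFieldsDataAsList(Object data) {
    if (data == null) {
      return null;
    }
    if (data instanceof FakeArrayWritable) {
      // Parquet read path: fields arrive wrapped in an ArrayWritable.
      return new ArrayList<>(Arrays.asList(((FakeArrayWritable) data).get()));
    }
    if (data instanceof List) {
      // The branch this patch adds: mixed-serde partitions hand in a List.
      return (List<Object>) data;
    }
    throw new UnsupportedOperationException(
        "Cannot inspect " + data.getClass().getCanonicalName());
  }

  public static void main(String[] args) {
    // A row read from a Parquet data file.
    System.out.println(getStructFieldsDataAsList(new FakeArrayWritable(1L, "x")));
    // A row converted from the JsonSerDe partition: before the patch this
    // case threw "Cannot inspect java.util.ArrayList".
    System.out.println(getStructFieldsDataAsList(new ArrayList<>(Arrays.asList(2L, "y"))));
  }
}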