diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java index 69272dc41dbc5fe29ab4c98e730b591c28f3a297..1e26c193ce8cc0b69d9513e834726a7277d8335a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java @@ -259,21 +259,24 @@ public ListDataWriter(ListObjectInspector inspector, GroupType groupType) { @Override public void write(Object value) { recordConsumer.startGroup(); - recordConsumer.startField(repeatedGroupName, 0); - int listLength = inspector.getListLength(value); - for (int i = 0; i < listLength; i++) { - Object element = inspector.getListElement(value, i); - recordConsumer.startGroup(); - if (element != null) { - recordConsumer.startField(elementName, 0); - elementWriter.write(element); - recordConsumer.endField(elementName, 0); + + if (listLength > 0) { + recordConsumer.startField(repeatedGroupName, 0); + + for (int i = 0; i < listLength; i++) { + Object element = inspector.getListElement(value, i); + recordConsumer.startGroup(); + if (element != null) { + recordConsumer.startField(elementName, 0); + elementWriter.write(element); + recordConsumer.endField(elementName, 0); + } + recordConsumer.endGroup(); } - recordConsumer.endGroup(); - } - recordConsumer.endField(repeatedGroupName, 0); + recordConsumer.endField(repeatedGroupName, 0); + } recordConsumer.endGroup(); } } @@ -307,30 +310,32 @@ public MapDataWriter(MapObjectInspector inspector, GroupType groupType) { @Override public void write(Object value) { recordConsumer.startGroup(); - recordConsumer.startField(repeatedGroupName, 0); Map mapValues = inspector.getMap(value); - for (Map.Entry keyValue : mapValues.entrySet()) { - recordConsumer.startGroup(); - if (keyValue != null) { - // write key element - Object keyElement = keyValue.getKey(); - recordConsumer.startField(keyName, 0); - keyWriter.write(keyElement); - recordConsumer.endField(keyName, 0); - - // write value element - Object valueElement = keyValue.getValue(); - if (valueElement != null) { - recordConsumer.startField(valueName, 1); - valueWriter.write(valueElement); - recordConsumer.endField(valueName, 1); + if (mapValues != null && mapValues.size() > 0) { + recordConsumer.startField(repeatedGroupName, 0); + for (Map.Entry keyValue : mapValues.entrySet()) { + recordConsumer.startGroup(); + if (keyValue != null) { + // write key element + Object keyElement = keyValue.getKey(); + recordConsumer.startField(keyName, 0); + keyWriter.write(keyElement); + recordConsumer.endField(keyName, 0); + + // write value element + Object valueElement = keyValue.getValue(); + if (valueElement != null) { + recordConsumer.startField(valueName, 1); + valueWriter.write(valueElement); + recordConsumer.endField(valueName, 1); + } } + recordConsumer.endGroup(); } - recordConsumer.endGroup(); - } - recordConsumer.endField(repeatedGroupName, 0); + recordConsumer.endField(repeatedGroupName, 0); + } recordConsumer.endGroup(); } } diff --git a/ql/src/test/queries/clientpositive/parquet_array_map_emptynullvals.q b/ql/src/test/queries/clientpositive/parquet_array_map_emptynullvals.q new file mode 100644 index 0000000000000000000000000000000000000000..eeae5cf72f583153283195d660762b1d7c1d3979 --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_array_map_emptynullvals.q @@ -0,0 +1,20 @@ +drop table if exists testSets; +drop table if exists testSets2; +create table testSets ( +key string, +arrayValues array, +mapValues map) +stored as parquet; + +insert into table testSets select 'abcd', array(), map() from src limit 1; + +create table testSets2 ( +key string, +arrayValues array, +mapValues map) +stored as parquet; +insert into table testSets2 select * from testSets; +select * from testSets2; +drop table testSets; +drop table testSets2; + diff --git a/ql/src/test/results/clientpositive/parquet_array_map_emptynullvals.q.out b/ql/src/test/results/clientpositive/parquet_array_map_emptynullvals.q.out new file mode 100644 index 0000000000000000000000000000000000000000..ade90a9db52f0423e27cfdd04e82c6500e08de79 --- /dev/null +++ b/ql/src/test/results/clientpositive/parquet_array_map_emptynullvals.q.out @@ -0,0 +1,87 @@ +PREHOOK: query: drop table if exists testSets +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists testSets +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists testSets2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists testSets2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table testSets ( +key string, +arrayValues array, +mapValues map) +stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@testSets +POSTHOOK: query: create table testSets ( +key string, +arrayValues array, +mapValues map) +stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@testSets +PREHOOK: query: insert into table testSets select 'abcd', array(), map() from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@testsets +POSTHOOK: query: insert into table testSets select 'abcd', array(), map() from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@testsets +POSTHOOK: Lineage: testsets.arrayvalues EXPRESSION [] +POSTHOOK: Lineage: testsets.key SIMPLE [] +POSTHOOK: Lineage: testsets.mapvalues EXPRESSION [] +PREHOOK: query: create table testSets2 ( +key string, +arrayValues array, +mapValues map) +stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@testSets2 +POSTHOOK: query: create table testSets2 ( +key string, +arrayValues array, +mapValues map) +stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@testSets2 +PREHOOK: query: insert into table testSets2 select * from testSets +PREHOOK: type: QUERY +PREHOOK: Input: default@testsets +PREHOOK: Output: default@testsets2 +POSTHOOK: query: insert into table testSets2 select * from testSets +POSTHOOK: type: QUERY +POSTHOOK: Input: default@testsets +POSTHOOK: Output: default@testsets2 +POSTHOOK: Lineage: testsets2.arrayvalues SIMPLE [(testsets)testsets.FieldSchema(name:arrayvalues, type:array, comment:null), ] +POSTHOOK: Lineage: testsets2.key SIMPLE [(testsets)testsets.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: testsets2.mapvalues SIMPLE [(testsets)testsets.FieldSchema(name:mapvalues, type:map, comment:null), ] +PREHOOK: query: select * from testSets2 +PREHOOK: type: QUERY +PREHOOK: Input: default@testsets2 +#### A masked pattern was here #### +POSTHOOK: query: select * from testSets2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@testsets2 +#### A masked pattern was here #### +abcd NULL NULL +PREHOOK: query: drop table testSets +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@testsets +PREHOOK: Output: default@testsets +POSTHOOK: query: drop table testSets +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@testsets +POSTHOOK: Output: default@testsets +PREHOOK: query: drop table testSets2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@testsets2 +PREHOOK: Output: default@testsets2 +POSTHOOK: query: drop table testSets2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@testsets2 +POSTHOOK: Output: default@testsets2