diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java index ea4f2f2562..12af77c123 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java @@ -258,7 +258,8 @@ private void fillColumnVector(PrimitiveObjectInspector.PrimitiveCategory categor lcv.child = new BytesColumnVector(total); lcv.child.init(); for (int i = 0; i < valueList.size(); i++) { - ((BytesColumnVector)lcv.child).setVal(i, ((List)valueList).get(i)); + byte[] src = ((List)valueList).get(i); + ((BytesColumnVector)lcv.child).setRef(i, src, 0, src.length); } break; case FLOAT: diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedListColumnReader.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedListColumnReader.java index de196152a5..8ea5d25677 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedListColumnReader.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedListColumnReader.java @@ -72,8 +72,9 @@ protected static void writeListData(ParquetWriter writer, boolean isDicti } } for (int j = 0; j < listMaxSize; j++) { - group.append("list_int32_field_for_repeat_test", getIntValue(isDictionaryEncoding, j)); + group.append("list_binary_field_for_repeat_test", getBinaryValue(isDictionaryEncoding, i)); } + writer.write(group); } writer.close(); @@ -157,6 +158,14 @@ public void testRepeateListRead() throws Exception { removeFile(); } + @Test + public void testUnrepeatedStringWithoutNullListRead() throws Exception { + removeFile(); + writeListData(initWriterFromFile(), false, 1025); + testUnRepeateStringWithoutNullListRead(); + removeFile(); + } + private void testListReadAllType(boolean isDictionaryEncoding, int elementNum) throws Exception { 
testListRead(isDictionaryEncoding, "int", elementNum); testListRead(isDictionaryEncoding, "long", elementNum); @@ -250,6 +259,10 @@ private void testListRead(boolean isDictionaryEncoding, String type, int element try { while (reader.next(NullWritable.get(), previous)) { ListColumnVector vector = (ListColumnVector) previous.cols[0]; + + // The vector must be flagged isRepeating exactly when offsets.length is 1. + assertEquals((vector.offsets.length == 1),vector.isRepeating); + for (int i = 0; i < vector.offsets.length; i++) { if (row == elementNum) { assertEquals(i, vector.offsets.length - 1); @@ -305,4 +318,23 @@ private void testRepeateListRead(int elementNum, boolean isNull) throws Exceptio reader.close(); } } + + private void testUnRepeateStringWithoutNullListRead() throws Exception { + Configuration conf = new Configuration(); + conf.set(IOConstants.COLUMNS, "list_binary_field_for_repeat_test"); + conf.set(IOConstants.COLUMNS_TYPES, "array"); + conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false); + conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0"); + VectorizedParquetRecordReader reader = createTestParquetReader( + "message hive_schema {repeated binary list_binary_field_for_repeat_test;}", conf); + VectorizedRowBatch previous = reader.createValue(); + try { + while (reader.next(NullWritable.get(), previous)) { + ListColumnVector vector = (ListColumnVector) previous.cols[0]; + assertEquals((vector.offsets.length == 1),vector.isRepeating); + } + } finally { + reader.close(); + } + } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java index 33c5c82108..db7777d1e5 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java @@ -124,6 +124,7 @@ + "repeated fixed_len_byte_array(3) list_byte_array_field;" + 
"repeated binary list_binary_field;" + "repeated binary list_decimal_field (DECIMAL(5,2));" + + "repeated binary list_binary_field_for_repeat_test;" + "repeated int32 list_int32_field_for_repeat_test;" + "repeated group map_int32 (MAP_KEY_VALUE) {\n" + " required int32 key;\n"