diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetNullOptimization.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetNullOptimization.java
new file mode 100644
index 0000000..9c089a0
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetNullOptimization.java
@@ -0,0 +1,97 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.parquet;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper;
+import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import parquet.hadoop.metadata.BlockMetaData;
+import parquet.io.api.RecordConsumer;
+import parquet.schema.MessageType;
+import parquet.schema.MessageTypeParser;
+
+public class TestParquetNullOptimization extends AbstractTestParquetDirect {
+  JobConf conf;
+  String columnNames;
+  String columnTypes;
+
+  @Before
+  public void initConf() throws Exception {
+    conf = new JobConf();
+
+  }
+
+  @Test
+  public void testColumnsWithNull() throws Exception {
+    // define the schema: three nullable (optional) columns
+    columnNames = "intCol,doubleCol,booleanCol";
+    columnTypes = "int,double,boolean";
+    StructObjectInspector inspector = getObjectInspector(columnNames, columnTypes);
+    MessageType fileSchema = MessageTypeParser.parseMessageType(
+        "message hive_schema {\n" +
+        "  optional int32 intCol;\n" +
+        "  optional double doubleCol;\n" +
+        "  optional boolean booleanCol;\n" +
+        "}\n"
+    );
+
+    conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, "intCol,doubleCol,booleanCol");
+    conf.set("columns", "intCol");
+    conf.set("columns.types", "int");
+
+    // write a Parquet file with a single record in which doubleCol is null
+    Path testPath = writeDirect("ColumnsWithNull", fileSchema,
+        new DirectWriter() {
+          @Override
+          public void write(RecordConsumer consumer) {
+            consumer.startMessage();
+            consumer.startField("intCol", 0);
+            consumer.addInteger(1);
+            consumer.endField("intCol", 0);
+            // doubleCol (field index 1) is never started, so it is written as null
+            consumer.startField("booleanCol", 2);
+            consumer.addBoolean(true);
+            consumer.endField("booleanCol", 2);
+            consumer.endMessage();
+          }
+        });
+
+    ParquetRecordReaderWrapper recordReader = (ParquetRecordReaderWrapper)
+        new MapredParquetInputFormat().getRecordReader(
+            new FileSplit(testPath, 0, fileLength(testPath), (String[]) null), conf, null);
+
+    BlockMetaData block = recordReader.getFiltedBlocks().get(0);
+
+    // column 0 (intCol): one non-null value, no nulls
+    Assert.assertEquals(true, block.getColumns().get(0).getStatistics().hasNonNullValue());
+    Assert.assertEquals(0, block.getColumns().get(0).getStatistics().getNumNulls());
+    // column 1 (doubleCol): no non-null values, one null
+    Assert.assertEquals(false, block.getColumns().get(1).getStatistics().hasNonNullValue());
+    Assert.assertEquals(1, block.getColumns().get(1).getStatistics().getNumNulls());
+    // column 2 (booleanCol): one non-null value, no nulls
+    Assert.assertEquals(true, block.getColumns().get(2).getStatistics().hasNonNullValue());
+    Assert.assertEquals(0, block.getColumns().get(2).getStatistics().getNumNulls());
+  }
+}
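For context, not part of the patch: a minimal sketch of how the row-group null statistics asserted above could be used to eliminate a row group for a "col IS NOT NULL" predicate. The class and method names (NullStatsSketch, canSkipForIsNotNull) are hypothetical; the only API calls assumed are the ones the test itself exercises (getColumns(), getStatistics(), hasNonNullValue(), getNumNulls()) plus BlockMetaData.getRowCount().

import java.util.List;

import parquet.column.statistics.Statistics;
import parquet.hadoop.metadata.BlockMetaData;
import parquet.hadoop.metadata.ColumnChunkMetaData;

public class NullStatsSketch {
  /**
   * Returns true when the statistics show that every value of the column at
   * columnIndex in this row group is null, so the row group can never satisfy
   * "col IS NOT NULL" and may be skipped entirely.
   */
  static boolean canSkipForIsNotNull(BlockMetaData block, int columnIndex) {
    List<ColumnChunkMetaData> columns = block.getColumns();
    Statistics stats = columns.get(columnIndex).getStatistics();
    // hasNonNullValue() is also false when no statistics were written at all,
    // so cross-check the null count against the row count before skipping.
    return !stats.hasNonNullValue() && stats.getNumNulls() == block.getRowCount();
  }
}

Against the file the test writes, canSkipForIsNotNull(block, 1) would return true for doubleCol (no non-null values, one null in one row), while column indexes 0 and 2 would return false.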