diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java
index 699de59..30af42c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java
@@ -82,7 +82,7 @@
   /**
    * The number of rows that have been returned.
    */
-  private long rowsReturned;
+  private long rowsReturned = 0;
 
   /**
    * The number of rows that have been reading, including the current in flight row group.
@@ -93,7 +93,7 @@
    * The total number of rows this RecordReader will eventually read. The sum of the
    * rows of all the row groups.
    */
-  protected long totalRowCount;
+  protected long totalRowCount = 0;
 
   @VisibleForTesting
   public VectorizedParquetRecordReader(
@@ -129,6 +129,11 @@ public VectorizedParquetRecordReader(
 
   public void initialize(
       InputSplit oldSplit, JobConf configuration) throws IOException, InterruptedException {
+    // The split may be null during the split phase; skip initialization so that
+    // next() reports end-of-input immediately (rowsReturned == totalRowCount == 0).
+    if (oldSplit == null) {
+      return;
+    }
     jobConf = configuration;
     ParquetMetadata footer;
     List blocks;
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedColumnReader.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedColumnReader.java
index 33567eb..670bfa6 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedColumnReader.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedColumnReader.java
@@ -18,12 +18,20 @@
 
 package org.apache.hadoop.hive.ql.io.parquet;
 
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.io.IOConstants;
+import org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapreduce.InputSplit;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
 import java.io.IOException;
 
+import static org.junit.Assert.assertFalse;
+
 public class TestVectorizedColumnReader extends VectorizedColumnReaderTestBase {
 
   static boolean isDictionaryEncoding = false;
@@ -88,4 +96,17 @@ public void structReadSomeNull() throws Exception {
   public void decimalRead() throws Exception {
     decimalRead(isDictionaryEncoding);
   }
+
+  @Test
+  public void testNullSplitForParquetReader() throws Exception {
+    Configuration conf = new Configuration();
+    conf.set(IOConstants.COLUMNS, "int32_field");
+    conf.set(IOConstants.COLUMNS_TYPES, "int");
+    HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
+    HiveConf.setVar(conf, HiveConf.ConfVars.PLAN, "//tmp");
+    initialVectorizedRowBatchCtx(conf);
+    VectorizedParquetRecordReader reader =
+        new VectorizedParquetRecordReader((InputSplit) null, new JobConf(conf));
+    assertFalse(reader.next(reader.createKey(), reader.createValue()));
+  }
 }
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java
index 833cfdb..f537cee 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java
@@ -295,7 +295,7 @@ protected static void writeData(ParquetWriter writer, boolean isDictionar
     writer.close();
   }
 
-  private void initialVectorizedRowBatchCtx(Configuration conf) throws HiveException {
+  protected void initialVectorizedRowBatchCtx(Configuration conf) throws HiveException {
     MapWork mapWork = new MapWork();
     VectorizedRowBatchCtx rbCtx = new VectorizedRowBatchCtx();
     rbCtx.init(createStructObjectInspector(conf), new String[0]);