diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java index 16c063f..cb2aca5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java @@ -64,6 +64,7 @@ int next() throws IOException { void nextVector(LongColumnVector previous, long previousLen) throws IOException { + previous.isRepeating = true; for (int i = 0; i < previousLen; i++) { if (!previous.isNull[i]) { @@ -73,7 +74,13 @@ void nextVector(LongColumnVector previous, long previousLen) // processing is 1, so set that if the value is null previous.vector[i] = 1; } - if (previous.isRepeating && i > 0 && (previous.vector[i-1] != previous.vector[i])) { + + // The default value for nulls in Vectorization for int types is 1 + // and given that non null value can also be 1, we need to check for isNull also + // when determining the isRepeating flag. + if (previous.isRepeating + && i > 0 + && ((previous.vector[i - 1] != previous.vector[i]) || (previous.isNull[i - 1] != previous.isNull[i]))) { previous.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java index 23d54e1..51a5818 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java @@ -95,7 +95,13 @@ void nextVector(LongColumnVector previous, long previousLen) // processing is 1, so set that if the value is null previous.vector[i] = 1; } - if (previous.isRepeating && i > 0 && (previous.vector[i-1] != previous.vector[i])) { + + // The default value for nulls in Vectorization for int types is 1 + // and given that non null value can also be 1, we need to check for isNull also + // when determining the isRepeating flag. + if (previous.isRepeating + && i > 0 + && ((previous.vector[i - 1] != previous.vector[i]) || (previous.isNull[i - 1] != previous.isNull[i]))) { previous.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReader.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReader.java index b30895f..f0931d9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReader.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReader.java @@ -101,7 +101,13 @@ void nextVector(LongColumnVector previous, long previousLen) // processing is 1, so set that if the value is null previous.vector[i] = 1; } - if (previous.isRepeating && i > 0 && (previous.vector[i-1] != previous.vector[i])) { + + // The default value for nulls in Vectorization for int types is 1 + // and given that non null value can also be 1, we need to check for isNull also + // when determining the isRepeating flag. + if (previous.isRepeating + && i > 0 + && (previous.vector[i - 1] != previous.vector[i] || previous.isNull[i - 1] != previous.isNull[i])) { previous.isRepeating = false; } }