diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java index 620ad53..d7795c9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java @@ -1683,6 +1683,7 @@ public InStream getStream() { * stripe. */ public static class StringDictionaryTreeReader extends TreeReader { + private static final byte[] EMPTY_BYTE_ARRAY = new byte[0]; private DynamicByteArray dictionaryBuffer; private int[] dictionaryOffsets; protected IntegerReader reader; @@ -1862,11 +1863,20 @@ public Object nextVector(Object previousVector, long batchSize) throws IOExcepti } result.isRepeating = scratchlcv.isRepeating; } else { - // Entire stripe contains null strings. - result.isRepeating = true; - result.noNulls = false; - result.isNull[0] = true; - result.setRef(0, "".getBytes(), 0, 0); + if (dictionaryOffsets == null) { + // Entire stripe contains null strings. + result.isRepeating = true; + result.noNulls = false; + result.isNull[0] = true; + result.setRef(0, EMPTY_BYTE_ARRAY, 0, 0); + } else { + // stripe contains nulls and empty strings + for (int i = 0; i < batchSize; i++) { + if (!result.isNull[i]) { + result.setRef(i, EMPTY_BYTE_ARRAY, 0, 0); + } + } + } } return result; } diff --git a/ql/src/test/queries/clientpositive/vector_string_reader_empty_dict.q b/ql/src/test/queries/clientpositive/vector_string_reader_empty_dict.q new file mode 100644 index 0000000..0e8a743 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_string_reader_empty_dict.q @@ -0,0 +1,20 @@ +create table orcstr (vcol varchar(20)) stored as orc; + +insert overwrite table orcstr select null from src; + +SET hive.fetch.task.conversion=none; + +SET hive.vectorized.execution.enabled=false; +select vcol from orcstr limit 1; + +SET hive.vectorized.execution.enabled=true; +select vcol from orcstr limit 1; + +insert overwrite table orcstr select "" from src; + +SET hive.vectorized.execution.enabled=false; +select vcol from orcstr limit 1; + +SET hive.vectorized.execution.enabled=true; +select vcol from orcstr limit 1; + diff --git a/ql/src/test/results/clientpositive/vector_string_reader_empty_dict.q.out b/ql/src/test/results/clientpositive/vector_string_reader_empty_dict.q.out new file mode 100644 index 0000000..4f00bed --- /dev/null +++ b/ql/src/test/results/clientpositive/vector_string_reader_empty_dict.q.out @@ -0,0 +1,62 @@ +PREHOOK: query: create table orcstr (vcol varchar(20)) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orcstr +POSTHOOK: query: create table orcstr (vcol varchar(20)) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orcstr +PREHOOK: query: insert overwrite table orcstr select null from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@orcstr +POSTHOOK: query: insert overwrite table orcstr select null from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@orcstr +POSTHOOK: Lineage: orcstr.vcol EXPRESSION [] +PREHOOK: query: select vcol from orcstr limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orcstr +#### A masked pattern was here #### +POSTHOOK: query: select vcol from orcstr limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcstr +#### A masked pattern was here #### +NULL +PREHOOK: query: select vcol from orcstr limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orcstr +#### A masked pattern was here #### +POSTHOOK: query: select vcol from orcstr limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcstr +#### A masked pattern was here #### +NULL +PREHOOK: query: insert overwrite table orcstr select "" from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@orcstr +POSTHOOK: query: insert overwrite table orcstr select "" from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@orcstr +POSTHOOK: Lineage: orcstr.vcol EXPRESSION [] +PREHOOK: query: select vcol from orcstr limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orcstr +#### A masked pattern was here #### +POSTHOOK: query: select vcol from orcstr limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcstr +#### A masked pattern was here #### + +PREHOOK: query: select vcol from orcstr limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orcstr +#### A masked pattern was here #### +POSTHOOK: query: select vcol from orcstr limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcstr +#### A masked pattern was here #### +