diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 27b53b8..52ef2d3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -101,6 +101,8 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; +import org.apache.hadoop.hive.ql.io.NullRowsInputFormat; +import org.apache.hadoop.hive.ql.io.OneNullRowInputFormat; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; @@ -353,6 +355,14 @@ vectorDeserializeTextSupportSet.addAll(Arrays.asList(Support.values())); } + private static final Set supportedAcidInputFormats = new TreeSet(); + static { + supportedAcidInputFormats.add(OrcInputFormat.class.getName()); + // For metadataonly or empty rows optimizations, null/onerow input format can be selected. + supportedAcidInputFormats.add(NullRowsInputFormat.class.getName()); + supportedAcidInputFormats.add(OneNullRowInputFormat.class.getName()); + } + private BaseWork currentBaseWork; private Operator currentOperator; private Collection> vectorizedInputFormatExcludes; @@ -1201,7 +1211,7 @@ private boolean verifyAndSetVectorPartDesc( // Today, ACID tables are only ORC and that format is vectorizable. Verify these // assumptions. Preconditions.checkState(isInputFileFormatVectorized); - Preconditions.checkState(inputFileFormatClassName.equals(OrcInputFormat.class.getName())); + Preconditions.checkState(supportedAcidInputFormats.contains(inputFileFormatClassName)); if (!useVectorizedInputFileFormat) { enabledConditionsNotMetList.add("Vectorizing ACID tables requires " diff --git a/ql/src/test/queries/clientpositive/acid_nullscan.q b/ql/src/test/queries/clientpositive/acid_nullscan.q new file mode 100644 index 0000000..6768f01 --- /dev/null +++ b/ql/src/test/queries/clientpositive/acid_nullscan.q @@ -0,0 +1,14 @@ + +set hive.mapred.mode=nonstrict; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.vectorized.execution.enabled=true; + +CREATE TABLE acid_vectorized(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true'); +insert into table acid_vectorized select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10; +insert into table acid_vectorized values (1, 'bar'); + +select sum(a) from acid_vectorized where false; + diff --git a/ql/src/test/results/clientpositive/acid_nullscan.q.out b/ql/src/test/results/clientpositive/acid_nullscan.q.out new file mode 100644 index 0000000..de66ffe --- /dev/null +++ b/ql/src/test/results/clientpositive/acid_nullscan.q.out @@ -0,0 +1,37 @@ +PREHOOK: query: CREATE TABLE acid_vectorized(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@acid_vectorized +POSTHOOK: query: CREATE TABLE acid_vectorized(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@acid_vectorized +PREHOOK: query: insert into table acid_vectorized select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@acid_vectorized +POSTHOOK: query: insert into table acid_vectorized select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@acid_vectorized +POSTHOOK: Lineage: acid_vectorized.a SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: acid_vectorized.b SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] +PREHOOK: query: insert into table acid_vectorized values (1, 'bar') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@acid_vectorized +POSTHOOK: query: insert into table acid_vectorized values (1, 'bar') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@acid_vectorized +POSTHOOK: Lineage: acid_vectorized.a SCRIPT [] +POSTHOOK: Lineage: acid_vectorized.b SCRIPT [] +PREHOOK: query: select sum(a) from acid_vectorized where false +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_vectorized +#### A masked pattern was here #### +POSTHOOK: query: select sum(a) from acid_vectorized where false +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid_vectorized +#### A masked pattern was here #### +NULL