diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 4a52eb5..2776fe9 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -323,6 +323,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ vector_coalesce.q,\ vector_coalesce_2.q,\ vector_coalesce_3.q,\ + vector_coalesce_4.q,\ vector_complex_all.q,\ vector_count.q,\ vector_count_distinct.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java index 3a560ca..c66beb0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java @@ -225,9 +225,14 @@ public void evaluate(VectorizedRowBatch batch) { // NULL out the remaining columns. outputColVector.noNulls = false; - for (int i = 0; i < unassignedColumnCount; i++) { - final int batchIndex = unassignedBatchIndices[i]; - outputIsNull[batchIndex] = true; + if (isAllUnassigned) { + outputIsNull[0] = true; + outputColVector.isRepeating = true; + } else { + for (int i = 0; i < unassignedColumnCount; i++) { + final int batchIndex = unassignedBatchIndices[i]; + outputIsNull[batchIndex] = true; + } } } diff --git ql/src/test/queries/clientpositive/vector_coalesce_4.q ql/src/test/queries/clientpositive/vector_coalesce_4.q new file mode 100644 index 0000000..a050beb --- /dev/null +++ ql/src/test/queries/clientpositive/vector_coalesce_4.q @@ -0,0 +1,14 @@ +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +create table coalesce_test(a int, b int) stored as orc; + +insert into coalesce_test values (1, 2); + +-- Add a single NULL row that will come from ORC as isRepeating. 
+insert into coalesce_test values (NULL, NULL); + +explain vectorization detail +select coalesce(a, b) from coalesce_test order by a, b; + +select coalesce(a, b) from coalesce_test order by a, b;; \ No newline at end of file diff --git ql/src/test/results/clientpositive/llap/vector_coalesce_4.q.out ql/src/test/results/clientpositive/llap/vector_coalesce_4.q.out new file mode 100644 index 0000000..5c3093f --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_coalesce_4.q.out @@ -0,0 +1,146 @@ +PREHOOK: query: create table coalesce_test(a int, b int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@coalesce_test +POSTHOOK: query: create table coalesce_test(a int, b int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@coalesce_test +PREHOOK: query: insert into coalesce_test values (1, 2) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@coalesce_test +POSTHOOK: query: insert into coalesce_test values (1, 2) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@coalesce_test +POSTHOOK: Lineage: coalesce_test.a SCRIPT [] +POSTHOOK: Lineage: coalesce_test.b SCRIPT [] +PREHOOK: query: insert into coalesce_test values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@coalesce_test +POSTHOOK: query: insert into coalesce_test values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@coalesce_test +POSTHOOK: Lineage: coalesce_test.a EXPRESSION [] +POSTHOOK: Lineage: coalesce_test.b EXPRESSION [] +PREHOOK: query: explain vectorization detail +select coalesce(a, b) from coalesce_test order by a, b +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select coalesce(a, b) from coalesce_test order by a, b +POSTHOOK: type: QUERY +PLAN 
VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: coalesce_test + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:a:int, 1:b:int, 2:ROW__ID:struct] + Select Operator + expressions: COALESCE(a,b) (type: int), a (type: int), b (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3, 0, 1] + selectExpressions: VectorCoalesce(columns [0, 1])(children: col 0:int, col 1:int) -> 3:int + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [3] + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + 
vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:int, VALUE._col0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2] + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select coalesce(a, b) from coalesce_test order by a, b +PREHOOK: type: QUERY +PREHOOK: Input: default@coalesce_test +#### A masked pattern was here #### +POSTHOOK: query: select coalesce(a, b) from coalesce_test order by a, b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@coalesce_test +#### A masked pattern was here #### +NULL +1 diff --git ql/src/test/results/clientpositive/vector_coalesce_4.q.out ql/src/test/results/clientpositive/vector_coalesce_4.q.out new file mode 100644 index 0000000..088d884 --- 
/dev/null +++ ql/src/test/results/clientpositive/vector_coalesce_4.q.out @@ -0,0 +1,120 @@ +PREHOOK: query: create table coalesce_test(a int, b int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@coalesce_test +POSTHOOK: query: create table coalesce_test(a int, b int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@coalesce_test +PREHOOK: query: insert into coalesce_test values (1, 2) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@coalesce_test +POSTHOOK: query: insert into coalesce_test values (1, 2) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@coalesce_test +POSTHOOK: Lineage: coalesce_test.a SCRIPT [] +POSTHOOK: Lineage: coalesce_test.b SCRIPT [] +PREHOOK: query: insert into coalesce_test values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@coalesce_test +POSTHOOK: query: insert into coalesce_test values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@coalesce_test +POSTHOOK: Lineage: coalesce_test.a EXPRESSION [] +POSTHOOK: Lineage: coalesce_test.b EXPRESSION [] +PREHOOK: query: explain vectorization detail +select coalesce(a, b) from coalesce_test order by a, b +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select coalesce(a, b) from coalesce_test order by a, b +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: coalesce_test + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + 
vectorizationSchemaColumns: [0:a:int, 1:b:int, 2:ROW__ID:struct] + Select Operator + expressions: COALESCE(a,b) (type: int), a (type: int), b (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3, 0, 1] + selectExpressions: VectorCoalesce(columns [0, 1])(children: col 0:int, col 1:int) -> 3:int + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: a:int, b:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 
Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select coalesce(a, b) from coalesce_test order by a, b +PREHOOK: type: QUERY +PREHOOK: Input: default@coalesce_test +#### A masked pattern was here #### +POSTHOOK: query: select coalesce(a, b) from coalesce_test order by a, b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@coalesce_test +#### A masked pattern was here #### +NULL +1