diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index b9d85f6..cc62be1 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -234,6 +234,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\ vector_multi_insert.q,\ vector_non_string_partition.q,\ vector_nullsafe_join.q,\ + vector_null_projection.q,\ vector_orderby_5.q,\ vector_outer_join0.q,\ vector_outer_join1.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index b429c56..3ed3c7e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -1191,7 +1191,7 @@ private boolean validateAggregationDesc(List descs, boolean isR return true; } - private boolean validateExprNodeDescRecursive(ExprNodeDesc desc) { + private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, VectorExpressionDescriptor.Mode mode) { if (desc instanceof ExprNodeColumnDesc) { ExprNodeColumnDesc c = (ExprNodeColumnDesc) desc; // Currently, we do not support vectorized virtual columns (see HIVE-5570). @@ -1201,7 +1201,7 @@ private boolean validateExprNodeDescRecursive(ExprNodeDesc desc) { } } String typeName = desc.getTypeInfo().getTypeName(); - boolean ret = validateDataType(typeName); + boolean ret = validateDataType(typeName, mode); if (!ret) { LOG.info("Cannot vectorize " + desc.toString() + " of type " + typeName); return false; @@ -1215,7 +1215,8 @@ private boolean validateExprNodeDescRecursive(ExprNodeDesc desc) { } if (desc.getChildren() != null) { for (ExprNodeDesc d: desc.getChildren()) { - boolean r = validateExprNodeDescRecursive(d); + // Don't restrict child expressions for projection. Always use looser FILTER mode. + boolean r = validateExprNodeDescRecursive(d, VectorExpressionDescriptor.Mode.FILTER); if (!r) { return false; } @@ -1229,7 +1230,7 @@ private boolean validateExprNodeDesc(ExprNodeDesc desc) { } boolean validateExprNodeDesc(ExprNodeDesc desc, VectorExpressionDescriptor.Mode mode) { - if (!validateExprNodeDescRecursive(desc)) { + if (!validateExprNodeDescRecursive(desc, mode)) { return false; } try { @@ -1312,8 +1313,13 @@ private boolean aggregatorsOutputIsPrimitive(AggregationDesc aggDesc, boolean is return false; } - private boolean validateDataType(String type) { - return supportedDataTypesPattern.matcher(type.toLowerCase()).matches(); + private boolean validateDataType(String type, VectorExpressionDescriptor.Mode mode) { + type = type.toLowerCase(); + boolean result = supportedDataTypesPattern.matcher(type).matches(); + if (result && mode == VectorExpressionDescriptor.Mode.PROJECTION && type.equals("void")) { + return false; + } + return result; } private VectorizationContext getVectorizationContext(RowSchema rowSchema, String contextName, diff --git ql/src/test/queries/clientpositive/vector_null_projection.q ql/src/test/queries/clientpositive/vector_null_projection.q new file mode 100644 index 0000000..765e45f --- /dev/null +++ ql/src/test/queries/clientpositive/vector_null_projection.q @@ -0,0 +1,18 @@ +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +create table a(s string) stored as orc; +create table b(s string) stored as orc; +insert into table a values('aaa'); +insert into table b values('aaa'); + +-- We expect no vectorization due to NULL (void) projection type. +explain +select NULL from a; + +select NULL from a; + +explain +select NULL as x from a union distinct select NULL as x from b; + +select NULL as x from a union distinct select NULL as x from b; \ No newline at end of file diff --git ql/src/test/results/clientpositive/tez/vector_null_projection.q.out ql/src/test/results/clientpositive/tez/vector_null_projection.q.out new file mode 100644 index 0000000..9b7b698 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_null_projection.q.out @@ -0,0 +1,164 @@ +PREHOOK: query: create table a(s string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@a +POSTHOOK: query: create table a(s string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@a +PREHOOK: query: create table b(s string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@b +POSTHOOK: query: create table b(s string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@b +PREHOOK: query: insert into table a values('aaa') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@a +POSTHOOK: query: insert into table a values('aaa') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@a +POSTHOOK: Lineage: a.s SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into table b values('aaa') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@b +POSTHOOK: query: insert into table b values('aaa') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@b +POSTHOOK: Lineage: b.s SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: -- We expect no vectorization due to NULL (void) projection type. +explain +select NULL from a +PREHOOK: type: QUERY +POSTHOOK: query: -- We expect no vectorization due to NULL (void) projection type. +explain +select NULL from a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: null (type: void) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select NULL from a +PREHOOK: type: QUERY +PREHOOK: Input: default@a +#### A masked pattern was here #### +POSTHOOK: query: select NULL from a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +#### A masked pattern was here #### +NULL +PREHOOK: query: explain +select NULL as x from a union distinct select NULL as x from b +PREHOOK: type: QUERY +POSTHOOK: query: explain +select NULL as x from a union distinct select NULL as x from b +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Union 2 (CONTAINS) + Map 4 <- Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Select Operator + Select Operator + Group By Operator + keys: null (type: void) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: void) + sort order: + + Map-reduce partition columns: _col0 (type: void) + Map 4 + Map Operator Tree: + TableScan + alias: b + Select Operator + Select Operator + Group By Operator + keys: null (type: void) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: void) + sort order: + + Map-reduce partition columns: _col0 (type: void) + Reducer 3 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: void) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select NULL as x from a union distinct select NULL as x from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select NULL as x from a union distinct select NULL as x from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +NULL diff --git ql/src/test/results/clientpositive/vector_null_projection.q.out ql/src/test/results/clientpositive/vector_null_projection.q.out new file mode 100644 index 0000000..7c3136f --- /dev/null +++ ql/src/test/results/clientpositive/vector_null_projection.q.out @@ -0,0 +1,163 @@ +PREHOOK: query: create table a(s string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@a +POSTHOOK: query: create table a(s string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@a +PREHOOK: query: create table b(s string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@b +POSTHOOK: query: create table b(s string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@b +PREHOOK: query: insert into table a values('aaa') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@a +POSTHOOK: query: insert into table a values('aaa') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@a +POSTHOOK: Lineage: a.s SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into table b values('aaa') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@b +POSTHOOK: query: insert into table b values('aaa') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@b +POSTHOOK: Lineage: b.s SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: -- We expect no vectorization due to NULL (void) projection type. +explain +select NULL from a +PREHOOK: type: QUERY +POSTHOOK: query: -- We expect no vectorization due to NULL (void) projection type. +explain +select NULL from a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: null (type: void) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select NULL from a +PREHOOK: type: QUERY +PREHOOK: Input: default@a +#### A masked pattern was here #### +POSTHOOK: query: select NULL from a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +#### A masked pattern was here #### +NULL +PREHOOK: query: explain +select NULL as x from a union distinct select NULL as x from b +PREHOOK: type: QUERY +POSTHOOK: query: explain +select NULL as x from a union distinct select NULL as x from b +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Union + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Group By Operator + keys: null (type: void) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: void) + sort order: + + Map-reduce partition columns: _col0 (type: void) + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + TableScan + alias: b + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Union + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Group By Operator + keys: null (type: void) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: void) + sort order: + + Map-reduce partition columns: _col0 (type: void) + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: void) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select NULL as x from a union distinct select NULL as x from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select NULL as x from a union distinct select NULL as x from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +NULL