From 8e9d0bf1fa7af72e8ac281d94315eb4006303ef8 Mon Sep 17 00:00:00 2001 From: Gopal V Date: Tue, 13 Feb 2018 16:14:36 -0800 Subject: [PATCH] Vectorize delayed fixup --- .../test/resources/testconfiguration.properties | 1 + .../hive/ql/optimizer/physical/Vectorizer.java | 42 +- .../queries/clientpositive/vectorized_mapjoin3.q | 53 ++ .../clientpositive/llap/vectorized_mapjoin3.q.out | 669 +++++++++++++++++++++ 4 files changed, 764 insertions(+), 1 deletion(-) create mode 100644 ql/src/test/queries/clientpositive/vectorized_mapjoin3.q create mode 100644 ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 391170f..2bf6d28 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -430,6 +430,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ vectorized_dynamic_partition_pruning.q,\ vectorized_insert_into_bucketed_table.q,\ vectorized_mapjoin.q,\ + vectorized_mapjoin3.q,\ vectorized_math_funcs.q,\ vectorized_nested_mapjoin.q,\ vectorized_parquet.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 1186bd4..244dcf1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -27,6 +27,8 @@ import java.util.Collection; import java.util.HashMap; import java.util.HashSet; +import java.util.IdentityHashMap; +import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -354,6 +356,8 @@ private BaseWork currentBaseWork; private Operator currentOperator; private Collection> vectorizedInputFormatExcludes; + private Map, Set, Operator>>> delayedFixups = + new IdentityHashMap, Set, Operator>>>(); public void testSetCurrentBaseWork(BaseWork testBaseWork) { currentBaseWork = testBaseWork; @@ -749,6 +753,8 @@ public VectorDesc getVectorDesc() { List> currentVectorParentList = newOperatorList(); currentVectorParentList.add(dummyVectorOperator); + delayedFixups.clear(); + do { List> nextParentList = newOperatorList(); List> nextVectorParentList= newOperatorList(); @@ -778,6 +784,8 @@ public VectorDesc getVectorDesc() { currentVectorParentList = nextVectorParentList; } while (currentParentList.size() > 0); + runDelayedFixups(); + return dummyVectorOperator; } @@ -844,12 +852,44 @@ private void fixupNewVectorChild( if (childMultipleParent == parent) { childMultipleParents.set(i, vectorParent); } else { - fixupOtherParent(childMultipleParent, child, vectorChild); + queueDelayedFixup(childMultipleParent, child, vectorChild); } } vectorChild.setParentOperators(childMultipleParents); } + /* + * The fix up is delayed so that the parent operators aren't modified until the entire operator + * tree has been vectorized. + */ + private void queueDelayedFixup(Operator parent, + Operator child, Operator vectorChild) { + if (delayedFixups.get(parent) == null) { + HashSet, Operator>> value = + new HashSet, Operator>>(1); + delayedFixups.put(parent, value); + } + delayedFixups.get(parent).add( + new ImmutablePair, Operator>( + child, vectorChild)); + } + + private void runDelayedFixups() { + for (Entry, Set, Operator>>> delayed + : delayedFixups.entrySet()) { + Operator key = delayed.getKey(); + Set, Operator>> value = + delayed.getValue(); + for (Iterator, Operator>> iterator = + value.iterator(); iterator.hasNext();) { + ImmutablePair, Operator> swap = + iterator.next(); + fixupOtherParent(key, swap.getLeft(), swap.getRight()); + } + } + delayedFixups.clear(); + } + private void fixupOtherParent( Operator childMultipleParent, Operator child, diff --git ql/src/test/queries/clientpositive/vectorized_mapjoin3.q ql/src/test/queries/clientpositive/vectorized_mapjoin3.q new file mode 100644 index 0000000..c216499 --- /dev/null +++ ql/src/test/queries/clientpositive/vectorized_mapjoin3.q @@ -0,0 +1,53 @@ + +set hive.vectorized.execution.enabled=true; +set hive.cbo.enable=true; +set hive.stats.column.autogather=true; +set hive.auto.convert.join.noconditionaltask.size=10000000; +set hive.auto.convert.join=true; + +create temporary table table_19 (decimal0801_col decimal(8,1), int_col_1 int) stored as orc; +create temporary table table_6 (int_col_0 int) stored as orc; + +insert into table_19 values +(418.9, 1000), +(418.9, -759), +(418.9, -663), +(418.9, NULL), +(418.9, -959); + +insert into table_6 values (1000); + + +SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) IN (t1.int_col_1) AND decimal0801_col is not null; + + +SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (t1.int_col_1) IN (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) AND decimal0801_col is not null; + + +SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) = (t1.int_col_1) AND decimal0801_col is not null; + + +set hive.explain.user=false; + +EXPLAIN VECTORIZATION DETAIL +SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) IN (t1.int_col_1) AND decimal0801_col is not null; + + +EXPLAIN VECTORIZATION DETAIL +SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (t1.int_col_1) IN (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) AND decimal0801_col is not null; + + +EXPLAIN VECTORIZATION DETAIL +SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) = (t1.int_col_1) AND decimal0801_col is not null; diff --git ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out new file mode 100644 index 0000000..4fb3466 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out @@ -0,0 +1,669 @@ +PREHOOK: query: create temporary table table_19 (decimal0801_col decimal(8,1), int_col_1 int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_19 +POSTHOOK: query: create temporary table table_19 (decimal0801_col decimal(8,1), int_col_1 int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table_19 +PREHOOK: query: create temporary table table_6 (int_col_0 int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_6 +POSTHOOK: query: create temporary table table_6 (int_col_0 int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table_6 +PREHOOK: query: insert into table_19 values +(418.9, 1000), +(418.9, -759), +(418.9, -663), +(418.9, NULL), +(418.9, -959) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table_19 +POSTHOOK: query: insert into table_19 values +(418.9, 1000), +(418.9, -759), +(418.9, -663), +(418.9, NULL), +(418.9, -959) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table_19 +POSTHOOK: Lineage: table_19.decimal0801_col SCRIPT [] +POSTHOOK: Lineage: table_19.int_col_1 SCRIPT [] +PREHOOK: query: insert into table_6 values (1000) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table_6 +POSTHOOK: query: insert into table_6 values (1000) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table_6 +POSTHOOK: Lineage: table_6.int_col_0 SCRIPT [] +Warning: Map Join MAPJOIN[16][bigTable=?] in task 'Map 1' is a cross product +PREHOOK: query: SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) IN (t1.int_col_1) AND decimal0801_col is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@table_19 +PREHOOK: Input: default@table_6 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) IN (t1.int_col_1) AND decimal0801_col is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table_19 +POSTHOOK: Input: default@table_6 +#### A masked pattern was here #### +418.9 +PREHOOK: query: SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (t1.int_col_1) IN (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) AND decimal0801_col is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@table_19 +PREHOOK: Input: default@table_6 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (t1.int_col_1) IN (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) AND decimal0801_col is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table_19 +POSTHOOK: Input: default@table_6 +#### A masked pattern was here #### +418.9 +PREHOOK: query: SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) = (t1.int_col_1) AND decimal0801_col is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@table_19 +PREHOOK: Input: default@table_6 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) = (t1.int_col_1) AND decimal0801_col is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table_19 +POSTHOOK: Input: default@table_6 +#### A masked pattern was here #### +418.9 +Warning: Map Join MAPJOIN[16][bigTable=?] in task 'Map 1' is a cross product +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) IN (t1.int_col_1) AND decimal0801_col is not null +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) IN (t1.int_col_1) AND decimal0801_col is not null +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Reducer 3 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 5 Data size: 580 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: decimal0801_col is not null (type: boolean) + Statistics: Num rows: 5 Data size: 580 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: decimal0801_col (type: decimal(8,1)), int_col_1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 580 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Reducer 3 + Statistics: Num rows: 5 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col2) IN (_col1) (type: boolean) + Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: decimal(8,1)) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: FILTER operator: Vectorizing IN expression only supported for constant values + vectorized: false + Map 2 + Map Operator Tree: + TableScan + alias: tt1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:int_col_0:int, 1:ROW__ID:struct] + Select Operator + expressions: int_col_0 (type: int) + outputColumnNames: int_col_0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: max(int_col_0) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 0:int) -> int + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: int_col_0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 0:int) -> int + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (t1.int_col_1) IN (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) AND decimal0801_col is not null +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (t1.int_col_1) IN (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) AND decimal0801_col is not null +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Reducer 3 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 5 Data size: 580 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:decimal0801_col:decimal(8,1), 1:int_col_1:int, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1:int), SelectColumnIsNotNull(col 0:decimal(8,1))) + predicate: (decimal0801_col is not null and int_col_1 is not null) (type: boolean) + Statistics: Num rows: 4 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: decimal0801_col (type: decimal(8,1)), int_col_1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumnNums: [1] + bigTableRetainedColumnNums: [0] + bigTableValueColumnNums: [0] + className: VectorMapJoinInnerBigOnlyLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumnNums: [0] + outputColumnNames: _col0 + input vertices: + 1 Reducer 3 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: decimal0801_col:decimal(8,1), int_col_1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: tt1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:int_col_0:int, 1:ROW__ID:struct] + Select Operator + expressions: int_col_0 (type: int) + outputColumnNames: int_col_0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: max(int_col_0) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 0:int) -> int + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: int_col_0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 0:int) -> int + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) = (t1.int_col_1) AND decimal0801_col is not null +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) = (t1.int_col_1) AND decimal0801_col is not null +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Reducer 3 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 5 Data size: 580 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:decimal0801_col:decimal(8,1), 1:int_col_1:int, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:decimal(8,1)), SelectColumnIsNotNull(col 1:int)) + predicate: (decimal0801_col is not null and int_col_1 is not null) (type: boolean) + Statistics: Num rows: 4 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: decimal0801_col (type: decimal(8,1)), int_col_1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumnNums: [1] + bigTableRetainedColumnNums: [0] + bigTableValueColumnNums: [0] + className: VectorMapJoinInnerBigOnlyLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumnNums: [0] + outputColumnNames: _col0 + input vertices: + 1 Reducer 3 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: decimal0801_col:decimal(8,1), int_col_1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: tt1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:int_col_0:int, 1:ROW__ID:struct] + Select Operator + expressions: int_col_0 (type: int) + outputColumnNames: int_col_0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: max(int_col_0) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 0:int) -> int + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: int_col_0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 0:int) -> int + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + -- 2.4.0