diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index 391170f..2bf6d28 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -430,6 +430,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
   vectorized_dynamic_partition_pruning.q,\
   vectorized_insert_into_bucketed_table.q,\
   vectorized_mapjoin.q,\
+  vectorized_mapjoin3.q,\
   vectorized_math_funcs.q,\
   vectorized_nested_mapjoin.q,\
   vectorized_parquet.q,\
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 1186bd4..3b996b4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -27,6 +27,8 @@
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.IdentityHashMap;
+import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
@@ -354,6 +356,8 @@
   private BaseWork currentBaseWork;
   private Operator<? extends OperatorDesc> currentOperator;
   private Collection<Class<?>> vectorizedInputFormatExcludes;
+  private Map<Operator<? extends OperatorDesc>, Set<ImmutablePair<Operator<? extends OperatorDesc>, Operator<? extends OperatorDesc>>>> delayedFixups =
+      new IdentityHashMap<Operator<? extends OperatorDesc>, Set<ImmutablePair<Operator<? extends OperatorDesc>, Operator<? extends OperatorDesc>>>>();
 
   public void testSetCurrentBaseWork(BaseWork testBaseWork) {
     currentBaseWork = testBaseWork;
@@ -749,6 +753,8 @@ public VectorDesc getVectorDesc() {
     List<Operator<? extends OperatorDesc>> currentVectorParentList = newOperatorList();
     currentVectorParentList.add(dummyVectorOperator);
 
+    delayedFixups.clear();
+
     do {
       List<Operator<? extends OperatorDesc>> nextParentList = newOperatorList();
       List<Operator<? extends OperatorDesc>> nextVectorParentList= newOperatorList();
@@ -778,6 +784,8 @@
       currentVectorParentList = nextVectorParentList;
     } while (currentParentList.size() > 0);
 
+    runDelayedFixups();
+
     return dummyVectorOperator;
   }
@@ -844,12 +852,41 @@ private void fixupNewVectorChild(
       if (childMultipleParent == parent) {
         childMultipleParents.set(i, vectorParent);
       } else {
-        fixupOtherParent(childMultipleParent, child, vectorChild);
+        queueDelayedFixup(childMultipleParent, child, vectorChild);
      }
    }
    vectorChild.setParentOperators(childMultipleParents);
  }

+  /*
+   * The fix up is delayed so that the parent operators aren't modified until the entire operator
+   * tree has been vectorized.
+   */
+  private void queueDelayedFixup(Operator<? extends OperatorDesc> parent,
+      Operator<? extends OperatorDesc> child, Operator<? extends OperatorDesc> vectorChild) {
+    if (delayedFixups.get(parent) == null) {
+      HashSet<ImmutablePair<Operator<? extends OperatorDesc>, Operator<? extends OperatorDesc>>> value =
+          new HashSet<ImmutablePair<Operator<? extends OperatorDesc>, Operator<? extends OperatorDesc>>>(1);
+      delayedFixups.put(parent, value);
+    }
+    delayedFixups.get(parent).add(
+        new ImmutablePair<Operator<? extends OperatorDesc>, Operator<? extends OperatorDesc>>(
+            child, vectorChild));
+  }
+
+  private void runDelayedFixups() {
+    for (Entry<Operator<? extends OperatorDesc>, Set<ImmutablePair<Operator<? extends OperatorDesc>, Operator<? extends OperatorDesc>>>> delayed
+        : delayedFixups.entrySet()) {
+      Operator<? extends OperatorDesc> key = delayed.getKey();
+      Set<ImmutablePair<Operator<? extends OperatorDesc>, Operator<? extends OperatorDesc>>> value =
+          delayed.getValue();
+      for (ImmutablePair<Operator<? extends OperatorDesc>, Operator<? extends OperatorDesc>> swap : value) {
+        fixupOtherParent(key, swap.getLeft(), swap.getRight());
+      }
+    }
+    delayedFixups.clear();
+  }
+
   private void fixupOtherParent(
       Operator<? extends OperatorDesc> childMultipleParent,
       Operator<? extends OperatorDesc> child,
diff --git ql/src/test/queries/clientpositive/vectorized_mapjoin3.q ql/src/test/queries/clientpositive/vectorized_mapjoin3.q
new file mode 100644
index 0000000..c216499
--- /dev/null
+++ ql/src/test/queries/clientpositive/vectorized_mapjoin3.q
@@ -0,0 +1,53 @@
+
+set hive.vectorized.execution.enabled=true;
+set hive.cbo.enable=true;
+set hive.stats.column.autogather=true;
+set hive.auto.convert.join.noconditionaltask.size=10000000;
+set hive.auto.convert.join=true;
+
+create temporary table table_19 (decimal0801_col decimal(8,1), int_col_1 int) stored as orc;
+create temporary table table_6 (int_col_0 int) stored as orc;
+
+insert into table_19 values
+(418.9, 1000),
+(418.9, -759),
+(418.9, -663),
+(418.9, NULL),
+(418.9, -959);
+
+insert into table_6 values (1000);
+
+
+SELECT t1.decimal0801_col
+FROM table_19 t1
+WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) IN (t1.int_col_1) AND decimal0801_col is not null;
+
+
+SELECT t1.decimal0801_col
+FROM table_19 t1
+WHERE (t1.int_col_1) IN (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) AND decimal0801_col is not null;
+
+
+SELECT t1.decimal0801_col
+FROM table_19 t1
+WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) = (t1.int_col_1) AND decimal0801_col is not null;
+
+
+set hive.explain.user=false;
+
+EXPLAIN VECTORIZATION DETAIL
+SELECT t1.decimal0801_col
+FROM table_19 t1
+WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) IN (t1.int_col_1) AND decimal0801_col is not null;
+
+
+EXPLAIN VECTORIZATION DETAIL
+SELECT t1.decimal0801_col
+FROM table_19 t1
+WHERE (t1.int_col_1) IN (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) AND decimal0801_col is not null;
+
+
+EXPLAIN VECTORIZATION DETAIL
+SELECT t1.decimal0801_col
+FROM table_19 t1
+WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) = (t1.int_col_1) AND decimal0801_col is not null;
diff --git ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out
new file mode 100644
index 0000000..00286ce
--- /dev/null
+++ ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out
@@ -0,0 +1,699 @@
+PREHOOK: query: create temporary table table_19 (decimal0801_col decimal(8,1), int_col_1 int) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table_19
+POSTHOOK: query: create temporary table table_19 (decimal0801_col decimal(8,1), int_col_1 int) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table_19
+PREHOOK: query: create temporary table table_6 (int_col_0 int) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table_6
+POSTHOOK: query: create temporary table table_6
(int_col_0 int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table_6 +PREHOOK: query: insert into table_19 values +(418.9, 1000), +(418.9, -759), +(418.9, -663), +(418.9, NULL), +(418.9, -959) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table_19 +POSTHOOK: query: insert into table_19 values +(418.9, 1000), +(418.9, -759), +(418.9, -663), +(418.9, NULL), +(418.9, -959) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table_19 +POSTHOOK: Lineage: table_19.decimal0801_col SCRIPT [] +POSTHOOK: Lineage: table_19.int_col_1 SCRIPT [] +PREHOOK: query: insert into table_6 values (1000) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table_6 +POSTHOOK: query: insert into table_6 values (1000) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table_6 +POSTHOOK: Lineage: table_6.int_col_0 SCRIPT [] +PREHOOK: query: SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) IN (t1.int_col_1) AND decimal0801_col is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@table_19 +PREHOOK: Input: default@table_6 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) IN (t1.int_col_1) AND decimal0801_col is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table_19 +POSTHOOK: Input: default@table_6 +#### A masked pattern was here #### +418.9 +PREHOOK: query: SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (t1.int_col_1) IN (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) AND decimal0801_col is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@table_19 +PREHOOK: Input: default@table_6 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (t1.int_col_1) IN (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) AND decimal0801_col is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table_19 +POSTHOOK: Input: default@table_6 +#### A masked pattern was here #### +418.9 +PREHOOK: query: SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) = (t1.int_col_1) AND decimal0801_col is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@table_19 +PREHOOK: Input: default@table_6 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) = (t1.int_col_1) AND decimal0801_col is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table_19 +POSTHOOK: Input: default@table_6 +#### A masked pattern was here #### +418.9 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) IN (t1.int_col_1) AND decimal0801_col is not null +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) IN (t1.int_col_1) AND decimal0801_col is not null +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: 
Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Reducer 3 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 5 Data size: 580 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:decimal0801_col:decimal(8,1), 1:int_col_1:int, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:decimal(8,1)), SelectColumnIsNotNull(col 1:int)) + predicate: (decimal0801_col is not null and int_col_1 is not null) (type: boolean) + Statistics: Num rows: 4 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: decimal0801_col (type: decimal(8,1)), int_col_1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumnNums: [1] + bigTableRetainedColumnNums: [0] + bigTableValueColumnNums: [0] + className: VectorMapJoinInnerBigOnlyLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumnNums: [0] + outputColumnNames: _col0 + input vertices: + 1 Reducer 3 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: decimal0801_col:decimal(8,1), int_col_1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: tt1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:int_col_0:int, 1:ROW__ID:struct] + Select Operator + expressions: int_col_0 (type: int) + outputColumnNames: int_col_0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: max(int_col_0) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 
0:int) -> int + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: int_col_0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 0:int) -> int + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (t1.int_col_1) IN (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) AND decimal0801_col is not null +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (t1.int_col_1) IN (SELECT max(tt1.int_col_0) AS int_col FROM 
table_6 tt1) AND decimal0801_col is not null +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Reducer 3 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 5 Data size: 580 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:decimal0801_col:decimal(8,1), 1:int_col_1:int, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1:int), SelectColumnIsNotNull(col 0:decimal(8,1))) + predicate: (decimal0801_col is not null and int_col_1 is not null) (type: boolean) + Statistics: Num rows: 4 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: decimal0801_col (type: decimal(8,1)), int_col_1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumnNums: [1] + bigTableRetainedColumnNums: [0] + bigTableValueColumnNums: [0] + className: VectorMapJoinInnerBigOnlyLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumnNums: [0] + outputColumnNames: _col0 + input vertices: + 1 Reducer 3 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: decimal0801_col:decimal(8,1), int_col_1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: tt1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:int_col_0:int, 1:ROW__ID:struct] + Select Operator + expressions: int_col_0 (type: int) + outputColumnNames: int_col_0 + Select 
Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: max(int_col_0) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 0:int) -> int + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: int_col_0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 0:int) -> int + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT t1.decimal0801_col +FROM table_19 t1 
+WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) = (t1.int_col_1) AND decimal0801_col is not null +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) = (t1.int_col_1) AND decimal0801_col is not null +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Reducer 3 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 5 Data size: 580 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:decimal0801_col:decimal(8,1), 1:int_col_1:int, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:decimal(8,1)), SelectColumnIsNotNull(col 1:int)) + predicate: (decimal0801_col is not null and int_col_1 is not null) (type: boolean) + Statistics: Num rows: 4 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: decimal0801_col (type: decimal(8,1)), int_col_1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumnNums: [1] + bigTableRetainedColumnNums: [0] + bigTableValueColumnNums: [0] + className: VectorMapJoinInnerBigOnlyLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumnNums: [0] + outputColumnNames: _col0 + input vertices: + 1 Reducer 3 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: decimal0801_col:decimal(8,1), int_col_1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: tt1 + 
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:int_col_0:int, 1:ROW__ID:struct] + Select Operator + expressions: int_col_0 (type: int) + outputColumnNames: int_col_0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: max(int_col_0) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 0:int) -> int + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: int_col_0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 0:int) -> int + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values 
IS true + valueColumnNums: [] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out new file mode 100644 index 0000000..fd1179d --- /dev/null +++ ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out @@ -0,0 +1,649 @@ +PREHOOK: query: create temporary table table_19 (decimal0801_col decimal(8,1), int_col_1 int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_19 +POSTHOOK: query: create temporary table table_19 (decimal0801_col decimal(8,1), int_col_1 int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table_19 +PREHOOK: query: create temporary table table_6 (int_col_0 int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_6 +POSTHOOK: query: create temporary table table_6 (int_col_0 int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table_6 +PREHOOK: query: insert into table_19 values +(418.9, 1000), +(418.9, -759), +(418.9, -663), +(418.9, NULL), +(418.9, -959) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table_19 +POSTHOOK: query: insert into table_19 values +(418.9, 1000), +(418.9, -759), +(418.9, -663), +(418.9, NULL), +(418.9, -959) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table_19 +POSTHOOK: Lineage: table_19.decimal0801_col SCRIPT [] +POSTHOOK: Lineage: table_19.int_col_1 SCRIPT [] +PREHOOK: query: insert into table_6 values (1000) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table_6 +POSTHOOK: query: insert into table_6 values (1000) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table_6 +POSTHOOK: Lineage: table_6.int_col_0 SCRIPT [] +Warning: Map Join MAPJOIN[18][bigTable=?] 
in task 'Stage-3:MAPRED' is a cross product +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) IN (t1.int_col_1) AND decimal0801_col is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@table_19 +PREHOOK: Input: default@table_6 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) IN (t1.int_col_1) AND decimal0801_col is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table_19 +POSTHOOK: Input: default@table_6 +#### A masked pattern was here #### +418.9 +PREHOOK: query: SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (t1.int_col_1) IN (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) AND decimal0801_col is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@table_19 +PREHOOK: Input: default@table_6 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (t1.int_col_1) IN (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) AND decimal0801_col is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table_19 +POSTHOOK: Input: default@table_6 +#### A masked pattern was here #### +418.9 +PREHOOK: query: SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) = (t1.int_col_1) AND decimal0801_col is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@table_19 +PREHOOK: Input: default@table_6 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) = (t1.int_col_1) AND decimal0801_col is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table_19 +POSTHOOK: Input: default@table_6 +#### A masked pattern was here #### +418.9 +Warning: Map Join MAPJOIN[18][bigTable=?] 
in task 'Stage-3:MAPRED' is a cross product +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) IN (t1.int_col_1) AND decimal0801_col is not null +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) IN (t1.int_col_1) AND decimal0801_col is not null +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-4 depends on stages: Stage-2 , consists of Stage-5, Stage-1 + Stage-5 has a backup stage: Stage-1 + Stage-3 depends on stages: Stage-5 + Stage-1 + Stage-0 depends on stages: Stage-3, Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: tt1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:int_col_0:int, 1:ROW__ID:struct] + Select Operator + expressions: int_col_0 (type: int) + outputColumnNames: int_col_0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(int_col_0) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 0:int) -> int + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: int_col_0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + 
Stage: Stage-4 + Conditional Operator + + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + TableScan + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 5 Data size: 576 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: decimal0801_col is not null (type: boolean) + Statistics: Num rows: 5 Data size: 576 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: decimal0801_col (type: decimal(8,1)), int_col_1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 576 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 601 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col2) IN (_col1) (type: boolean) + Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: decimal(8,1)) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: FILTER operator: Vectorizing IN expression only supported for constant values + vectorized: false + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 5 Data size: 576 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: decimal0801_col is not null (type: boolean) + Statistics: Num rows: 5 Data size: 576 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: decimal0801_col (type: decimal(8,1)), int_col_1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 576 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 5 Data size: 576 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(8,1)), _col1 (type: int) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 601 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col2) IN (_col1) (type: boolean) + Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: decimal(8,1)) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (t1.int_col_1) IN (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) AND decimal0801_col is not null +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (t1.int_col_1) IN (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) AND decimal0801_col is not null +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-4 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: tt1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:int_col_0:int, 1:ROW__ID:struct] + Select Operator + expressions: int_col_0 (type: int) + outputColumnNames: int_col_0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(int_col_0) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 0:int) -> int + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: int_col_0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:t1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:t1 + TableScan + alias: t1 + Statistics: Num rows: 5 Data size: 576 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (decimal0801_col is not null and int_col_1 is not null) (type: boolean) + Statistics: Num rows: 5 Data size: 576 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: decimal0801_col (type: decimal(8,1)), int_col_1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 576 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:_col0:int] + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 633 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 633 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: _col0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(8,1)] + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) = (t1.int_col_1) AND decimal0801_col is not null +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT t1.decimal0801_col +FROM table_19 t1 +WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) = (t1.int_col_1) AND decimal0801_col is not null +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-4 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + 
TableScan + alias: tt1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:int_col_0:int, 1:ROW__ID:struct] + Select Operator + expressions: int_col_0 (type: int) + outputColumnNames: int_col_0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(int_col_0) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 0:int) -> int + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: int_col_0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:t1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:t1 + TableScan + alias: t1 + Statistics: Num rows: 5 Data size: 576 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (decimal0801_col is not null and int_col_1 is not null) (type: boolean) + Statistics: Num rows: 5 Data size: 576 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: decimal0801_col (type: decimal(8,1)), int_col_1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 576 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + vectorizationSchemaColumns: 
[0:_col0:int] + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 633 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 633 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: _col0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(8,1)] + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +
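
Note (not part of the patch): the core of the Vectorizer.java change above is a two-phase mutation pattern. While the operator DAG is being walked, a fixup that would touch a parent reached through a different branch of the tree is only recorded, keyed by object identity in an IdentityHashMap (two operators can compare equal yet be distinct nodes), and all recorded fixups are applied in one pass after the whole tree has been vectorized, so the walk never observes a half-rewritten parent list. The following is a minimal, self-contained sketch of that pattern, not Hive's API; the Node and Swap types and all names are hypothetical.

import java.util.ArrayList;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class DelayedFixupSketch {

  /** Hypothetical stand-in for Hive's Operator: a node with a mutable child list. */
  static final class Node {
    final String name;
    final List<Node> children = new ArrayList<>();
    Node(String name) { this.name = name; }
    @Override public String toString() { return name; }
  }

  /** A (child, replacement) request, playing the role of the patch's ImmutablePair. */
  static final class Swap {
    final Node child;
    final Node replacement;
    Swap(Node child, Node replacement) { this.child = child; this.replacement = replacement; }
  }

  // IdentityHashMap, as in the patch: keyed by object identity, so two distinct
  // nodes that happen to compare equal still get separate fixup queues.
  private final Map<Node, Set<Swap>> delayedFixups = new IdentityHashMap<>();

  /** Phase 1 (during the tree walk): record the fixup instead of mutating 'parent' now. */
  void queueDelayedFixup(Node parent, Node child, Node replacement) {
    Set<Swap> queued = delayedFixups.get(parent);
    if (queued == null) {
      queued = new HashSet<>(1);
      delayedFixups.put(parent, queued);
    }
    queued.add(new Swap(child, replacement));
  }

  /** Phase 2 (after the whole tree is rewritten): apply every queued swap, then reset. */
  void runDelayedFixups() {
    for (Map.Entry<Node, Set<Swap>> entry : delayedFixups.entrySet()) {
      Node parent = entry.getKey();
      for (Swap swap : entry.getValue()) {
        for (int i = 0; i < parent.children.size(); i++) {
          if (parent.children.get(i) == swap.child) { // identity test, like the patch's == check
            parent.children.set(i, swap.replacement);
          }
        }
      }
    }
    delayedFixups.clear();
  }

  public static void main(String[] args) {
    DelayedFixupSketch fixups = new DelayedFixupSketch();
    Node parent = new Node("parent");
    Node child = new Node("child");
    Node vectorChild = new Node("vectorChild");
    parent.children.add(child);

    fixups.queueDelayedFixup(parent, child, vectorChild);
    System.out.println(parent.children); // [child] -- untouched while "walking"
    fixups.runDelayedFixups();
    System.out.println(parent.children); // [vectorChild] -- swapped in afterwards
  }
}

Running main prints [child] before runDelayedFixups() and [vectorChild] after it, mirroring how the patch defers fixupOtherParent until the whole operator tree has been vectorized.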