diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 62908f9..1b186f7 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2751,9 +2751,9 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal
     HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT("hive.vectorized.use.vectorized.input.format", true,
         "This flag should be set to true to enable vectorizing with vectorized input file format capable SerDe.\n" +
         "The default value is true."),
-    HIVE_VECTORIZATION_USE_VECTOR_DESERIALIZE("hive.vectorized.use.vector.serde.deserialize", false,
+    HIVE_VECTORIZATION_USE_VECTOR_DESERIALIZE("hive.vectorized.use.vector.serde.deserialize", true,
         "This flag should be set to true to enable vectorizing rows using vector deserialize.\n" +
-        "The default value is false."),
+        "The default value is true."),
     HIVE_VECTORIZATION_USE_ROW_DESERIALIZE("hive.vectorized.use.row.serde.deserialize", false,
         "This flag should be set to true to enable vectorizing using row deserialize.\n" +
         "The default value is false."),
diff --git ql/src/test/queries/clientpositive/vector_groupby_mapjoin.q ql/src/test/queries/clientpositive/vector_groupby_mapjoin.q
index c692182..0c24441 100644
--- ql/src/test/queries/clientpositive/vector_groupby_mapjoin.q
+++ ql/src/test/queries/clientpositive/vector_groupby_mapjoin.q
@@ -20,3 +20,17 @@ from src
 where not key in
 (select key from src)
 order by key;
+
+CREATE TABLE orcsrc STORED AS ORC AS SELECT * FROM src;
+
+select *
+from orcsrc
+where not key in
+(select key from orcsrc)
+order by key;
+
+select *
+from orcsrc
+where not key in
+(select key from orcsrc)
+order by key;
diff --git ql/src/test/queries/clientpositive/vectorized_parquet_types.q ql/src/test/queries/clientpositive/vectorized_parquet_types.q
index 68761b6..7467cb3 100644
--- ql/src/test/queries/clientpositive/vectorized_parquet_types.q
+++ ql/src/test/queries/clientpositive/vectorized_parquet_types.q
@@ -1,6 +1,5 @@
 set hive.mapred.mode=nonstrict;
-SET hive.vectorized.execution.enabled=true;
-set hive.explain.user=true;
+set hive.explain.user=false;
 
 DROP TABLE parquet_types_staging;
 DROP TABLE parquet_types;
@@ -47,6 +46,8 @@ INSERT OVERWRITE TABLE parquet_types
 SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar,
 unhex(cbinary), cdecimal FROM parquet_types_staging;
 
+SET hive.vectorized.execution.enabled=true;
+
 -- select
 explain vectorization expression
 SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar,
diff --git ql/src/test/results/clientpositive/llap/vector_bucket.q.out ql/src/test/results/clientpositive/llap/vector_bucket.q.out
index 7917ffd..7e47100 100644
--- ql/src/test/results/clientpositive/llap/vector_bucket.q.out
+++ ql/src/test/results/clientpositive/llap/vector_bucket.q.out
@@ -35,21 +35,37 @@ STAGE PLANS:
                 TableScan
                   alias: values__tmp__table__1
                   Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: tmp_values_col1 (type: string), tmp_values_col2 (type: string)
                     outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1]
                     Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       sort order: 
                       Map-reduce partition columns: UDFToInteger(_col0) (type: int)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                       Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: string), _col1 (type: string)
-            Execution mode: llap
+            Execution mode: vectorized, llap
             LLAP IO: no inputs
             Map Vectorization:
-                enabled: false
-                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
                 inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
             Reduce Vectorization:
diff --git ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out
index 2b1ad08..4386230 100644
--- ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out
+++ ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out
@@ -55,21 +55,38 @@ STAGE PLANS:
                 TableScan
                   alias: decimal_tbl_txt
                   Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0]
                   Select Operator
                     expressions: dec (type: decimal(10,0)), round(dec, -1) (type: decimal(11,0))
                     outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1]
+                        selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 1:decimal(11,0)
                     Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: decimal(10,0))
                       sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                       Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col1 (type: decimal(11,0))
-            Execution mode: llap
+            Execution mode: vectorized, llap
             LLAP IO: no inputs
             Map Vectorization:
-                enabled: false
-                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
                 inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
             Reduce Vectorization:
@@ -141,21 +158,37 @@ STAGE PLANS:
                 TableScan
                   alias: decimal_tbl_txt
                   Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0]
                   Select Operator
                     expressions: dec (type: decimal(10,0))
                     outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0]
                     Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: round(_col0, -1) (type: decimal(11,0))
                       sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                       Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: decimal(10,0))
-            Execution mode: llap
+            Execution mode: vectorized, llap
             LLAP IO: no inputs
             Map Vectorization:
-                enabled: false
-                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
                 inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
            Execution mode: vectorized, llap
            Reduce Vectorization:
diff --git ql/src/test/results/clientpositive/llap/vector_empty_where.q.out ql/src/test/results/clientpositive/llap/vector_empty_where.q.out
index 9f93f86..ed9d551 100644
--- ql/src/test/results/clientpositive/llap/vector_empty_where.q.out
+++ ql/src/test/results/clientpositive/llap/vector_empty_where.q.out
@@ -62,7 +62,7 @@ STAGE PLANS:
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkLongOperator
                           native: true
-                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                       Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -111,7 +111,7 @@ STAGE PLANS:
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkOperator
                           native: false
-                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           nativeConditionsNotMet: Uniform Hash IS false
                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                       value expressions: _col0 (type: bigint)
@@ -218,7 +218,7 @@ STAGE PLANS:
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkLongOperator
                           native: true
-                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                       Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -267,7 +267,7 @@ STAGE PLANS:
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkOperator
                           native: false
-                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           nativeConditionsNotMet: Uniform Hash IS false
                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                       value expressions: _col0 (type: bigint)
@@ -382,7 +382,7 @@ STAGE PLANS:
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkLongOperator
                           native: true
-                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                       Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -431,7 +431,7 @@ STAGE PLANS:
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkOperator
                           native: false
-                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           nativeConditionsNotMet: Uniform Hash IS false
                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                       value expressions: _col0 (type: bigint)
@@ -546,7 +546,7 @@ STAGE PLANS:
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkLongOperator
                           native: true
-                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                       Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -595,7 +595,7 @@ STAGE PLANS:
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkOperator
                           native: false
-                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           nativeConditionsNotMet: Uniform Hash IS false
                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                       value expressions: _col0 (type: bigint)
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
index bc7873b..9d2b852 100644
--- ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
+++ ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
@@ -37,9 +37,16 @@ STAGE PLANS:
                 TableScan
                   alias: src
                   Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: key (type: string), value (type: string)
                     outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1]
                     Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
                     Map Join Operator
                       condition map:
@@ -47,6 +54,10 @@ STAGE PLANS:
                       keys:
                         0 
                        1 
+                      Map Join Vectorization:
+                          className: VectorMapJoinInnerMultiKeyOperator
+                          native: true
+                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
                       outputColumnNames: _col0, _col1, _col2, _col3
                       input vertices:
                         1 Reducer 4
@@ -57,66 +68,110 @@ STAGE PLANS:
                         keys:
                           0 _col0 (type: string)
                           1 _col0 (type: string)
+                        Map Join Vectorization:
+                            className: VectorMapJoinOuterStringOperator
+                            native: true
+                            nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
                         outputColumnNames: _col0, _col1, _col2, _col3, _col5
                         input vertices:
                           1 Reducer 6
                         Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE
                         Filter Operator
+                          Filter Vectorization:
+                              className: VectorFilterOperator
+                              native: true
+                              predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 2, val 0) -> boolean, FilterExprAndExpr(children: SelectColumnIsNull(col 4) -> boolean, SelectColumnIsNotNull(col 0) -> boolean, FilterLongColGreaterEqualLongColumn(col 3, col 2) -> boolean) -> boolean) -> boolean
                           predicate: ((_col2 = 0) or (_col5 is null and _col0 is not null and (_col3 >= _col2))) (type: boolean)
                           Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE
                           Select Operator
                             expressions: _col0 (type: string), _col1 (type: string)
                             outputColumnNames: _col0, _col1
+                            Select Vectorization:
+                                className: VectorSelectOperator
+                                native: true
+                                projectedOutputColumns: [0, 1]
                             Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
                             Reduce Output Operator
                               key expressions: _col0 (type: string)
                               sort order: +
+                              Reduce Sink Vectorization:
+                                  className: VectorReduceSinkOperator
+                                  native: false
+                                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                                  nativeConditionsNotMet: Uniform Hash IS false
                               Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
                               value expressions: _col1 (type: string)
-            Execution mode: llap
+            Execution mode: vectorized, llap
             LLAP IO: no inputs
             Map Vectorization:
-                enabled: false
-                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
                 inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 3 
             Map Operator Tree:
                 TableScan
                   alias: src
                   Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: key (type: string)
                     outputColumnNames: key
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0]
                     Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
                       aggregations: count(), count(key)
                       Group By Vectorization:
-                          vectorOutput: false
+                          aggregators: VectorUDAFCountStar(*) -> bigint, VectorUDAFCount(col 0) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
                           native: false
-                          projectedOutputColumns: null
+                          projectedOutputColumns: [0, 1]
                       mode: hash
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         sort order: 
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                         Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col0 (type: bigint), _col1 (type: bigint)
-            Execution mode: llap
+            Execution mode: vectorized, llap
             LLAP IO: no inputs
             Map Vectorization:
-                enabled: false
-                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
                 inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 5 
             Map Operator Tree:
                 TableScan
                   alias: src
                   Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Group By Operator
                     Group By Vectorization:
-                        vectorOutput: false
+                        className: VectorGroupByOperator
+                        vectorOutput: true
+                        keyExpressions: col 0
                         native: false
-                        projectedOutputColumns: null
+                        projectedOutputColumns: []
                     keys: key (type: string)
                     mode: hash
                     outputColumnNames: _col0
@@ -125,13 +180,21 @@ STAGE PLANS:
                       key expressions: _col0 (type: string)
                       sort order: +
                       Map-reduce partition columns: _col0 (type: string)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkStringOperator
+                          native: true
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                       Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: llap
+            Execution mode: vectorized, llap
             LLAP IO: no inputs
             Map Vectorization:
-                enabled: false
-                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
                 inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
             Reduce Vectorization:
@@ -254,3 +317,49 @@ order by key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 #### A masked pattern was here ####
+PREHOOK: query: CREATE TABLE orcsrc STORED AS ORC AS SELECT * FROM src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orcsrc
+POSTHOOK: query: CREATE TABLE orcsrc STORED AS ORC AS SELECT * FROM src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orcsrc
+POSTHOOK: Lineage: orcsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Map 1' is a cross product
+PREHOOK: query: select *
+from orcsrc
+where not key in
+(select key from orcsrc)
+order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from orcsrc
+where not key in
+(select key from orcsrc)
+order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Map 1' is a cross product
+PREHOOK: query: select *
+from orcsrc
+where not key in
+(select key from orcsrc)
+order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from orcsrc
+where not key in
+(select key from orcsrc)
+order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
diff --git ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
index 48deecf..2af59a5 100644
--- ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
+++ ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
@@ -31,14 +31,23 @@ STAGE PLANS:
                 TableScan
                   alias: lineitem
                   Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 1) -> boolean
                     predicate: l_partkey is not null (type: boolean)
                     Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
                       Group By Vectorization:
-                          vectorOutput: false
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          keyExpressions: col 1
                           native: false
-                          projectedOutputColumns: null
+                          projectedOutputColumns: []
                       keys: l_partkey (type: int)
                       mode: hash
                       outputColumnNames: _col0
@@ -47,54 +56,94 @@ STAGE PLANS:
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: llap
+            Execution mode: vectorized, llap
             LLAP IO: no inputs
             Map Vectorization:
-                enabled: false
-                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
                 inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 3 
             Map Operator Tree:
                 TableScan
                   alias: li
                   Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 3, val 1) -> boolean, SelectColumnIsNotNull(col 1) -> boolean, SelectColumnIsNotNull(col 0) -> boolean) -> boolean
                     predicate: ((l_linenumber = 1) and l_partkey is not null and l_orderkey is not null) (type: boolean)
                     Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
                       outputColumnNames: _col0, _col1, _col2
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 1, 2]
                       Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col1 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col1 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col0 (type: int), _col2 (type: int)
-            Execution mode: llap
+            Execution mode: vectorized, llap
             LLAP IO: no inputs
             Map Vectorization:
-                enabled: false
-                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
                 inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 4 
            Map Operator Tree:
                 TableScan
                   alias: lineitem
                   Statistics: Num rows: 100 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 14, val AIR) -> boolean, SelectColumnIsNotNull(col 0) -> boolean) -> boolean
                     predicate: ((l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean)
                     Statistics: Num rows: 14 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: l_orderkey (type: int)
                       outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0]
                       Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
                       Group By Operator
                         Group By Vectorization:
-                            vectorOutput: false
+                            className: VectorGroupByOperator
+                            vectorOutput: true
+                            keyExpressions: col 0
                             native: false
-                            projectedOutputColumns: null
+                            projectedOutputColumns: []
                         keys: _col0 (type: int)
                         mode: hash
                         outputColumnNames: _col0
@@ -103,13 +152,21 @@ STAGE PLANS:
                           key expressions: _col0 (type: int)
                           sort order: +
                           Map-reduce partition columns: _col0 (type: int)
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkLongOperator
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: llap
+            Execution mode: vectorized, llap
             LLAP IO: no inputs
             Map Vectorization:
-                enabled: false
-                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
                 inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
             Reduce Vectorization:
@@ -241,14 +298,23 @@ STAGE PLANS:
                 TableScan
                   alias: lineitem
                   Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 1) -> boolean
                     predicate: l_partkey is not null (type: boolean)
                     Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
                       Group By Vectorization:
-                          vectorOutput: false
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          keyExpressions: col 1
                           native: false
-                          projectedOutputColumns: null
+                          projectedOutputColumns: []
                       keys: l_partkey (type: int)
                       mode: hash
                       outputColumnNames: _col0
@@ -257,54 +323,95 @@ STAGE PLANS:
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: llap
+            Execution mode: vectorized, llap
             LLAP IO: no inputs
             Map Vectorization:
-                enabled: false
-                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
                 inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 3 
             Map Operator Tree:
                 TableScan
                   alias: li
                   Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 3, val 1) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean
                     predicate: ((l_linenumber = 1) and l_partkey is not null) (type: boolean)
                     Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int)
                       outputColumnNames: _col0, _col1, _col2, _col3
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 1, 2, 16]
+                          selectExpressions: ConstantVectorExpression(val 1) -> 16:long
                       Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col1 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col1 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 17 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int)
-            Execution mode: llap
+            Execution mode: vectorized, llap
             LLAP IO: no inputs
             Map Vectorization:
-                enabled: false
-                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
                 inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 4 
            Map Operator Tree:
                 TableScan
                   alias: lineitem
                   Statistics: Num rows: 100 Data size: 9600 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 14, val AIR) -> boolean, FilterLongColEqualLongColumn(col 3, col 3) -> boolean) -> boolean
                     predicate: ((l_shipmode = 'AIR') and (l_linenumber = l_linenumber)) (type: boolean)
                     Statistics: Num rows: 7 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: l_orderkey (type: int), l_linenumber (type: int)
                       outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 3]
                       Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
                       Group By Operator
                         Group By Vectorization:
-                            vectorOutput: false
+                            className: VectorGroupByOperator
+                            vectorOutput: true
+                            keyExpressions: col 0, col 3
                             native: false
-                            projectedOutputColumns: null
+                            projectedOutputColumns: []
                         keys: _col0 (type: int), _col1 (type: int)
                         mode: hash
                         outputColumnNames: _col0, _col1
@@ -313,13 +420,21 @@ STAGE PLANS:
                           key expressions: _col0 (type: int), _col1 (type: int)
                           sort order: ++
                           Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkMultiKeyOperator
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: llap
+            Execution mode: vectorized, llap
             LLAP IO: no inputs
             Map Vectorization:
-                enabled: false
-                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
                 inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
             Reduce Vectorization:
diff --git ql/src/test/results/clientpositive/llap/vector_non_string_partition.q.out ql/src/test/results/clientpositive/llap/vector_non_string_partition.q.out
index ae52b60..81dafa1 100644
--- ql/src/test/results/clientpositive/llap/vector_non_string_partition.q.out
+++ ql/src/test/results/clientpositive/llap/vector_non_string_partition.q.out
@@ -27,10 +27,14 @@ POSTHOOK: query: SHOW PARTITIONS non_string_part
 POSTHOOK: type: SHOWPARTITIONS
 POSTHOOK: Input: default@non_string_part
 ctinyint=__HIVE_DEFAULT_PARTITION__
-PREHOOK: query: EXPLAIN SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -38,6 +42,7 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Tez
+#### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
 #### A masked pattern was here ####
@@ -46,36 +51,79 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: non_string_part
-                  Statistics: Num rows: 3073 Data size: 339150 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 3073 Data size: 351442 Basic stats: COMPLETE Column stats: PARTIAL
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterLongColGreaterLongScalar(col 0, val 0) -> boolean
                     predicate: (cint > 0) (type: boolean)
-                    Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1024 Data size: 4096 Basic stats: COMPLETE Column stats: PARTIAL
                     Select Operator
                       expressions: cint (type: int), ctinyint (type: tinyint)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 4]
+                      Statistics: Num rows: 1024 Data size: 4096 Basic stats: COMPLETE Column stats: PARTIAL
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
-                        Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
+                        Statistics: Num rows: 1024 Data size: 4096 Basic stats: COMPLETE Column stats: PARTIAL
+                        TopN Hash Memory Usage: 0.1
                         value expressions: _col1 (type: tinyint)
             Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
-            Execution mode: vectorized, uber
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: tinyint)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1]
+                Statistics: Num rows: 1024 Data size: 4096 Basic stats: COMPLETE Column stats: PARTIAL
                 Limit
                   Number of rows: 10
-                  Statistics: Num rows: 10 Data size: 1100 Basic stats: COMPLETE Column stats: NONE
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: PARTIAL
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 10 Data size: 1100 Basic stats: COMPLETE Column stats: NONE
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: PARTIAL
                     table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
     Stage: Stage-0
@@ -104,10 +152,14 @@ POSTHOOK: Input: default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__
 799471 NULL
 1248059 NULL
 1286921 NULL
-PREHOOK: query: EXPLAIN SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -115,6 +167,7 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Tez
+#### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
 #### A masked pattern was here ####
@@ -123,35 +176,78 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: non_string_part
-                  Statistics: Num rows: 3073 Data size: 339150 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 3073 Data size: 363734 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterLongColGreaterLongScalar(col 0, val 0) -> boolean
                     predicate: (cint > 0) (type: boolean)
-                    Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1024 Data size: 121205 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cint (type: int), cstring1 (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 1]
+                      Statistics: Num rows: 1024 Data size: 121205 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int), _col1 (type: string)
                         sort order: ++
-                        Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
+                        Statistics: Num rows: 1024 Data size: 121205 Basic stats: COMPLETE Column stats: NONE
+                        TopN Hash Memory Usage: 0.1
             Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
-            Execution mode: vectorized, uber
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Select Operator
                 expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1]
+                Statistics: Num rows: 1024 Data size: 121205 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 10
-                  Statistics: Num rows: 10 Data size: 1100 Basic stats: COMPLETE Column stats: NONE
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
+                  Statistics: Num rows: 10 Data size: 1180 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                     compressed: false
-                    Statistics: Num rows: 10 Data size: 1100 Basic stats: COMPLETE Column stats: NONE
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 10 Data size: 1180 Basic stats: COMPLETE Column stats: NONE
                     table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
     Stage: Stage-0
diff --git ql/src/test/results/clientpositive/llap/vector_tablesample_rows.q.out ql/src/test/results/clientpositive/llap/vector_tablesample_rows.q.out
index 56fb85c..aa63cb1 100644
--- ql/src/test/results/clientpositive/llap/vector_tablesample_rows.q.out
+++ ql/src/test/results/clientpositive/llap/vector_tablesample_rows.q.out
@@ -264,7 +264,7 @@ STAGE PLANS:
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkOperator
                           native: false
-                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           nativeConditionsNotMet: Uniform Hash IS false
                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                       value expressions: _col0 (type: bigint)
diff --git ql/src/test/results/clientpositive/llap/vectorization_div0.q.out ql/src/test/results/clientpositive/llap/vectorization_div0.q.out
index 2b61979..37d05c8 100644
--- ql/src/test/results/clientpositive/llap/vectorization_div0.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_div0.q.out
@@ -1,27 +1,71 @@
-PREHOOK: query: -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants
-explain
+PREHOOK: query: explain vectorization expression
 select cdouble / 0.0 from alltypesorc limit 100
 PREHOOK: type: QUERY
-POSTHOOK: query: -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants
-explain
+POSTHOOK: query: explain vectorization expression
 select cdouble / 0.0 from alltypesorc limit 100
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+                  Select Operator
+                    expressions: (cdouble / 0.0) (type: double)
+                    outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [12]
+                        selectExpressions: DoubleColDivideDoubleScalar(col 5, val 0.0) -> 12:double
+                    Statistics: Num rows: 12288 Data size: 98304 Basic stats: COMPLETE Column stats: COMPLETE
+                    Limit
+                      Number of rows: 100
+                      Limit Vectorization:
+                          className: VectorLimitOperator
+                          native: true
+                      Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
+                      File Output Operator
+                        compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
+                        Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+
   Stage: Stage-0
     Fetch Operator
       limit: 100
       Processor Tree:
-        TableScan
-          alias: alltypesorc
-          Select Operator
-            expressions: (cdouble / 0.0) (type: double)
-            outputColumnNames: _col0
-            Limit
-              Number of rows: 100
-              ListSink
+        ListSink
 
 PREHOOK: query: select cdouble / 0.0 from alltypesorc limit 100
 PREHOOK: type: QUERY
@@ -131,20 +175,18 @@ NULL
 NULL
 NULL
 NULL
-PREHOOK: query: -- There are no zeros in the table, but there is 988888, so use it as zero
-
--- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants as numerators
-explain
+PREHOOK: query: explain vectorization expression
 select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L)
 from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100
 PREHOOK: type: QUERY
-POSTHOOK: query: -- There are no zeros in the table, but there is 988888, so use it as zero
-
--- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants as numerators
-explain
+POSTHOOK: query: explain vectorization expression
 select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L)
 from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -152,6 +194,7 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Tez
+#### A masked pattern was here ####
      Edges:
        Reducer 2 <- Map 1 (SIMPLE_EDGE)
 #### A masked pattern was here ####
@@ -160,36 +203,80 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 146792 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3, val 0) -> boolean, FilterLongColLessLongScalar(col 3, val 100000000) -> boolean) -> boolean
                     predicate: ((cbigint > 0) and (cbigint < 100000000)) (type: boolean)
-                    Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1365 Data size: 16320 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
-                      expressions: (cbigint - 988888) (type: bigint), (cdouble / UDFToDouble((cbigint - 988888))) (type: double), (1.2 / UDFToDouble((cbigint - 988888))) (type: double)
+                      expressions: (cbigint - 988888) (type: bigint), (cdouble / UDFToDouble((cbigint - 988888))) (type: double), (1.2 / CAST( (cbigint - 988888) AS decimal(19,0))) (type: decimal(22,21))
                       outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [12, 15, 17]
+                          selectExpressions: LongColSubtractLongScalar(col 3, val 988888) -> 12:long, DoubleColDivideDoubleColumn(col 5, col 14)(children: CastLongToDouble(col 13)(children: LongColSubtractLongScalar(col 3, val 988888) -> 13:long) -> 14:double) -> 15:double, DecimalScalarDivideDecimalColumn(val 1.2, col 16)(children: CastLongToDecimal(col 13)(children: LongColSubtractLongScalar(col 3, val 988888) -> 13:long) -> 16:decimal(19,0)) -> 17:decimal(22,21)
+                      Statistics: Num rows: 1365 Data size: 174720 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: bigint), _col1 (type: double)
                         sort order: ++
-                        Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col2 (type: double)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
+                        Statistics: Num rows: 1365 Data size: 174720 Basic stats: COMPLETE Column stats: COMPLETE
+                        TopN Hash Memory Usage: 0.1
+                        value expressions: _col2 (type: decimal(22,21))
             Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
-            Execution mode: vectorized, uber
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
              Select Operator
-                expressions: KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: double)
+                expressions: KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: decimal(22,21))
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2]
+                Statistics: Num rows: 1365 Data size: 174720 Basic stats: COMPLETE Column stats: COMPLETE
                 Limit
                   Number of rows: 100
-                  Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
+                  Statistics: Num rows: 100 Data size: 12800 Basic stats: COMPLETE Column stats: COMPLETE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 100 Data size: 12800 Basic stats: COMPLETE Column stats: COMPLETE
                     table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
     Stage: Stage-0
@@ -208,9 +295,9 @@ from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
--985319 NULL -1.21787969175465E-6
--985319 2.0297994862577501E-4 -1.21787969175465E-6
--63925 0.11256941728588189 -1.8771998435666796E-5
+-985319 NULL -0.000001217879691754650
+-985319 2.0297994862577501E-4 -0.000001217879691754650
+-63925 0.11256941728588189 -0.000018771998435666797
 0 NULL NULL
 0 NULL NULL
 0 NULL NULL
@@ -220,106 +307,106 @@ POSTHOOK: Input: default@alltypesorc
 0 NULL NULL
 0 NULL NULL
 0 NULL NULL
 0 NULL NULL
 0 NULL NULL
 0 NULL NULL
-392309 NULL 3.05881333336732E-6
-673083 -0.010691103474608629 1.7828410463494101E-6
-2331159 NULL 5.147654021025593E-7
-2342037 NULL 5.123744842630582E-7
-3533105 -5.660743170667161E-5 3.3964459024002967E-7
-3768727 0.004139594085748318 3.184099033970887E-7
-4728619 NULL 2.5377388197272816E-7
-5391403 NULL 2.2257657236901044E-7
-7022666 -0.0010246820794268159 1.708752772807364E-7
-7470430 NULL 1.6063332365071354E-7
-8276429 NULL 1.4499006757624573E-7
-8286860 -8.683626850218298E-4 1.44807562816314E-7
-8299981 -8.669899364829872E-4 1.445786442161735E-7
-9247593 NULL 1.297634962957388E-7
-9821695 -7.326637611939691E-4 1.2217850381222386E-7
-10000738 0.001559984873116364 1.1999114465352456E-7
-10081828 0.0015474376273826532 1.190260337708598E-7
-10745355 -6.696847149303117E-4 1.1167616146697805E-7
-11127199 -1.797397530142132E-5 1.0784385180852791E-7
-11722580 NULL 1.023665438836843E-7
-12649396 NULL 9.486618965838368E-8
-13126214 -1.5236685917203544E-5 9.142011550322126E-8
-14042667 NULL 8.545385288991044E-8
-14943972 -1.3383322720358416E-5 8.02999363221505E-8
-16259022 NULL 7.380517721176587E-8
-16531556 -1.2098074736582569E-5 7.258844841949542E-8
-16596157 NULL 7.230589587697923E-8
-17058489 -1.1724367849930905E-5 7.034620709958544E-8
-17247320 -4.172242412154468E-4 6.957602688417679E-8
-19004427 8.209139901981786E-4 6.314318237534864E-8
-19498517 NULL 6.154314197331007E-8
-20165679 7.736411950224934E-4 5.95070466013071E-8
-20547875 NULL 5.840019953401507E-8
-23264783 NULL 5.158010715165492E-8
-23475527 6.645644206411213E-4 5.111706331448917E-8
-24379905 NULL 4.922086447834805E-8
-24514624 -2.935390728407664E-4 4.895037345871591E-8
-25154198 -2.860755091456305E-4 4.770575472133916E-8
-25245192 -7.922300610745999E-6 4.7533803664475993E-8
-26610943 NULL 4.509423059528556E-8
-27520143 5.668938566198584E-4 4.360442458456702E-8
-27818379 NULL 4.313694913711543E-8
-28400244 NULL 4.225315810666979E-8
-28698999 5.43607810153936E-4 4.18133050563889E-8
-28806400 -6.9429015774272385E-6 4.165740946456343E-8
-29920877 5.214085135271938E-4 4.010577631130264E-8
-33126539 NULL 3.622473207961749E-8
-34603086 NULL 3.467898787986713E-8
-35156265 NULL 3.413331876978399E-8
-35862260 NULL 3.346136021544654E-8
-36123797 -1.992038655294182E-4 3.321909931007529E-8
-36341671 -1.980096072082101E-4 3.301994561559924E-8
-36413215 -5.4925114412446145E-6 3.2955068647467685E-8
-36578596 4.2650625518814335E-4 3.280607052277239E-8
-36796441 -1.955623914823719E-4 3.2611849607955287E-8
-39723587 NULL 3.0208752296211316E-8
-39985709 -1.7996429674411925E-4 3.001072208073139E-8
-40018606 NULL 2.998605198791782E-8
-41003161 NULL 2.9266036342905367E-8
-41158231 3.790493328053871E-4 2.9155772025284565E-8
-41848817 NULL 2.8674645689506587E-8
-44047567 -1.633688416888043E-4 2.724327543448654E-8
-45125678 NULL 2.6592398234991615E-8
-45180154 NULL 2.6560334433565674E-8
-45717793 3.4124569399052136E-4 2.6247986205283355E-8
-46163162 NULL 2.5994753132378583E-8
-46525838 3.353190543284787E-4 2.5792120068852925E-8
-48626663 NULL 2.4677819244968545E-8
-49102701 -1.465499830650864E-4 2.4438574163160596E-8
-50300445 -1.4306036457530346E-4 2.3856647789100076E-8
-50929325 -1.412938420055636E-4 2.356206370298448E-8
-52422534 -1.3726921327381848E-4 2.2890919389741823E-8
-52667422 2.9621727070673783E-4 2.2784483356713376E-8
-52962061 2.945693522010029E-4 2.265772852004381E-8
-53695172 NULL 2.234837798824818E-8
-54760317 NULL 2.1913678841559662E-8
-55020655 2.835480602693661E-4 2.180999117513232E-8
-56102034 NULL 2.1389598815615135E-8
-56131313 NULL 2.13784416551952E-8
-56838351 -3.5187509222426247E-6 2.1112505533455745E-8
-56997841 -3.5089048372902406E-6 2.105342902374144E-8
-57778807 -1.2454393528755274E-4 2.076886080392764E-8
-58080381 NULL 2.0661021490199935E-8
-58307527 NULL 2.058053328174937E-8
-58536385 -1.2293208745295768E-4 2.0500070170031853E-8
-59347745 NULL 2.0219807846111087E-8
-60229567 NULL 1.992376933408802E-8
-60330397 NULL 1.9890470801974003E-8
-PREHOOK: query: -- There are no zeros in the table, but there is -200.0, so use it as zero
-
-explain
+392309 NULL 0.000003058813333367320
+673083 -0.010691103474608629 0.000001782841046349410
+2331159 NULL 0.000000514765402102559
+2342037 NULL 0.000000512374484263058
+3533105 -5.660743170667161E-5 0.000000339644590240030
+3768727 0.004139594085748318 0.000000318409903397089
+4728619 NULL 0.000000253773881972728
+5391403 NULL 0.000000222576572369010
+7022666 -0.0010246820794268159 0.000000170875277280736
+7470430 NULL 0.000000160633323650714
+8276429 NULL 0.000000144990067576246
+8286860 -8.683626850218298E-4 0.000000144807562816314
+8299981 -8.669899364829872E-4 0.000000144578644216174
+9247593 NULL 0.000000129763496295739
+9821695 -7.326637611939691E-4 0.000000122178503812224
+10000738 0.001559984873116364 0.000000119991144653525
+10081828 0.0015474376273826532 0.000000119026033770860
+10745355 -6.696847149303117E-4 0.000000111676161466978
+11127199 -1.797397530142132E-5 0.000000107843851808528
+11722580 NULL 0.000000102366543883684
+12649396 NULL 0.000000094866189658384
+13126214 -1.5236685917203544E-5 0.000000091420115503221
+14042667 NULL 0.000000085453852889910
+14943972 -1.3383322720358416E-5 0.000000080299936322150
+16259022 NULL 0.000000073805177211766
+16531556 -1.2098074736582569E-5 0.000000072588448419495
+16596157 NULL 0.000000072305895876979
+17058489 -1.1724367849930905E-5 0.000000070346207099585
+17247320 -4.172242412154468E-4 0.000000069576026884177
+19004427 8.209139901981786E-4 0.000000063143182375349
+19498517 NULL 0.000000061543141973310
+20165679 7.736411950224934E-4 0.000000059507046601307
+20547875 NULL 0.000000058400199534015
+23264783 NULL 0.000000051580107151655
+23475527 6.645644206411213E-4 0.000000051117063314489
+24379905 NULL 0.000000049220864478348
+24514624 -2.935390728407664E-4 0.000000048950373458716
+25154198 -2.860755091456305E-4 0.000000047705754721339
+25245192 -7.922300610745999E-6 0.000000047533803664476
+26610943 NULL 0.000000045094230595286
+27520143 5.668938566198584E-4 0.000000043604424584567
+27818379 NULL 0.000000043136949137115
+28400244 NULL 0.000000042253158106670
+28698999 5.43607810153936E-4 0.000000041813305056389
+28806400 -6.9429015774272385E-6 0.000000041657409464563
+29920877 5.214085135271938E-4 0.000000040105776311303
+33126539 NULL 0.000000036224732079617
+34603086 NULL 0.000000034678987879867
+35156265 NULL 0.000000034133318769784
+35862260 NULL 0.000000033461360215447
+36123797 -1.992038655294182E-4 0.000000033219099310075
+36341671 -1.980096072082101E-4 0.000000033019945615599
+36413215 -5.4925114412446145E-6 0.000000032955068647468
+36578596 4.2650625518814335E-4 0.000000032806070522772
+36796441 -1.955623914823719E-4 0.000000032611849607955
+39723587 NULL 0.000000030208752296211
+39985709 -1.7996429674411925E-4 0.000000030010722080731
+40018606 NULL 0.000000029986051987918
+41003161 NULL 0.000000029266036342905
+41158231 3.790493328053871E-4 0.000000029155772025285
+41848817 NULL 0.000000028674645689507
+44047567 -1.633688416888043E-4 0.000000027243275434487
+45125678 NULL 0.000000026592398234992
+45180154 NULL 0.000000026560334433566
+45717793 3.4124569399052136E-4 0.000000026247986205283
+46163162 NULL 0.000000025994753132379
+46525838 3.353190543284787E-4 0.000000025792120068853
+48626663 NULL 0.000000024677819244969
+49102701 -1.465499830650864E-4 0.000000024438574163161
+50300445 -1.4306036457530346E-4 0.000000023856647789100
+50929325 -1.412938420055636E-4 0.000000023562063702984
+52422534 -1.3726921327381848E-4 0.000000022890919389742
+52667422 2.9621727070673783E-4 0.000000022784483356713
+52962061 2.945693522010029E-4 0.000000022657728520044
+53695172 NULL 0.000000022348377988248
+54760317 NULL
0.000000021913678841560 +55020655 2.835480602693661E-4 0.000000021809991175132 +56102034 NULL 0.000000021389598815615 +56131313 NULL 0.000000021378441655195 +56838351 -3.5187509222426247E-6 0.000000021112505533456 +56997841 -3.5089048372902406E-6 0.000000021053429023741 +57778807 -1.2454393528755274E-4 0.000000020768860803928 +58080381 NULL 0.000000020661021490200 +58307527 NULL 0.000000020580533281749 +58536385 -1.2293208745295768E-4 0.000000020500070170032 +59347745 NULL 0.000000020219807846111 +60229567 NULL 0.000000019923769334088 +60330397 NULL 0.000000019890470801974 +PREHOOK: query: explain vectorization expression select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100 PREHOOK: type: QUERY -POSTHOOK: query: -- There are no zeros in the table, but there is -200.0, so use it as zero - -explain +POSTHOOK: query: explain vectorization expression select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -327,6 +414,7 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### @@ -335,36 +423,80 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 146792 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5, val -500.0) -> boolean, FilterDoubleColLessDoubleScalar(col 5, val -199.0) -> boolean) -> boolean predicate: ((cdouble >= -500.0) and (cdouble < -199.0)) (type: boolean) - Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 16320 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (cdouble + 200.0) (type: double), (UDFToDouble(cbigint) / (cdouble + 200.0)) (type: double), ((cdouble + 200.0) / (cdouble + 200.0)) (type: double), (3.0 / (cdouble + 200.0)) (type: double), (1.2 / (cdouble + 200.0)) (type: double) outputColumnNames: _col0, _col1, _col2, _col4, _col5 - Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12, 15, 16, 14, 17] + selectExpressions: DoubleColAddDoubleScalar(col 5, val 200.0) -> 12:double, DoubleColDivideDoubleColumn(col 13, col 14)(children: CastLongToDouble(col 3) -> 13:double, DoubleColAddDoubleScalar(col 5, val 200.0) -> 14:double) -> 15:double, DoubleColDivideDoubleColumn(col 13, col 14)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double, DoubleColAddDoubleScalar(col 5, val 
200.0) -> 14:double) -> 16:double, DoubleScalarDivideDoubleColumn(val 3.0, col 13)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double) -> 14:double, DoubleScalarDivideDoubleColumn(val 1.2, col 13)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double) -> 17:double + Statistics: Num rows: 1365 Data size: 65520 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: double) sort order: ++ - Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + Statistics: Num rows: 1365 Data size: 65520 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: double), _col4 (type: double), _col5 (type: double) Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, uber + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 1, 3, 4] + Statistics: Num rows: 1365 Data size: 65520 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 100 - Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 100 Data size: 4800 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 100 Data size: 4800 Basic stats: COMPLETE Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 diff --git ql/src/test/results/clientpositive/llap/vectorization_limit.q.out ql/src/test/results/clientpositive/llap/vectorization_limit.q.out index afaf2d6..5700005 100644 --- 
ql/src/test/results/clientpositive/llap/vectorization_limit.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_limit.q.out @@ -1,26 +1,59 @@ WARNING: Comparing a bigint and a double may result in a loss of precision. -PREHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +PREHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 PREHOOK: type: QUERY -POSTHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +POSTHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 183488 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean) + Statistics: Num rows: 1365 Data size: 20400 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cbigint (type: bigint), cdouble (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1365 Data size: 16320 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 7 + Statistics: Num rows: 7 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 7 Processor Tree: - TableScan - alias: alltypesorc - Filter Operator - predicate: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean) - Select Operator - expressions: cbigint (type: bigint), cdouble (type: double) - outputColumnNames: _col0, _col1 - Limit - Number of rows: 7 - ListSink + ListSink WARNING: Comparing a bigint and a double may result in a loss of precision. 
PREHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 @@ -38,16 +71,16 @@ POSTHOOK: Input: default@alltypesorc -1887561756 -8881.0 -1887561756 -2281.0 -1887561756 9531.0 -PREHOOK: query: -- HIVE-3562 Some limit can be pushed down to map stage - c/p parts from limit_pushdown - -explain +PREHOOK: query: explain vectorization expression select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 PREHOOK: type: QUERY -POSTHOOK: query: -- HIVE-3562 Some limit can be pushed down to map stage - c/p parts from limit_pushdown - -explain +POSTHOOK: query: explain vectorization expression select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -55,6 +88,7 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### @@ -63,37 +97,79 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 146796 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: ctinyint is not null (type: boolean) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 5, 1] + Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: double) sort order: ++ - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 value expressions: _col2 (type: smallint) Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, uber + Execution mode: vectorized, llap + Reduce 
Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 - Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -130,14 +206,16 @@ POSTHOOK: Input: default@alltypesorc -64 -2919.0 -2919 -64 -1600.0 -1600 -64 -200.0 -200 -PREHOOK: query: -- deduped RS -explain +PREHOOK: query: explain vectorization expression select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 PREHOOK: type: QUERY -POSTHOOK: query: -- deduped RS -explain +POSTHOOK: query: explain vectorization expression select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -145,6 +223,7 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### @@ -153,43 +232,73 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint), (cdouble + 1.0) (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 12] + selectExpressions: DoubleColAddDoubleScalar(col 5, val 1.0) -> 12:double + Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: avg(_col1) + Group By Vectorization: + aggregators: VectorUDAFAvgDouble(col 12) -> struct<count:bigint,sum:double> + className: VectorGroupByOperator + 
vectorOutput: false + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDouble(col 12) -> struct<count:bigint,sum:double> output type STRUCT requires PRIMITIVE IS false keys: _col0 (type: tinyint) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 95 Data size: 7888 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 95 Data size: 7888 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: struct<count:bigint,sum:double>) Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: uber + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct<count:bigint,sum:double> of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0) keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 95 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 - Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -226,14 +335,16 @@ NULL 9370.0945309795 -48 1672.909090909091 -47 -574.6428571428571 -46 3033.55 -PREHOOK: query: -- distincts -explain +PREHOOK: query: explain vectorization expression select distinct(ctinyint) from alltypesorc limit 20 PREHOOK: type: QUERY -POSTHOOK: query: -- distincts -explain +POSTHOOK: query: explain vectorization expression select distinct(ctinyint) from alltypesorc limit 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -241,6 +352,7 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### @@ -249,40 +361,86 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: 
Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: ctinyint - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: ctinyint (type: tinyint) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 95 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false + Statistics: Num rows: 95 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: vectorized, uber + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 95 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 - Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -319,12 +477,16 @@ NULL -48 -47 -46 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -332,6 +494,7 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### @@ -340,45 +503,98 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: ctinyint (type: tinyint), cdouble (type: double) outputColumnNames: ctinyint, cdouble - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 5] + Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 5 + native: false + projectedOutputColumns: [] keys: ctinyint (type: tinyint), cdouble (type: double) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: double) sort order: ++ Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 - Execution mode: uber + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: tinyint), KEY._col1 (type: double) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col1) + Group By Vectorization: + aggregators: VectorUDAFCount(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: _col0 (type: tinyint) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 95 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 - Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -415,14 +631,16 @@ NULL 2932 -48 29 -47 22 -46 24 -PREHOOK: query: -- limit zero -explain +PREHOOK: query: explain vectorization expression select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 PREHOOK: type: QUERY -POSTHOOK: query: -- limit zero -explain +POSTHOOK: query: explain vectorization expression select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -441,14 +659,16 @@ POSTHOOK: query: select ctinyint,cdouble from alltypesorc order by ctinyint limi POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -PREHOOK: query: -- 2MR (applied to last RS) -explain +PREHOOK: query: explain vectorization expression select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 PREHOOK: type: QUERY -POSTHOOK: query: -- 2MR (applied to last RS) -explain +POSTHOOK: query: explain vectorization expression select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -456,6 +676,7 @@ STAGE DEPENDENCIES: STAGE 
PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) @@ -465,57 +686,124 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: ctinyint is not null (type: boolean) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9173 Data size: 82188 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cdouble (type: double), ctinyint (type: tinyint) outputColumnNames: cdouble, ctinyint - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5, 0] + Statistics: Num rows: 9173 Data size: 82188 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 5 + native: false + projectedOutputColumns: [0] keys: cdouble (type: double) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3185 Data size: 44512 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 3185 Data size: 44512 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 3072 Data size: 
660491 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3185 Data size: 44512 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: bigint), _col0 (type: double) sort order: ++ - Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + Statistics: Num rows: 3185 Data size: 44512 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Reducer 3 - Execution mode: vectorized, uber + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: double), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] + Statistics: Num rows: 3185 Data size: 44512 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 - Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 diff --git ql/src/test/results/clientpositive/llap/vectorization_offset_limit.q.out ql/src/test/results/clientpositive/llap/vectorization_offset_limit.q.out index d5a1f47..8be0cac 100644 --- ql/src/test/results/clientpositive/llap/vectorization_offset_limit.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_offset_limit.q.out @@ -118,7 +118,7 @@ STAGE PLANS: Reduce Sink Vectorization: className: VectorReduceSinkOperator native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: No 
TopN IS false, Uniform Hash IS false Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 diff --git ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out index 277ca32..1ff6fe3 100644 --- ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out @@ -411,21 +411,36 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 2 Data size: 52 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 52 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2 Data size: 52 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out index 3eb655d..9d9b693 100644 --- ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out @@ -76,12 +76,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -249,12 +253,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan @@ -401,12 +409,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan @@ -554,12 +566,16 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 736000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: TableScan @@ -773,12 +789,16 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 736000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: TableScan @@ -968,12 +988,16 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 2000 Data size: 736000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan @@ -1135,12 +1159,16 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 2000 Data size: 736000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: 
org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan @@ -1283,12 +1311,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan @@ -1435,12 +1467,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan @@ -1581,12 +1617,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: UDFToDouble(_col0) (type: double) Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: true + vectorized: true Map 4 Map Operator Tree: TableScan @@ -1733,12 +1773,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: (UDFToDouble(_col0) * 2.0) (type: double) Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: true + vectorized: true Map 4 Map Operator Tree: TableScan @@ -1885,12 +1929,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: UDFToDouble(_col0) (type: double) Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: true + vectorized: true Map 4 Map Operator Tree: TableScan @@ -2022,12 +2070,16 @@ 
STAGE PLANS: sort order: + Map-reduce partition columns: (UDFToDouble(_col0) * 2.0) (type: double) Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: true + vectorized: true Map 4 Map Operator Tree: TableScan @@ -2172,12 +2224,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: UDFToString((UDFToDouble(_col0) * 2.0)) (type: string) Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: true + vectorized: true Map 4 Map Operator Tree: TableScan @@ -2335,12 +2391,16 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1000 Data size: 94000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan @@ -2359,12 +2419,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -2495,12 +2559,16 @@ STAGE PLANS: sort order: Statistics: Num rows: 2000 Data size: 736000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan @@ -2637,12 +2705,16 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 2000 Data size: 736000 Basic stats: COMPLETE 
Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan @@ -2804,12 +2876,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan @@ -2979,12 +3055,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -3068,12 +3148,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan @@ -3204,12 +3288,16 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: TableScan @@ -3638,12 +3726,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: 
hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan @@ -3662,12 +3754,16 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 8 Map Operator Tree: TableScan @@ -3686,12 +3782,16 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -3889,12 +3989,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 4 Map Operator Tree: TableScan @@ -3913,12 +4017,16 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 8 Map Operator Tree: TableScan @@ -3937,12 +4045,16 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + 
usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -4144,12 +4256,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 10 Map Operator Tree: TableScan @@ -4168,12 +4284,16 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: TableScan @@ -4190,12 +4310,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 7 Map Operator Tree: TableScan @@ -4214,12 +4338,16 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 11 Execution mode: vectorized, llap Reduce Vectorization: @@ -4474,12 +4602,16 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -4644,12 +4776,16 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 
(type: bigint) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -4847,12 +4983,16 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -5019,12 +5159,16 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -5165,12 +5309,16 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Map 3 Map Operator Tree: TableScan @@ -5311,12 +5459,16 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Map 3 Map Operator Tree: TableScan @@ -5454,12 +5606,16 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true 
inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan @@ -5478,12 +5634,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -5624,12 +5784,16 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -5778,12 +5942,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -5848,12 +6016,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan @@ -5984,12 +6156,16 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -6335,12 +6511,16 @@ STAGE PLANS: sort 
order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan @@ -6359,12 +6539,16 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 7 Map Operator Tree: TableScan @@ -6383,12 +6567,16 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: diff --git ql/src/test/results/clientpositive/llap/vectorized_join46.q.out ql/src/test/results/clientpositive/llap/vectorized_join46.q.out index 1bd4a60..3948339 100644 --- ql/src/test/results/clientpositive/llap/vectorized_join46.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_join46.q.out @@ -84,7 +84,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map 2 Map Operator Tree: @@ -101,7 +101,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Stage: Stage-0 @@ -127,9 +127,9 @@ POSTHOOK: Input: default@test2 NULL NULL None NULL NULL NULL 98 NULL None NULL NULL NULL 99 0 Alice NULL NULL NULL +100 1 Bob NULL NULL NULL 99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema -100 1 Bob NULL NULL NULL 101 2 Car 102 2 Del 101 2 Car 103 2 Ema PREHOOK: query: EXPLAIN @@ -187,7 +187,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map 2 Map Operator Tree: @@ -207,7 +207,7 @@ STAGE PLANS: 
Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Stage: Stage-0 @@ -294,7 +294,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map 2 Map Operator Tree: @@ -312,7 +312,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Stage: Stage-0 @@ -381,7 +381,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map 2 Map Operator Tree: @@ -409,7 +409,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Stage: Stage-0 @@ -432,12 +432,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None 99 2 Mat 102 2 Del 101 2 Car 102 2 Del 99 2 Mat 103 2 Ema 101 2 Car 103 2 Ema -NULL NULL NULL 104 3 Fli -NULL NULL NULL 105 NULL None Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Map 1' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -490,7 +490,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map 2 Map Operator Tree: @@ -505,7 +505,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Stage: Stage-0 @@ -610,7 +610,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Stage: Stage-0 @@ -718,7 +718,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Stage: Stage-0 @@ -824,7 +824,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Stage: Stage-0 @@ -928,7 +928,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Stage: Stage-0 @@ -1002,7 +1002,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map 2 Map Operator Tree: @@ -1110,7 +1110,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map 2 Map Operator Tree: @@ -1213,7 +1213,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map 2 Map Operator Tree: @@ -1319,7 +1319,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map 2 Map Operator Tree: @@ -1420,7 +1420,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map 3 Map Operator Tree: @@ -1435,7 +1435,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: 
NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 Execution mode: llap @@ -1533,7 +1533,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map 3 Map Operator Tree: @@ -1548,7 +1548,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 Execution mode: llap @@ -1644,7 +1644,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map 3 Map Operator Tree: @@ -1659,7 +1659,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 Execution mode: llap @@ -1755,7 +1755,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map 3 Map Operator Tree: @@ -1772,7 +1772,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 Execution mode: llap diff --git ql/src/test/results/clientpositive/llap/vectorized_mapjoin2.q.out ql/src/test/results/clientpositive/llap/vectorized_mapjoin2.q.out index 7621cbe..fbe0b6c 100644 --- ql/src/test/results/clientpositive/llap/vectorized_mapjoin2.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_mapjoin2.q.out @@ -103,7 +103,7 @@ STAGE PLANS: Reduce Sink Vectorization: className: VectorReduceSinkOperator native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) @@ -147,7 +147,7 @@ STAGE PLANS: Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS 
true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 45 Data size: 181 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs diff --git ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out index e65eb2e..fd034ab 100644 --- ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out @@ -123,15 +123,24 @@ POSTHOOK: query: explain vectorization expression SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, hex(cbinary), cdecimal FROM parquet_types POSTHOOK: type: QUERY -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] -Stage-0 - Fetch Operator - limit:-1 - Select Operator [SEL_1] - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] - TableScan [TS_0] - Output:["cint","ctinyint","csmallint","cfloat","cdouble","cstring1","t","cchar","cvarchar","cbinary","cdecimal"] +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: parquet_types + Select Operator + expressions: cint (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), t (type: timestamp), cchar (type: char(5)), cvarchar (type: varchar(10)), hex(cbinary) (type: string), cdecimal (type: decimal(4,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + ListSink PREHOOK: query: SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, hex(cbinary), cdecimal FROM parquet_types @@ -171,15 +180,24 @@ PREHOOK: type: QUERY POSTHOOK: query: explain vectorization expression SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types POSTHOOK: type: QUERY -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-0 is a root stage -Stage-0 - Fetch Operator - limit:-1 - Select Operator [SEL_1] - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - TableScan [TS_0] - Output:["cchar","cvarchar","cdecimal"] +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: parquet_types + Select Operator + expressions: cchar (type: char(5)), length(cchar) (type: int), cvarchar (type: varchar(10)), length(cvarchar) (type: int), cdecimal (type: decimal(4,2)), sign(cdecimal) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + ListSink PREHOOK: query: SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types PREHOOK: type: QUERY @@ -235,33 +253,122 @@ FROM parquet_types GROUP BY ctinyint ORDER BY ctinyint POSTHOOK: type: QUERY -Plan optimized by CBO. 
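(In the hunk resuming below, the one-line CBO summary gives way to the full STAGE PLANS form, now headed by a PLAN VECTORIZATION block.) For orientation, the plan that follows belongs to the aggregation query over parquet_types. A minimal HiveQL sketch of how one might reproduce that explain output, reconstructed from the operator tree below — the table name, the setting, and the aggregate list are taken from this patch; the query formatting is assumed:

    SET hive.vectorized.execution.enabled=true;

    EXPLAIN VECTORIZATION EXPRESSION
    SELECT ctinyint, MAX(cint), MIN(csmallint), COUNT(cstring1),
           AVG(cfloat), STDDEV_POP(cdouble), MAX(cdecimal)
    FROM parquet_types
    GROUP BY ctinyint
    ORDER BY ctinyint;

Note in the output below that the map-side Group By reports vectorOutput: false for the avg/stddev_pop aggregates (their partial results are struct-typed, not primitive), which is why Reducer 2 falls back to row mode while Map 1 and Reducer 3 stay vectorized.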
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: parquet_types + Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Select Operator + expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double), cdecimal (type: decimal(4,2)) + outputColumnNames: ctinyint, cint, csmallint, cstring1, cfloat, cdouble, cdecimal + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2, 5, 3, 4, 10] + Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble), max(cdecimal) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 0) -> int, VectorUDAFMinLong(col 2) -> smallint, VectorUDAFCount(col 5) -> bigint, VectorUDAFAvgDouble(col 3) -> struct, VectorUDAFStdPopDouble(col 4) -> struct, VectorUDAFMaxDecimal(col 10) -> decimal(4,2) + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 1 + native: false + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDouble(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false + keys: ctinyint (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct), _col6 (type: decimal(4,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col3] not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4), max(VALUE._col5) + keys: KEY._col0 (type: tinyint) + mode: mergepartial + 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: decimal(4,2)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: int), VALUE._col1 (type: smallint), VALUE._col2 (type: bigint), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: decimal(4,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] + Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 3 vectorized, llap - File Output Operator [FS_12] - Select Operator [SEL_11] (rows=11 width=11) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_6] - Group By Operator [GBY_4] (rows=11 width=11) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["max(VALUE._col0)","min(VALUE._col1)","count(VALUE._col2)","avg(VALUE._col3)","stddev_pop(VALUE._col4)","max(VALUE._col5)"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_3] - PartitionCols:_col0 - Group By Operator [GBY_10] (rows=22 width=11) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["max(cint)","min(csmallint)","count(cstring1)","avg(cfloat)","stddev_pop(cdouble)","max(cdecimal)"],keys:ctinyint - Select Operator [SEL_9] (rows=22 width=11) - Output:["ctinyint","cint","csmallint","cstring1","cfloat","cdouble","cdecimal"] - TableScan [TS_0] (rows=22 width=11) - default@parquet_types,parquet_types,Tbl:COMPLETE,Col:NONE,Output:["cint","ctinyint","csmallint","cfloat","cdouble","cstring1","cdecimal"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: SELECT ctinyint, MAX(cint), diff --git ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out index 7687cff..5621895 100644 --- ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out @@ -29,21 +29,40 @@ STAGE PLANS: TableScan alias: li Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 3, val 1) -> boolean, SelectColumnIsNotNull(col 1) -> boolean, SelectColumnIsNotNull(col 0) -> boolean) -> boolean predicate: ((l_linenumber = 1) and l_partkey is not null and l_orderkey is not null) (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col0 (type: int) 1 _col1 (type: int) + Execution mode: vectorized Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Map 4 @@ -51,30 +70,51 @@ STAGE PLANS: TableScan alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 14, val AIR) -> boolean, SelectColumnIsNotNull(col 0) -> boolean) -> boolean predicate: ((l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - vectorOutput: false + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 native: false - projectedOutputColumns: null + projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col1 (type: int) 1 _col0 (type: int) + Execution mode: vectorized Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -89,14 +129,23 @@ STAGE PLANS: TableScan alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + 
projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: l_partkey is not null (type: boolean) Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - vectorOutput: false + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 1 native: false - projectedOutputColumns: null + projectedOutputColumns: [] keys: l_partkey (type: int) mode: hash outputColumnNames: _col0 @@ -105,11 +154,21 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized Local Work: @@ -241,21 +300,41 @@ STAGE PLANS: TableScan alias: li Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 3, val 1) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean predicate: ((l_linenumber = 1) and l_partkey is not null) (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 16] + selectExpressions: ConstantVectorExpression(val 1) -> 16:long Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col0 (type: int) 1 _col1 (type: int) + Execution mode: vectorized Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Map 4 @@ -263,30 +342,51 @@ STAGE PLANS: TableScan alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + 
TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 14, val AIR) -> boolean, FilterLongColEqualLongColumn(col 3, col 3) -> boolean) -> boolean predicate: ((l_shipmode = 'AIR') and (l_linenumber = l_linenumber)) (type: boolean) Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int), l_linenumber (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 3] Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - vectorOutput: false + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 3 native: false - projectedOutputColumns: null + projectedOutputColumns: [] keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col1 (type: int), _col4 (type: int) 1 _col0 (type: int), _col1 (type: int) + Execution mode: vectorized Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -301,14 +401,23 @@ STAGE PLANS: TableScan alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: l_partkey is not null (type: boolean) Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - vectorOutput: false + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 1 native: false - projectedOutputColumns: null + projectedOutputColumns: [] keys: l_partkey (type: int) mode: hash outputColumnNames: _col0 @@ -317,11 +426,21 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized Local Work: diff --git ql/src/test/results/clientpositive/vector_bucket.q.out ql/src/test/results/clientpositive/vector_bucket.q.out index 2825489..91fad66 100644 --- ql/src/test/results/clientpositive/vector_bucket.q.out +++ ql/src/test/results/clientpositive/vector_bucket.q.out @@ -28,19 +28,36 @@ STAGE PLANS: TableScan alias: values__tmp__table__1 Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: tmp_values_col1 (type: string), tmp_values_col2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: vectorized Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true diff --git ql/src/test/results/clientpositive/vector_cast_constant.q.out ql/src/test/results/clientpositive/vector_cast_constant.q.out index bf21732..79be0f0 100644 --- ql/src/test/results/clientpositive/vector_cast_constant.q.out +++ ql/src/test/results/clientpositive/vector_cast_constant.q.out @@ -192,16 +192,29 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4)) + Execution mode: vectorized Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
diff --git ql/src/test/results/clientpositive/vector_char_2.q.out ql/src/test/results/clientpositive/vector_char_2.q.out
index 8c5dd3e..a848044 100644
--- ql/src/test/results/clientpositive/vector_char_2.q.out
+++ ql/src/test/results/clientpositive/vector_char_2.q.out
@@ -147,16 +147,29 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2]
             Reduce Output Operator
               key expressions: _col0 (type: char(20))
               sort order: +
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false
               Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
               TopN Hash Memory Usage: 0.1
               value expressions: _col1 (type: bigint), _col2 (type: bigint)
+      Execution mode: vectorized
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -325,16 +338,29 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2]
             Reduce Output Operator
               key expressions: _col0 (type: char(20))
               sort order: -
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false
               Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
               TopN Hash Memory Usage: 0.1
               value expressions: _col1 (type: bigint), _col2 (type: bigint)
+      Execution mode: vectorized
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
diff --git ql/src/test/results/clientpositive/vector_decimal_round.q.out ql/src/test/results/clientpositive/vector_decimal_round.q.out
index b1fd628..a6493c6 100644
--- ql/src/test/results/clientpositive/vector_decimal_round.q.out
+++ ql/src/test/results/clientpositive/vector_decimal_round.q.out
@@ -49,19 +49,37 @@ STAGE PLANS:
           TableScan
             alias: decimal_tbl_txt
             Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0]
             Select Operator
               expressions: dec (type: decimal(10,0)), round(dec, -1) (type: decimal(11,0))
               outputColumnNames: _col0, _col1
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [0, 1]
+                  selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 1:decimal(11,0)
               Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: decimal(10,0))
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col1 (type: decimal(11,0))
+      Execution mode: vectorized
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -115,19 +133,36 @@ STAGE PLANS:
           TableScan
             alias: decimal_tbl_txt
             Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0]
             Select Operator
               expressions: dec (type: decimal(10,0))
               outputColumnNames: _col0
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [0]
               Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: round(_col0, -1) (type: decimal(11,0))
                 sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col0 (type: decimal(10,0))
+      Execution mode: vectorized
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
diff --git ql/src/test/results/clientpositive/vector_groupby4.q.out ql/src/test/results/clientpositive/vector_groupby4.q.out
index 0423e47..3efd4a4 100644
--- ql/src/test/results/clientpositive/vector_groupby4.q.out
+++ ql/src/test/results/clientpositive/vector_groupby4.q.out
@@ -99,15 +99,28 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0]
             Reduce Output Operator
               key expressions: _col0 (type: string)
               sort order: +
               Map-reduce partition columns: _col0 (type: string)
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
               Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
diff --git ql/src/test/results/clientpositive/vector_groupby6.q.out ql/src/test/results/clientpositive/vector_groupby6.q.out
index a801942..7083f70 100644
--- ql/src/test/results/clientpositive/vector_groupby6.q.out
+++ ql/src/test/results/clientpositive/vector_groupby6.q.out
@@ -99,15 +99,28 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0]
             Reduce Output Operator
               key expressions: _col0 (type: string)
               sort order: +
               Map-reduce partition columns: _col0 (type: string)
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
               Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
diff --git ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out
index feada86..3662ad7 100644
--- ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out
+++ ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out
@@ -36,27 +36,46 @@ STAGE PLANS:
           TableScan
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Select Operator
               expressions: key (type: string)
               outputColumnNames: key
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [0]
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
                 aggregations: count(), count(key)
                 Group By Vectorization:
-                    vectorOutput: false
+                    aggregators: VectorUDAFCountStar(*) -> bigint, VectorUDAFCount(col 0) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
                     native: false
-                    projectedOutputColumns: null
+                    projectedOutputColumns: [0, 1]
                 mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   sort order:
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                   Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col0 (type: bigint), _col1 (type: bigint)
+      Execution mode: vectorized
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -102,24 +121,40 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Map Join Operator
               condition map:
                    Inner Join 0 to 1
               keys:
                 0
                 1
+              Map Join Vectorization:
+                  className: VectorMapJoinOperator
+                  native: false
+                  nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
               outputColumnNames: _col0, _col1, _col2, _col3
               Statistics: Num rows: 500 Data size: 13812 Basic stats: COMPLETE Column stats: NONE
               File Output Operator
                 compressed: false
+                File Sink Vectorization:
+                    className: VectorFileSinkOperator
+                    native: false
                 table:
                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Execution mode: vectorized
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Local Work:
         Map Reduce Local Work
@@ -144,31 +179,55 @@ STAGE PLANS:
     Map Reduce
      Map Operator Tree:
           TableScan
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3]
             Map Join Operator
               condition map:
                    Left Outer Join0 to 1
               keys:
                 0 _col0 (type: string)
                 1 _col0 (type: string)
+              Map Join Vectorization:
+                  className: VectorMapJoinOperator
+                  native: false
+                  nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
              outputColumnNames: _col0, _col1, _col2, _col3, _col5
              Statistics: Num rows: 550 Data size: 15193 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
+                Filter Vectorization:
+                    className: VectorFilterOperator
+                    native: true
+                    predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 2, val 0) -> boolean, FilterExprAndExpr(children: SelectColumnIsNull(col 4) -> boolean, SelectColumnIsNotNull(col 0) -> boolean, FilterLongColGreaterEqualLongColumn(col 3, col 2) -> boolean) -> boolean) -> boolean
                predicate: ((_col2 = 0) or (_col5 is null and _col0 is not null and (_col3 >= _col2))) (type: boolean)
                Statistics: Num rows: 366 Data size: 10110 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: string), _col1 (type: string)
                  outputColumnNames: _col0, _col1
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumns: [0, 1]
                  Statistics: Num rows: 366 Data size: 10110 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Execution mode: vectorized
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Local Work:
         Map Reduce Local Work
@@ -176,15 +235,28 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Reduce Output Operator
               key expressions: _col0 (type: string)
               sort order: +
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
               Statistics: Num rows: 366 Data size: 10110 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col1 (type: string)
+      Execution mode: vectorized
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -248,15 +320,24 @@ STAGE PLANS:
           TableScan
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Select Operator
               expressions: key (type: string)
               outputColumnNames: key
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [0]
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
                 Group By Vectorization:
-                    vectorOutput: false
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: col 0
                     native: false
-                    projectedOutputColumns: null
+                    projectedOutputColumns: []
                 keys: key (type: string)
                 mode: hash
                 outputColumnNames: _col0
@@ -265,11 +346,21 @@
                 key expressions: _col0 (type: string)
                 sort order: +
                 Map-reduce partition columns: _col0 (type: string)
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -318,3 +409,49 @@ order by key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 #### A masked pattern was here ####
+PREHOOK: query: CREATE TABLE orcsrc STORED AS ORC AS SELECT * FROM src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orcsrc
+POSTHOOK: query: CREATE TABLE orcsrc STORED AS ORC AS SELECT * FROM src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orcsrc
+POSTHOOK: Lineage: orcsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+Warning: Map Join MAPJOIN[34][bigTable=?] in task 'Stage-8:MAPRED' is a cross product
+PREHOOK: query: select *
+from orcsrc
+where not key in
+(select key from orcsrc)
+order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from orcsrc
+where not key in
+(select key from orcsrc)
+order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+Warning: Map Join MAPJOIN[34][bigTable=?] in task 'Stage-8:MAPRED' is a cross product
+PREHOOK: query: select *
+from orcsrc
+where not key in
+(select key from orcsrc)
+order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from orcsrc
+where not key in
+(select key from orcsrc)
+order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
diff --git ql/src/test/results/clientpositive/vector_groupby_reduce.q.out ql/src/test/results/clientpositive/vector_groupby_reduce.q.out
index a464409..42fd292 100644
--- ql/src/test/results/clientpositive/vector_groupby_reduce.q.out
+++ ql/src/test/results/clientpositive/vector_groupby_reduce.q.out
@@ -473,14 +473,27 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0]
             Reduce Output Operator
               key expressions: _col0 (type: int)
               sort order: +
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
               Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -736,15 +749,28 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2]
             Reduce Output Operator
               key expressions: _col0 (type: int)
               sort order: +
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
               Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col1 (type: bigint), _col2 (type: bigint)
+      Execution mode: vectorized
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -1000,15 +1026,28 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2]
             Reduce Output Operator
               key expressions: _col0 (type: int), _col1 (type: int)
               sort order: ++
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
               Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col2 (type: bigint)
+      Execution mode: vectorized
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
diff --git ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out
index a2f59d5..ec7c5a7 100644
--- ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out
+++ ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out
@@ -27,14 +27,23 @@ STAGE PLANS:
           TableScan
             alias: lineitem
             Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: SelectColumnIsNotNull(col 1) -> boolean
               predicate: l_partkey is not null (type: boolean)
               Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
                 Group By Vectorization:
-                    vectorOutput: false
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: col 1
                     native: false
-                    projectedOutputColumns: null
+                    projectedOutputColumns: []
                 keys: l_partkey (type: int)
                 mode: hash
                 outputColumnNames: _col0
@@ -43,11 +52,21 @@
                 key expressions: _col0 (type: int)
                 sort order: +
                 Map-reduce partition columns: _col0 (type: int)
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                 Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -123,12 +142,20 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0]
             Map Join Operator
               condition map:
                    Inner Join 0 to 1
               keys:
                 0 _col0 (type: int)
                 1 _col1 (type: int)
+              Map Join Vectorization:
+                  className: VectorMapJoinOperator
+                  native: false
+                  nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
               outputColumnNames: _col0, _col1, _col3
               Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
               Map Join Operator
@@ -137,23 +164,40 @@
                 keys:
                   0 _col1 (type: int)
                   1 _col0 (type: int)
+                Map Join Vectorization:
+                    className: VectorMapJoinOperator
+                    native: false
+                    nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                 outputColumnNames: _col0, _col3
                 Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: int), _col3 (type: int)
                   outputColumnNames: _col0, _col1
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                     Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Local Work:
         Map Reduce Local Work
@@ -216,14 +260,23 @@ STAGE PLANS:
           TableScan
             alias: lineitem
            Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: SelectColumnIsNotNull(col 1) -> boolean
               predicate: l_partkey is not null (type: boolean)
               Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
                 Group By Vectorization:
-                    vectorOutput: false
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: col 1
                     native: false
-                    projectedOutputColumns: null
+                    projectedOutputColumns: []
                 keys: l_partkey (type: int)
                 mode: hash
                 outputColumnNames: _col0
@@ -232,11 +285,21 @@
                 key expressions: _col0 (type: int)
                 sort order: +
                 Map-reduce partition columns: _col0 (type: int)
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
                 Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -312,12 +375,20 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0]
             Map Join Operator
               condition map:
                    Inner Join 0 to 1
               keys:
                 0 _col0 (type: int)
                 1 _col1 (type: int)
+              Map Join Vectorization:
+                  className: VectorMapJoinOperator
+                  native: false
+                  nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
               outputColumnNames: _col0, _col1, _col3, _col4
               Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
               Map Join Operator
@@ -326,23 +397,40 @@
                 keys:
                   0 _col1 (type: int), _col4 (type: int)
                   1 _col0 (type: int), _col1 (type: int)
+                Map Join Vectorization:
+                    className: VectorMapJoinOperator
+                    native: false
+                    nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                 outputColumnNames: _col0, _col3
                 Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: int), _col3 (type: int)
                   outputColumnNames: _col0, _col1
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                     Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Local Work:
         Map Reduce Local Work
diff --git ql/src/test/results/clientpositive/vector_mr_diff_schema_alias.q.out ql/src/test/results/clientpositive/vector_mr_diff_schema_alias.q.out
index b6cdce1..0fa8e2f 100644
--- ql/src/test/results/clientpositive/vector_mr_diff_schema_alias.q.out
+++ ql/src/test/results/clientpositive/vector_mr_diff_schema_alias.q.out
@@ -371,10 +371,15 @@ STAGE PLANS:
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
               TopN Hash Memory Usage: 0.1
               value expressions: _col1 (type: bigint)
+      Execution mode: vectorized
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -403,10 +408,15 @@
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
               TopN Hash Memory Usage: 0.1
               value expressions: _col1 (type: bigint)
+      Execution mode: vectorized
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
diff --git ql/src/test/results/clientpositive/vector_orderby_5.q.out ql/src/test/results/clientpositive/vector_orderby_5.q.out
index 5c5e2a0..86d08a5 100644
--- ql/src/test/results/clientpositive/vector_orderby_5.q.out
+++ ql/src/test/results/clientpositive/vector_orderby_5.q.out
@@ -193,15 +193,28 @@ STAGE PLANS:
     Map Reduce
      Map Operator Tree:
           TableScan
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Reduce Output Operator
               key expressions: _col0 (type: boolean)
               sort order: -
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
               Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col1 (type: bigint)
+      Execution mode: vectorized
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
diff --git ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out
index ee2f05d..4b09c86 100644
--- ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out
+++ ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out
@@ -114,16 +114,29 @@ STAGE PLANS:
     Map Reduce
      Map Operator Tree:
           TableScan
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4]
             Reduce Output Operator
               key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14))
               sort order: ++++
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false
               Statistics: Num rows: 3051 Data size: 720036 Basic stats: COMPLETE Column stats: NONE
               TopN Hash Memory Usage: 0.1
               value expressions: _col4 (type: decimal(20,10))
+      Execution mode: vectorized
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
diff --git ql/src/test/results/clientpositive/vector_string_concat.q.out ql/src/test/results/clientpositive/vector_string_concat.q.out
index ca7d0c8..2c4a33a 100644
--- ql/src/test/results/clientpositive/vector_string_concat.q.out
+++ ql/src/test/results/clientpositive/vector_string_concat.q.out
@@ -399,15 +399,28 @@ STAGE PLANS:
     Map Reduce
      Map Operator Tree:
           TableScan
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0]
             Reduce Output Operator
               key expressions: _col0 (type: string)
               sort order: +
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false
               Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE
               TopN Hash Memory Usage: 0.1
+      Execution mode: vectorized
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
diff --git ql/src/test/results/clientpositive/vector_tablesample_rows.q.out ql/src/test/results/clientpositive/vector_tablesample_rows.q.out
index ad7af88..a78f77d 100644
--- ql/src/test/results/clientpositive/vector_tablesample_rows.q.out
+++ ql/src/test/results/clientpositive/vector_tablesample_rows.q.out
@@ -273,14 +273,27 @@ STAGE PLANS:
     Map Reduce
      Map Operator Tree:
           TableScan
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0]
             Reduce Output Operator
               sort order:
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
               value expressions: _col0 (type: bigint)
+      Execution mode: vectorized
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
diff --git ql/src/test/results/clientpositive/vectorization_13.q.out ql/src/test/results/clientpositive/vectorization_13.q.out
index cb57133..780eb02 100644
--- ql/src/test/results/clientpositive/vectorization_13.q.out
+++ ql/src/test/results/clientpositive/vectorization_13.q.out
@@ -157,15 +157,28 @@ STAGE PLANS:
     Map Reduce
      Map Operator Tree:
           TableScan
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
             Reduce Output Operator
               key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint)
               sort order: +++++++++++++++++++++
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false
               Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
               TopN Hash Memory Usage: 0.1
+      Execution mode: vectorized
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -459,15 +472,28 @@ STAGE PLANS:
     Map Reduce
      Map Operator Tree:
           TableScan
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
             Reduce Output Operator
               key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint)
               sort order: +++++++++++++++++++++
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false
               Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
               TopN Hash Memory Usage: 0.1
+      Execution mode: vectorized
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
diff --git ql/src/test/results/clientpositive/vectorization_14.q.out ql/src/test/results/clientpositive/vectorization_14.q.out
index 775c3ef..ec4f7cd 100644
--- ql/src/test/results/clientpositive/vectorization_14.q.out
+++ ql/src/test/results/clientpositive/vectorization_14.q.out
@@ -139,10 +139,15 @@ STAGE PLANS:
               sort order: ++++
               Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double)
+      Execution mode: vectorized
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
diff --git ql/src/test/results/clientpositive/vectorization_15.q.out ql/src/test/results/clientpositive/vectorization_15.q.out
index 0195f2f..4a1ed96 100644
--- ql/src/test/results/clientpositive/vectorization_15.q.out
+++ ql/src/test/results/clientpositive/vectorization_15.q.out
@@ -135,10 +135,15 @@ STAGE PLANS:
               sort order: +++++++
               Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col7 (type: double), _col8 (type: decimal(13,2)), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: tinyint), _col16 (type: double), _col17 (type: float), _col18 (type: int), _col19 (type: decimal(13,2)), _col20 (type: double)
+      Execution mode: vectorized
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
diff --git ql/src/test/results/clientpositive/vectorization_limit.q.out ql/src/test/results/clientpositive/vectorization_limit.q.out
index 17cdd51..4a85eba 100644
--- ql/src/test/results/clientpositive/vectorization_limit.q.out
+++ ql/src/test/results/clientpositive/vectorization_limit.q.out
@@ -677,15 +677,28 @@ STAGE PLANS:
     Map Reduce
      Map Operator Tree:
           TableScan
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Reduce Output Operator
               key expressions: _col1 (type: bigint), _col0 (type: double)
               sort order: ++
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false, Uniform Hash IS false
               Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
               TopN Hash Memory Usage: 0.3
+      Execution mode: vectorized
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
diff --git ql/src/test/results/clientpositive/vectorized_date_funcs.q.out ql/src/test/results/clientpositive/vectorized_date_funcs.q.out
index 24b1ad4..ffde8b2 100644
--- ql/src/test/results/clientpositive/vectorized_date_funcs.q.out
+++ ql/src/test/results/clientpositive/vectorized_date_funcs.q.out
@@ -1288,15 +1288,28 @@ STAGE PLANS:
     Map Reduce
      Map Operator Tree:
           TableScan
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3]
             Reduce Output Operator
               key expressions: _col0 (type: date)
               sort order: +
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
               Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col1 (type: date), _col2 (type: bigint), _col3 (type: bigint)
+      Execution mode: vectorized
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
diff --git ql/src/test/results/clientpositive/vectorized_parquet_types.q.out ql/src/test/results/clientpositive/vectorized_parquet_types.q.out
index cd16b3b..8ca1381 100644
--- ql/src/test/results/clientpositive/vectorized_parquet_types.q.out
+++ ql/src/test/results/clientpositive/vectorized_parquet_types.q.out
@@ -403,15 +403,28 @@ STAGE PLANS:
     Map Reduce
      Map Operator Tree:
           TableScan
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6]
             Reduce Output Operator
               key expressions: _col0 (type: tinyint)
               sort order: +
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
               Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: decimal(4,2))
+      Execution mode: vectorized
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
diff --git ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out
index 3a7bbbb..703e23f 100644
--- ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out
+++ ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out
@@ -95,9 +95,11 @@ STAGE PLANS:
               Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct)
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
           inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          notVectorizedReason: Value expression for REDUCESINK operator: Data type struct of Column[_col3] not supported
+          vectorized: false
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -123,15 +125,28 @@ STAGE PLANS:
     Map Reduce
      Map Operator Tree:
           TableScan
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3]
             Reduce Output Operator
               key expressions: _col0 (type: bigint)
               sort order: +
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false
               Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: double)
+      Execution mode: vectorized
       Map Vectorization:
-          enabled: false
-          enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true