diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index e188619..31cdb1e 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -352,6 +352,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ vector_decimal_udf.q,\ vector_decimal_udf2.q,\ vector_distinct_2.q,\ + vectorized_distinct_gby.q,\ vector_elt.q,\ vector_groupby4.q,\ vector_groupby6.q,\ @@ -420,6 +421,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ vectorization_8.q,\ vectorization_9.q,\ vectorization_decimal_date.q,\ + vectorization_limit.q,\ vectorization_nested_udf.q,\ vectorization_not.q,\ vectorization_part.q,\ @@ -747,6 +749,8 @@ minillaplocal.query.files=\ vector_groupby_grouping_sets_limit.q,\ vector_groupby_grouping_window.q,\ vector_groupby_rollup1.q,\ + vector_groupby_sort_11.q,\ + vector_groupby_sort_8.q,\ vector_if_expr_2.q,\ vector_join30.q,\ vector_join_filters.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 33830b3..a822a4b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -2876,13 +2876,13 @@ private boolean validateAggregationDesc(AggregationDesc aggDesc, GroupByDesc.Mod setExpressionIssue("Aggregation Function", "UDF " + udfName + " not supported"); return false; } - /* + // The planner seems to pull this one out. if (aggDesc.getDistinct()) { setExpressionIssue("Aggregation Function", "DISTINCT not supported"); - return new Pair(false, false); + return false; } - */ + ArrayList parameters = aggDesc.getParameters(); diff --git ql/src/test/queries/clientpositive/groupby_cube1.q ql/src/test/queries/clientpositive/groupby_cube1.q index fd2f0de..92456d0 100644 --- ql/src/test/queries/clientpositive/groupby_cube1.q +++ ql/src/test/queries/clientpositive/groupby_cube1.q @@ -1,3 +1,4 @@ +SET hive.vectorized.execution.enabled=false; set hive.mapred.mode=nonstrict; set hive.map.aggr=true; set hive.groupby.skewindata=false; diff --git ql/src/test/queries/clientpositive/groupby_grouping_id1.q ql/src/test/queries/clientpositive/groupby_grouping_id1.q index 9948ce9..7068d21 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_id1.q +++ ql/src/test/queries/clientpositive/groupby_grouping_id1.q @@ -1,3 +1,5 @@ +SET hive.vectorized.execution.enabled=false; + CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE; LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; diff --git ql/src/test/queries/clientpositive/groupby_grouping_id2.q ql/src/test/queries/clientpositive/groupby_grouping_id2.q index cc7f9e4..ba755c4 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_id2.q +++ ql/src/test/queries/clientpositive/groupby_grouping_id2.q @@ -1,4 +1,5 @@ set hive.explain.user=false; +SET hive.vectorized.execution.enabled=false; set hive.fetch.task.conversion=none; set hive.cli.print.header=true; diff --git ql/src/test/queries/clientpositive/groupby_grouping_id3.q ql/src/test/queries/clientpositive/groupby_grouping_id3.q index 955dbe0..29b2f15 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_id3.q +++ ql/src/test/queries/clientpositive/groupby_grouping_id3.q @@ -1,3 +1,5 @@ +SET hive.vectorized.execution.enabled=false; + CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE; LOAD DATA LOCAL INPATH 
'../../data/files/groupby_groupingid.txt' INTO TABLE T1; diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets1.q ql/src/test/queries/clientpositive/groupby_grouping_sets1.q index c22c97f..86c5246 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets1.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets1.q @@ -1,4 +1,5 @@ set hive.explain.user=false; +SET hive.vectorized.execution.enabled=false; set hive.fetch.task.conversion=none; set hive.cli.print.header=true; diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets2.q ql/src/test/queries/clientpositive/groupby_grouping_sets2.q index 90e6325..1934321 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets2.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets2.q @@ -1,4 +1,5 @@ set hive.explain.user=false; +SET hive.vectorized.execution.enabled=false; set hive.cli.print.header=true; set hive.mapred.mode=nonstrict; set hive.new.job.grouping.set.cardinality=2; diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets3.q ql/src/test/queries/clientpositive/groupby_grouping_sets3.q index 16421e8..81267dc 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets3.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets3.q @@ -1,4 +1,5 @@ set hive.explain.user=false; +SET hive.vectorized.execution.enabled=false; set hive.fetch.task.conversion=none; set hive.cli.print.header=true; diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets4.q ql/src/test/queries/clientpositive/groupby_grouping_sets4.q index 1074a3b..fa62992 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets4.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets4.q @@ -1,4 +1,5 @@ set hive.explain.user=false; +SET hive.vectorized.execution.enabled=false; set hive.mapred.mode=nonstrict; set hive.merge.mapfiles = false; set hive.merge.mapredfiles = false; diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets5.q ql/src/test/queries/clientpositive/groupby_grouping_sets5.q index 570d464..829a0c2 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets5.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets5.q @@ -1,4 +1,5 @@ set hive.mapred.mode=nonstrict; +SET hive.vectorized.execution.enabled=false; set hive.merge.mapfiles = false; set hive.merge.mapredfiles = false; -- Set merging to false above to make the explain more readable diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets6.q ql/src/test/queries/clientpositive/groupby_grouping_sets6.q index e537bce..515dce3 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets6.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets6.q @@ -1,4 +1,6 @@ set hive.mapred.mode=nonstrict; +SET hive.vectorized.execution.enabled=false; + CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1; diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q index 7157106..3f437a4 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q @@ -1,3 +1,4 @@ +SET hive.vectorized.execution.enabled=false; -- SORT_QUERY_RESULTS diff --git ql/src/test/queries/clientpositive/groupby_rollup1.q ql/src/test/queries/clientpositive/groupby_rollup1.q index 
f30ace0..94f533c 100644 --- ql/src/test/queries/clientpositive/groupby_rollup1.q +++ ql/src/test/queries/clientpositive/groupby_rollup1.q @@ -1,3 +1,4 @@ +SET hive.vectorized.execution.enabled=false; set hive.mapred.mode=nonstrict; set hive.map.aggr=true; set hive.groupby.skewindata=false; diff --git ql/src/test/queries/clientpositive/groupby_sort_11.q ql/src/test/queries/clientpositive/groupby_sort_11.q index 3b6c172..c56789d 100644 --- ql/src/test/queries/clientpositive/groupby_sort_11.q +++ ql/src/test/queries/clientpositive/groupby_sort_11.q @@ -1,7 +1,10 @@ +SET hive.vectorized.execution.enabled=false; set hive.mapred.mode=nonstrict; set hive.exec.reducers.max = 1; set hive.map.groupby.sorted=true; +-- SORT_QUERY_RESULTS + CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; diff --git ql/src/test/queries/clientpositive/groupby_sort_8.q ql/src/test/queries/clientpositive/groupby_sort_8.q index 3f81a69..2c20b29 100644 --- ql/src/test/queries/clientpositive/groupby_sort_8.q +++ ql/src/test/queries/clientpositive/groupby_sort_8.q @@ -1,7 +1,10 @@ +SET hive.vectorized.execution.enabled=false; set hive.mapred.mode=nonstrict; set hive.exec.reducers.max = 10; set hive.map.groupby.sorted=true; +-- SORT_QUERY_RESULTS + CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; diff --git ql/src/test/queries/clientpositive/vector_groupby_cube1.q ql/src/test/queries/clientpositive/vector_groupby_cube1.q index 1f7b467..d6bab2c 100644 --- ql/src/test/queries/clientpositive/vector_groupby_cube1.q +++ ql/src/test/queries/clientpositive/vector_groupby_cube1.q @@ -1,4 +1,4 @@ -SET hive.vectorized.execution.enabled=false; +SET hive.vectorized.execution.enabled=true; SET hive.vectorized.execution.reduce.enabled=true; set hive.mapred.mode=nonstrict; set hive.map.aggr=true; diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q index a0e874d..5a5757d 100644 --- ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q @@ -99,6 +99,35 @@ having grouping(key) = 1 OR grouping(value) = 1 order by x desc, case when x = 1 then key end; explain vectorization detail +select key, value, grouping(key), grouping(value) +from T1 +group by key, value; + +select key, value, grouping(key), grouping(value) +from T1 +group by key, value; + +explain vectorization detail +select key, value, grouping(value) +from T1 +group by key, value; + +select key, value, grouping(value) +from T1 +group by key, value; + +explain vectorization detail +select key, value +from T1 +group by key, value +having grouping(key) = 0; + +select key, value +from T1 +group by key, value +having grouping(key) = 0; + +explain vectorization detail select key, value, `grouping__id`, grouping(key, value) from T1 group by cube(key, value); diff --git ql/src/test/queries/clientpositive/vector_groupby_sort_11.q ql/src/test/queries/clientpositive/vector_groupby_sort_11.q new file mode 100644 index 0000000..012fe82 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_sort_11.q @@ -0,0 +1,50 @@ +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.mapred.mode=nonstrict; +set hive.exec.reducers.max = 1; +set hive.map.groupby.sorted=true; + +-- 
SORT_QUERY_RESULTS + +CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; + +-- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 PARTITION (ds='1') +SELECT * from src where key < 10; + +-- The plan is optimized to perform partial aggregation on the mapper +EXPLAIN VECTORIZATION DETAIL +select count(distinct key) from T1; +select count(distinct key) from T1; + +-- The plan is optimized to perform partial aggregation on the mapper +EXPLAIN VECTORIZATION DETAIL +select count(distinct key), count(1), count(key), sum(distinct key) from T1; +select count(distinct key), count(1), count(key), sum(distinct key) from T1; + +-- The plan is not changed in the presence of a grouping key +EXPLAIN VECTORIZATION DETAIL +select count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key; +select count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key; + +-- The plan is not changed in the presence of a grouping key +EXPLAIN VECTORIZATION DETAIL +select key, count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key; +select key, count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key; + +-- The plan is not changed in the presence of a grouping key expression +EXPLAIN VECTORIZATION DETAIL +select count(distinct key+key) from T1; +select count(distinct key+key) from T1; + +EXPLAIN VECTORIZATION DETAIL +select count(distinct 1) from T1; +select count(distinct 1) from T1; + +set hive.map.aggr=false; + +-- no plan change if map aggr is turned off +EXPLAIN VECTORIZATION DETAIL +select count(distinct key) from T1; +select count(distinct key) from T1; diff --git ql/src/test/queries/clientpositive/vector_groupby_sort_8.q ql/src/test/queries/clientpositive/vector_groupby_sort_8.q new file mode 100644 index 0000000..b0c5699 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_sort_8.q @@ -0,0 +1,24 @@ +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.mapred.mode=nonstrict; +set hive.exec.reducers.max = 10; +set hive.map.groupby.sorted=true; + +-- SORT_QUERY_RESULTS + +CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1 PARTITION (ds='1'); + +-- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 PARTITION (ds='1') select key, val from T1 where ds = '1'; + +-- The plan is not converted to a map-side group by, since although the sorting columns and grouping +-- columns match, the user is issuing a distinct. 
+-- However, after HIVE-4310, partial aggregation is performed on the mapper +EXPLAIN VECTORIZATION DETAIL +select count(distinct key) from T1; +select count(distinct key) from T1; + +DROP TABLE T1; diff --git ql/src/test/results/clientpositive/groupby_sort_11.q.out ql/src/test/results/clientpositive/groupby_sort_11.q.out index 23c89f9..cbdc526 100644 --- ql/src/test/results/clientpositive/groupby_sort_11.q.out +++ ql/src/test/results/clientpositive/groupby_sort_11.q.out @@ -211,12 +211,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t1@ds=1 #### A masked pattern was here #### -1 3 3 0.0 1 1 1 2.0 1 1 1 4.0 -1 3 3 5.0 1 1 1 8.0 1 1 1 9.0 +1 3 3 0.0 +1 3 3 5.0 PREHOOK: query: EXPLAIN select key, count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select key, count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key diff --git ql/src/test/results/clientpositive/llap/vector_count.q.out ql/src/test/results/clientpositive/llap/vector_count.q.out index 400d930..ce35eb8 100644 --- ql/src/test/results/clientpositive/llap/vector_count.q.out +++ ql/src/test/results/clientpositive/llap/vector_count.q.out @@ -68,26 +68,12 @@ STAGE PLANS: TableScan alias: abcd Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Select Operator expressions: a (type: int), b (type: int), c (type: int), d (type: int) outputColumnNames: a, b, c, d - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 3] Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT b), count(DISTINCT c), sum(d) - Group By Vectorization: - aggregators: VectorUDAFCount(col 1:int) -> bigint, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFSumLong(col 3:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int, col 1:int, col 2:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2] keys: a (type: int), b (type: int), c (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 @@ -96,24 +82,16 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: No DISTINCT columns IS false Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: bigint) - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported + vectorized: false Reducer 2 Execution mode: llap Reduce Vectorization: @@ -200,7 +178,7 @@ STAGE PLANS: enabled: true enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: GROUPBY operator: Aggregations with > 1 parameter are not supported count([Column[a], Column[b]]) + notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported vectorized: false Reducer 2 Execution mode: llap diff --git ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out index f1ed146..3bfbda0 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out @@ -21,8 +21,8 @@ POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube POSTHOOK: type: QUERY PLAN VECTORIZATION: - enabled: false - enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage @@ -41,12 +41,27 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string), val (type: string) outputColumnNames: key, val + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -55,15 +70,58 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0, 1, 2] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [3] Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap + Reduce Vectorization: 
+ enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string, col 1:string, col 2:bigint + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1, _col3 @@ -72,9 +130,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -94,8 +159,8 @@ POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key, val, count(1) FROM T1 GROUP BY CUBE(key, val) POSTHOOK: type: QUERY PLAN VECTORIZATION: - enabled: false - enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage @@ -114,12 +179,27 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string), val (type: string) outputColumnNames: key, val + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -128,15 +208,58 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0, 1, 2] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values 
IS true + valueColumnNums: [3] Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string, col 1:string, col 2:bigint + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1, _col3 @@ -145,9 +268,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -193,8 +323,8 @@ POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key, val, GROUPING__ID, count(1) FROM T1 GROUP BY key, val with cube POSTHOOK: type: QUERY PLAN VECTORIZATION: - enabled: false - enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage @@ -213,12 +343,27 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string), val (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: 
VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -227,15 +372,58 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0, 1, 2] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [3] Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string, col 1:string, col 2:bigint + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 @@ -243,9 +431,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -291,8 +486,8 @@ POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key, count(distinct val) FROM T1 GROUP BY key with 
cube POSTHOOK: type: QUERY PLAN VECTORIZATION: - enabled: false - enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage @@ -328,8 +523,19 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported + vectorized: false Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: GROUPBY operator: DISTINCT not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) @@ -377,8 +583,8 @@ POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube POSTHOOK: type: QUERY PLAN VECTORIZATION: - enabled: false - enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage @@ -398,12 +604,27 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string), val (type: string) outputColumnNames: key, val + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -412,15 +633,59 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ Map-reduce partition columns: rand() (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1, 2] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [4] + valueColumnNums: [3] Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + 
inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: PARTIALS + keyExpressions: col 0:string, col 1:string, col 2:bigint + native: false + vectorProcessingMode: STREAMING + projectedOutputColumnNums: [0] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 @@ -429,13 +694,41 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1, 2] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [0, 1] + valueColumnNums: [3] Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: FINAL + keyExpressions: col 0:string, col 1:string, col 2:bigint + native: false + vectorProcessingMode: STREAMING + projectedOutputColumnNums: [0] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) mode: final outputColumnNames: _col0, _col1, _col3 @@ -444,9 +737,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + 
className: VectorFileSinkOperator + native: false Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -492,8 +792,8 @@ POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key, count(distinct val) FROM T1 GROUP BY key with cube POSTHOOK: type: QUERY PLAN VECTORIZATION: - enabled: false - enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage @@ -530,8 +830,19 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported + vectorized: false Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: GROUPBY operator: DISTINCT not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) @@ -547,6 +858,11 @@ STAGE PLANS: value expressions: _col2 (type: bigint) Reducer 3 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: GROUPBY operator: DISTINCT not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -614,8 +930,8 @@ INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube POSTHOOK: type: QUERY PLAN VECTORIZATION: - enabled: false - enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-2 is a root stage @@ -643,12 +959,27 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string), val (type: string) outputColumnNames: key, val + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + Group By Vectorization: + aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 4:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -657,14 +988,33 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ Map-reduce partition columns: rand() (type: double) + Reduce Sink Vectorization: + className: 
VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1, 2] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [4] + valueColumnNums: [3] Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Select Operator expressions: key (type: string), val (type: string) outputColumnNames: key, val + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(ConstantVectorExpression(val 1) -> 6:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -673,15 +1023,59 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ Map-reduce partition columns: rand() (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1, 2] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [4] + valueColumnNums: [3] Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, bigint] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: PARTIALS + keyExpressions: col 0:string, col 1:string, col 2:bigint + native: false + vectorProcessingMode: STREAMING + 
projectedOutputColumnNums: [0] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 @@ -690,10 +1084,22 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1, 2] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [0, 1] + valueColumnNums: [3] Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 3 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -729,6 +1135,11 @@ STAGE PLANS: value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 4 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) @@ -743,10 +1154,31 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 - Execution mode: llap + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: PARTIALS + keyExpressions: col 0:string, col 1:string, col 2:bigint + native: false + vectorProcessingMode: STREAMING + projectedOutputColumnNums: [0] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 @@ -755,10 +1187,22 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1, 2] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [0, 1] + valueColumnNums: [3] Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 6 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -794,6 +1238,11 @@ STAGE PLANS: value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 7 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out index 7f7624a..80e073b 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out @@ -1430,6 +1430,469 @@ NULL 5 1 NULL NULL 1 NULL NULL 2 PREHOOK: query: explain vectorization detail +select key, value, grouping(key), grouping(value) +from T1 +group by key, value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key, value, grouping(key), grouping(value) +from T1 +group by key, value +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: key, value + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int, col 1:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: key (type: int), value (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Reduce Sink Vectorization: + className: 
VectorReduceSinkMultiKeyOperator + keyColumnNums: [0, 1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:int, KEY._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int, col 1:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), 0L (type: bigint), 0L (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + selectExpressions: ConstantVectorExpression(val 0) -> 2:bigint, ConstantVectorExpression(val 0) -> 3:bigint + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, grouping(key), grouping(value) +from T1 +group by key, value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, grouping(key), grouping(value) +from T1 +group by key, value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value _c2 _c3 +1 1 0 0 +1 NULL 0 0 +2 2 0 0 +3 3 0 0 +3 NULL 0 0 +4 5 0 0 +PREHOOK: query: explain vectorization detail +select key, value, grouping(value) +from T1 +group by key, value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key, value, grouping(value) +from T1 +group by key, value +POSTHOOK: type: QUERY +Explain 
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: key, value + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int, col 1:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: key (type: int), value (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0, 1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:int, KEY._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int, col 1:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), 0L (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + 
native: true + projectedOutputColumnNums: [0, 1, 2] + selectExpressions: ConstantVectorExpression(val 0) -> 2:bigint + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, grouping(value) +from T1 +group by key, value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, grouping(value) +from T1 +group by key, value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value _c2 +1 1 0 +1 NULL 0 +2 2 0 +3 3 0 +3 NULL 0 +4 5 0 +PREHOOK: query: explain vectorization detail +select key, value +from T1 +group by key, value +having grouping(key) = 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key, value +from T1 +group by key, value +having grouping(key) = 0 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: key, value + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int, col 1:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: key (type: int), value (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0, 1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + 
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:int, KEY._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int, col 1:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value +from T1 +group by key, value +having grouping(key) = 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value +from T1 +group by key, value +having grouping(key) = 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value +1 1 +1 NULL +2 2 +3 3 +3 NULL +4 5 +PREHOOK: query: explain vectorization detail select key, value, `grouping__id`, grouping(key, value) from T1 group by cube(key, value) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out index e839214..ef49d90 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out @@ -210,27 +210,12 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string), val (type: string) outputColumnNames: key, val - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT val) - Group By Vectorization: - aggregators: VectorUDAFCount(col 1:string) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:string, ConstantVectorExpression(val 0) -> 3:bigint, col 1:string - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] keys: key (type: string), 0L (type: bigint), val (type: string) mode: hash 
outputColumnNames: _col0, _col1, _col2, _col3 @@ -239,29 +224,15 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: No DISTINCT columns IS false Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:string, val:string - partitionColumnCount: 0 - scratchColumnTypeNames: [bigint] + notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported + vectorized: false Reducer 2 Execution mode: llap Reduce Vectorization: @@ -303,12 +274,12 @@ POSTHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with rollu POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -1 0 -2 0 -3 0 -7 0 -8 0 -NULL 0 +1 1 +2 1 +3 1 +7 1 +8 2 +NULL 6 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT key, val, count(1) FROM T1 GROUP BY key, val with rollup PREHOOK: type: QUERY @@ -539,27 +510,12 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string), val (type: string) outputColumnNames: key, val - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT val) - Group By Vectorization: - aggregators: VectorUDAFCount(col 1:string) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:string, ConstantVectorExpression(val 0) -> 3:bigint, col 1:string - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] keys: key (type: string), 0L (type: bigint), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -568,29 +524,15 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: No DISTINCT columns IS false Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Map Vectorization: enabled: 
true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:string, val:string - partitionColumnCount: 0 - scratchColumnTypeNames: [bigint] + notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported + vectorized: false Reducer 2 Execution mode: llap Reduce Vectorization: @@ -652,12 +594,12 @@ POSTHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with rollu POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -1 0 -2 0 -3 0 -7 0 -8 0 -NULL 0 +1 1 +2 1 +3 1 +7 1 +8 2 +NULL 6 PREHOOK: query: CREATE TABLE T2(key1 STRING, key2 STRING, val INT) STORED AS ORC PREHOOK: type: CREATETABLE PREHOOK: Output: database:default diff --git ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out new file mode 100644 index 0000000..79ca6d9 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out @@ -0,0 +1,996 @@ +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: INSERT OVERWRITE TABLE T1 PARTITION (ds='1') +SELECT * from src where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@t1@ds=1 +POSTHOOK: query: INSERT OVERWRITE TABLE T1 PARTITION (ds='1') +SELECT * from src where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t1@ds=1 +POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +select count(distinct key) from T1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +select count(distinct key) from T1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ds:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: 
VectorGroupByOperator + groupByMode: FINAL + keyExpressions: col 0:string + native: false + vectorProcessingMode: STREAMING + projectedOutputColumnNums: [] + keys: key (type: string) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col0) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0:string) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0] + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, val:string + partitionColumnCount: 1 + partitionColumns: ds:string + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(distinct key) from T1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(distinct key) from T1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 
+POSTHOOK: Input: default@t1@ds=1 +#### A masked pattern was here #### +6 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +select count(distinct key), count(1), count(key), sum(distinct key) from T1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +select count(distinct key), count(1), count(key), sum(distinct key) from T1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(DISTINCT key), count(), count(key), sum(DISTINCT key) + bucketGroup: true + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported + vectorized: false + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: GROUPBY operator: DISTINCT not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col0:0._col0), count(VALUE._col1), count(VALUE._col2), sum(DISTINCT KEY._col0:1._col0) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(distinct key), count(1), count(key), sum(distinct key) from T1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(distinct key), count(1), count(key), sum(distinct key) from T1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t1@ds=1 +#### A masked pattern was here #### +6 10 10 28.0 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +select count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN 
VECTORIZATION DETAIL +select count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(DISTINCT key), count(), count(key), sum(DISTINCT key) + bucketGroup: true + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported + vectorized: false + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: GROUPBY operator: DISTINCT not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), count(VALUE._col1), count(VALUE._col2), sum(DISTINCT KEY._col1:1._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t1@ds=1 +#### A masked pattern was here #### +1 1 1 2.0 +1 1 1 4.0 +1 1 1 8.0 +1 1 1 9.0 +1 3 3 0.0 +1 3 3 
5.0 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +select key, count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +select key, count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(DISTINCT key), count(), count(key), sum(DISTINCT key) + bucketGroup: true + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported + vectorized: false + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: GROUPBY operator: DISTINCT not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), count(VALUE._col1), count(VALUE._col2), sum(DISTINCT KEY._col1:1._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select key, count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t1@ds=1 +#### A masked pattern was here #### +0 1 3 3 0.0 +2 1 1 1 2.0 +4 1 1 1 4.0 +5 1 3 3 5.0 +8 1 1 1 8.0 +9 1 1 1 9.0 +PREHOOK: query: EXPLAIN 
VECTORIZATION DETAIL +select count(distinct key+key) from T1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +select count(distinct key+key) from T1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ds:string, 3:ROW__ID:struct] + Select Operator + expressions: (UDFToDouble(key) + UDFToDouble(key)) (type: double) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [6] + selectExpressions: DoubleColAddDoubleColumn(col 4:double, col 5:double)(children: CastStringToDouble(col 0:string) -> 4:double, CastStringToDouble(col 0:string) -> 5:double) -> 6:double + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 6:double + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: double) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, val:string + partitionColumnCount: 1 + partitionColumns: ds:string + scratchColumnTypeNames: [double, double, double] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY._col0:double + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: 
VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:double + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col0) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0:double) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0] + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(distinct key+key) from T1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(distinct key+key) from T1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t1@ds=1 +#### A masked pattern was here #### +6 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +select count(distinct 1) from T1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +select count(distinct 1) from T1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- 
Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ds:string, 3:ROW__ID:struct] + Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [] + Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: ConstantVectorExpression(val 1) -> 4:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: 1 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [] + dataColumns: key:string, val:string + partitionColumnCount: 1 + partitionColumns: ds:string + scratchColumnTypeNames: [bigint] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY._col0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0] + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(distinct 1) from T1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(distinct 1) from T1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t1@ds=1 +#### A masked pattern was here #### +1 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +select count(distinct key) from T1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +select count(distinct key) from T1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ds:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output 
Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, val:string + partitionColumnCount: 1 + partitionColumns: ds:string + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY._col0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: COMPLETE + keyExpressions: col 0:string + native: false + vectorProcessingMode: STREAMING + projectedOutputColumnNums: [] + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0] + Statistics: Num rows: 6 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0:string) -> bigint + className: VectorGroupByOperator + groupByMode: COMPLETE + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + 
compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(distinct key) from T1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(distinct key) from T1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t1@ds=1 +#### A masked pattern was here #### +6 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out new file mode 100644 index 0000000..6c6986e --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out @@ -0,0 +1,186 @@ +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1 PARTITION (ds='1') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1 PARTITION (ds='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +POSTHOOK: Output: default@t1@ds=1 +PREHOOK: query: INSERT OVERWRITE TABLE T1 PARTITION (ds='1') select key, val from T1 where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t1@ds=1 +PREHOOK: Output: default@t1@ds=1 +POSTHOOK: query: INSERT OVERWRITE TABLE T1 PARTITION (ds='1') select key, val from T1 where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t1@ds=1 +POSTHOOK: Output: default@t1@ds=1 +POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +select count(distinct key) from T1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +select count(distinct key) from T1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ds:string, 
3:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: FINAL + keyExpressions: col 0:string + native: false + vectorProcessingMode: STREAMING + projectedOutputColumnNums: [] + keys: key (type: string) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0:string) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0] + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, val:string + partitionColumnCount: 1 + partitionColumns: ds:string + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + 
Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(distinct key) from T1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(distinct key) from T1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t1@ds=1 +#### A masked pattern was here #### +5 +PREHOOK: query: DROP TABLE T1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 +POSTHOOK: query: DROP TABLE T1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t1 diff --git ql/src/test/results/clientpositive/llap/vectorization_limit.q.out ql/src/test/results/clientpositive/llap/vectorization_limit.q.out new file mode 100644 index 0000000..c299796 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vectorization_limit.q.out @@ -0,0 +1,952 @@ +WARNING: Comparing a bigint and a double may result in a loss of precision. +PREHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 183488 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean) + Statistics: Num rows: 1365 Data size: 20400 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cbigint (type: bigint), cdouble (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1365 Data size: 16320 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 7 + Statistics: Num rows: 7 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: 7 + Processor Tree: + ListSink + +WARNING: Comparing a bigint and a double may result in a loss of precision. 
+PREHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-1887561756 -10011.0 +-1887561756 -13877.0 +-1887561756 -2281.0 +-1887561756 -8881.0 +-1887561756 10361.0 +-1887561756 1839.0 +-1887561756 9531.0 +PREHOOK: query: explain vectorization detail +select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 146796 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:tinyint) + predicate: ctinyint is not null (type: boolean) + Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 5, 1] + Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: double) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 5] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.3 + value expressions: _col2 (type: smallint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 5] + dataColumns: ctinyint:tinyint, 
csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:tinyint, KEY.reducesinkkey1:double, VALUE._col0:smallint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-64 -10462.0 -10462 +-64 -15920.0 -15920 +-64 -1600.0 -1600 +-64 -200.0 -200 +-64 -2919.0 -2919 +-64 -3097.0 -3097 +-64 -3586.0 -3586 +-64 -4018.0 -4018 +-64 -4040.0 -4040 +-64 -4803.0 -4803 +-64 -6907.0 -6907 +-64 -7196.0 -7196 +-64 -7196.0 -7196 +-64 -7196.0 -7196 +-64 -7196.0 -7196 +-64 -7196.0 -7196 +-64 -7196.0 -7196 +-64 -7196.0 -7196 +-64 -8080.0 -8080 +-64 -9842.0 -9842 +PREHOOK: query: explain vectorization detail +select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE + TableScan 
Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] + Select Operator + expressions: ctinyint (type: tinyint), (cdouble + 1.0D) (type: double) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 13] + selectExpressions: DoubleColAddDoubleScalar(col 5:double, val 1.0) -> 13:double + Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1), count(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 13:double) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:tinyint + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0, 1] + keys: _col0 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [0] + valueColumnNums: [1, 2] + Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.3 + value expressions: _col1 (type: double), _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 5] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [double] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY._col0:tinyint, VALUE._col0:double, VALUE._col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), count(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFCountMerge(col 2:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:tinyint + native: false 
+ vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1] + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: tinyint), (_col1 / _col2) (type: double) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 3] + selectExpressions: DoubleColDivideLongColumn(col 1:double, col 2:bigint) -> 3:double + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-46 3033.55 +-47 -574.6428571428571 +-48 1672.909090909091 +-49 768.7659574468086 +-50 -960.0192307692307 +-51 -96.46341463414635 +-52 2810.705882352941 +-53 -532.7567567567568 +-54 2712.7272727272725 +-55 2385.595744680851 +-56 2595.818181818182 +-57 1867.0535714285713 +-58 3483.2444444444445 +-59 318.27272727272725 +-60 1071.82 +-61 914.3404255319149 +-62 245.69387755102042 +-63 2178.7272727272725 +-64 373.52941176470586 +NULL 9370.0945309795 +PREHOOK: query: explain vectorization detail +select distinct(ctinyint) from alltypesorc limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select distinct(ctinyint) from alltypesorc limit 20 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] + Select Operator + expressions: ctinyint (type: tinyint) + outputColumnNames: ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12288 Data size: 36696 Basic 
stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:tinyint + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: ctinyint (type: tinyint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.3 + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY._col0:tinyint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:tinyint + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select distinct(ctinyint) from alltypesorc limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select distinct(ctinyint) from alltypesorc limit 20 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-46 +-47 +-48 +-49 +-50 +-51 +-52 +-53 +-54 +-55 +-56 +-57 +-58 +-59 +-60 +-61 +-62 +-63 +-64 +NULL +PREHOOK: query: explain vectorization detail +select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] + Select Operator + expressions: ctinyint (type: tinyint), cdouble (type: double) + outputColumnNames: ctinyint, cdouble + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 5] + Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:tinyint, col 5:double + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: ctinyint (type: tinyint), cdouble (type: double) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: double) + sort order: ++ + Map-reduce partition columns: _col0 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [0] + valueColumnNums: [] + Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 5] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: 
vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:tinyint, KEY._col1:double + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:tinyint, col 1:double + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: tinyint), KEY._col1 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col1) + Group By Vectorization: + aggregators: VectorUDAFCount(col 1:double) -> bigint + className: VectorGroupByOperator + groupByMode: COMPLETE + keyExpressions: col 0:tinyint + native: false + vectorProcessingMode: STREAMING + projectedOutputColumnNums: [0] + keys: _col0 (type: tinyint) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-46 24 +-47 22 +-48 29 +-49 26 +-50 30 +-51 21 +-52 33 +-53 22 +-54 26 +-55 29 +-56 36 +-57 35 +-58 23 +-59 31 +-60 27 +-61 25 +-62 27 +-63 19 +-64 24 +NULL 2932 +PREHOOK: query: explain vectorization detail +select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 0 + Processor Tree: + ListSink + +PREHOOK: query: select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was 
here #### +PREHOOK: query: explain vectorization detail +select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:tinyint) + predicate: ctinyint is not null (type: boolean) + Statistics: Num rows: 9173 Data size: 82188 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:tinyint) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 5:double + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: cdouble (type: double) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 5] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:double, VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:double + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: bigint), _col0 (type: double) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [1, 0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.3 + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:bigint, KEY.reducesinkkey1:double + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: double), KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0] + Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-10462.0 -64 
+-1121.0 -89 +-11322.0 -101 +-11492.0 -78 +-15920.0 -64 +-4803.0 -64 +-6907.0 -64 +-7196.0 -2009 +-8080.0 -64 +-8118.0 -80 +-9842.0 -64 +10496.0 -67 +15601.0 -1733 +3520.0 -86 +4811.0 -115 +5241.0 -80 +557.0 -75 +7705.0 -88 +9452.0 -76 +NULL -32768 diff --git ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out new file mode 100644 index 0000000..f19e2ca --- /dev/null +++ ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out @@ -0,0 +1,362 @@ +PREHOOK: query: create table dtest(a int, b int) clustered by (a) sorted by (a) into 1 buckets stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dtest +POSTHOOK: query: create table dtest(a int, b int) clustered by (a) sorted by (a) into 1 buckets stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dtest +PREHOOK: query: insert into table dtest select c,b from (select array(300,300,300,300,300) as a, 1 as b from src order by a limit 1) y lateral view explode(a) t1 as c +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dtest +POSTHOOK: query: insert into table dtest select c,b from (select array(300,300,300,300,300) as a, 1 as b from src order by a limit 1) y lateral view explode(a) t1 as c +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dtest +POSTHOOK: Lineage: dtest.a SCRIPT [] +POSTHOOK: Lineage: dtest.b SIMPLE [] +PREHOOK: query: explain vectorization detail +select sum(distinct a), count(distinct a) from dtest +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select sum(distinct a), count(distinct a) from dtest +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dtest + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:a:int, 1:b:int, 2:ROW__ID:struct] + Select Operator + expressions: a (type: int) + outputColumnNames: a + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: FINAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: STREAMING + projectedOutputColumnNums: [] + keys: a (type: int) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), count(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:int) -> bigint, VectorUDAFCount(col 0:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0, 1] + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + 
className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 1] + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: a:int, b:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), count(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFCountMerge(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1] + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(distinct a), count(distinct a) from dtest +PREHOOK: type: QUERY +PREHOOK: Input: default@dtest +#### A masked pattern was here #### +POSTHOOK: query: select sum(distinct a), count(distinct a) from dtest +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dtest +#### A masked pattern was here #### +300 1 +PREHOOK: query: explain vectorization detail +select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked 
pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] + Select Operator + expressions: cint (type: int) + outputColumnNames: cint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2] + Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 2:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: cint (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 6030 Data size: 18008 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 6030 Data size: 18008 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY._col0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6030 Data size: 18008 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), UDFToDouble(_col0) (type: double), (UDFToDouble(_col0) * UDFToDouble(_col0)) (type: double) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + 
className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 4] + selectExpressions: CastLongToDouble(col 0:int) -> 1:double, DoubleColMultiplyDoubleColumn(col 2:double, col 3:double)(children: CastLongToDouble(col 0:int) -> 2:double, CastLongToDouble(col 0:int) -> 3:double) -> 4:double + Statistics: Num rows: 6030 Data size: 18008 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:int) -> bigint, VectorUDAFCount(col 0:int) -> bigint, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFSumDouble(col 1:double) -> double + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0, 1, 2, 3] + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint, VALUE._col2:double, VALUE._col3:double + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFSumDouble(col 3:double) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1, 2, 3] + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint), _col1 (type: bigint), (UDFToDouble(_col0) / _col1) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 5, 4] + selectExpressions: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: CastLongToDouble(col 0:bigint) -> 4:double) -> 5:double, FuncPowerDoubleToDouble(col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: 
DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double) -> 4:double + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-3482841611 6082 -572647.4204209142 6.153814687328984E8 diff --git ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out index 59db9d1..8a81b34 100644 --- ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out +++ ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out @@ -470,27 +470,12 @@ STAGE PLANS: TableScan alias: alltypesparquet Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Select Operator expressions: ctinyint (type: tinyint), cdouble (type: double) outputColumnNames: ctinyint, cdouble - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 5] Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT cdouble) - Group By Vectorization: - aggregators: VectorUDAFCount(col 5:double) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:tinyint, col 5:double - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] keys: ctinyint (type: tinyint), cdouble (type: double) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -499,29 +484,14 @@ STAGE PLANS: key expressions: _col0 (type: tinyint), _col1 (type: double) sort order: ++ Map-reduce partition columns: _col0 (type: tinyint) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No DISTINCT columns IS false Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - Execution mode: vectorized Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - 
featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [0, 5] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [] + notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported + vectorized: false Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true diff --git ql/src/test/results/clientpositive/vector_count.q.out ql/src/test/results/clientpositive/vector_count.q.out index 7e30870..85d5926 100644 --- ql/src/test/results/clientpositive/vector_count.q.out +++ ql/src/test/results/clientpositive/vector_count.q.out @@ -62,26 +62,12 @@ STAGE PLANS: TableScan alias: abcd Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Select Operator expressions: a (type: int), b (type: int), c (type: int), d (type: int) outputColumnNames: a, b, c, d - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 3] Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT b), count(DISTINCT c), sum(d) - Group By Vectorization: - aggregators: VectorUDAFCount(col 1:int) -> bigint, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFSumLong(col 3:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int, col 1:int, col 2:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2] keys: a (type: int), b (type: int), c (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 @@ -90,23 +76,14 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No DISTINCT columns IS false Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: bigint) - Execution mode: vectorized Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported + vectorized: false Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -182,7 +159,7 @@ STAGE PLANS: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: GROUPBY operator: Aggregations with > 1 parameter are not supported count([Column[a], 
Column[b]]) + notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported vectorized: false Reduce Vectorization: enabled: false diff --git ql/src/test/results/clientpositive/vectorization_limit.q.out ql/src/test/results/clientpositive/vectorization_limit.q.out index 8226fd4..517cb07 100644 --- ql/src/test/results/clientpositive/vectorization_limit.q.out +++ ql/src/test/results/clientpositive/vectorization_limit.q.out @@ -470,27 +470,12 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Select Operator expressions: ctinyint (type: tinyint), cdouble (type: double) outputColumnNames: ctinyint, cdouble - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 5] Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT cdouble) - Group By Vectorization: - aggregators: VectorUDAFCount(col 5:double) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:tinyint, col 5:double - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] keys: ctinyint (type: tinyint), cdouble (type: double) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -499,29 +484,14 @@ STAGE PLANS: key expressions: _col0 (type: tinyint), _col1 (type: double) sort order: ++ Map-reduce partition columns: _col0 (type: tinyint) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No DISTINCT columns IS false Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - Execution mode: vectorized Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [0, 5] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [] + notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported + vectorized: false Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true diff --git ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out index 07576fd..3847481 100644 --- ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out +++ ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out @@ -37,28 +37,13 @@ STAGE 
PLANS: TableScan alias: dtest Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:a:int, 1:b:int, 2:ROW__ID:struct] Select Operator expressions: a (type: int) outputColumnNames: a - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(DISTINCT a), count(DISTINCT a) bucketGroup: true - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 0:int) -> bigint, VectorUDAFCount(col 0:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1] keys: a (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -66,28 +51,13 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No DISTINCT columns IS false Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: a:int, b:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] + notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported + vectorized: false Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -142,28 +112,12 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Select Operator expressions: cint (type: int), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double) outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [2, 13, 16] - selectExpressions: CastLongToDouble(col 2:int) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 2:int) -> 14:double, CastLongToDouble(col 2:int) -> 15:double) -> 16:double Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(DISTINCT _col0), count(DISTINCT _col0), sum(DISTINCT _col2), sum(DISTINCT _col1) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 
13:double) -> double - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 2:int, col 16:double, col 13:double - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: int), _col2 (type: double), _col1 (type: double) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -171,28 +125,13 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: double) sort order: +++ - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No DISTINCT columns IS false Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [2] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [double, double, double, double] + notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported + vectorized: false Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
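Taken together, the golden files above show the two sides of this change. On llap/Tez, queries such as select ctinyint, count(distinct(cdouble)) ... group by ctinyint and the global sum(distinct a)/count(distinct a) aggregates stay fully vectorized because the planner has already rewritten the DISTINCT aggregate into two ordinary GROUP BY stages before vectorization runs. On the MR paths (parquet_vectorization_limit.q.out, vector_count.q.out, vectorization_limit.q.out, vectorized_distinct_gby.q.out) the DISTINCT survives into the operator tree, so the map work now honestly reports "notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported" and falls back to row mode instead of producing a questionable vectorized plan. A hand-written sketch of the rewrite the Tez plans encode — an illustration of the idea, not the planner's literal output:

-- Equivalent rewrite of:
--   SELECT ctinyint, count(DISTINCT cdouble) FROM alltypesorc GROUP BY ctinyint;
-- Stage 1 deduplicates (ctinyint, cdouble) pairs with a plain GROUP BY;
-- stage 2 counts the surviving non-null cdouble values per ctinyint.
-- No DISTINCT aggregate remains, so every operator can be vectorized.
SELECT ctinyint, count(cdouble) AS cnt
FROM (
  SELECT ctinyint, cdouble
  FROM alltypesorc
  GROUP BY ctinyint, cdouble
) dedup
GROUP BY ctinyint;

This is exactly the shape visible in the llap plan above: a hash GROUP BY on (ctinyint, cdouble) on the map side, a MERGEPARTIAL GROUP BY in the reducer, and then a second, complete-mode GROUP BY computing count(_col1) with no distinct column left anywhere.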