diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index d7d8b6fee1..6efc6d3d06 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -2748,7 +2748,11 @@ private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce,
       return false;
     }
 
-    if (!validateAggregationDescs(desc.getAggregators(), desc.getMode(), hasKeys)) {
+    //TODO: isGroupingSetsPresent() is returning false, even though
+    // ListGroupingSets is present. Need to check if there is a hidden bug.
+    boolean isGroupingSetsPresent = (desc.getListGroupingSets() != null && !desc.getListGroupingSets().isEmpty());
+    if (!validateAggregationDescs(desc.getAggregators(), desc.getMode(),
+        isGroupingSetsPresent, hasKeys)) {
       return false;
     }
 
@@ -3002,10 +3006,12 @@ private boolean validateExprNodeDesc(List<ExprNodeDesc> descs,
   }
 
   private boolean validateAggregationDescs(List<AggregationDesc> descs,
-      GroupByDesc.Mode groupByMode, boolean hasKeys) {
+      GroupByDesc.Mode groupByMode, boolean isGroupingSetsPresent,
+      boolean hasKeys) {
     for (AggregationDesc d : descs) {
-      if (!validateAggregationDesc(d, groupByMode, hasKeys)) {
+      if (!validateAggregationDesc(d, groupByMode, isGroupingSetsPresent,
+          hasKeys)) {
         return false;
       }
     }
@@ -3162,7 +3168,7 @@ private boolean validateGenericUdf(ExprNodeGenericFuncDesc genericUDFExpr) {
   }
 
   private boolean validateAggregationDesc(AggregationDesc aggDesc, GroupByDesc.Mode groupByMode,
-      boolean hasKeys) {
+      boolean isGroupingSetsPresent, boolean hasKeys) {
     String udfName = aggDesc.getGenericUDAFName().toLowerCase();
 
     if (!supportedAggregationUdfs.contains(udfName)) {
@@ -3171,11 +3177,16 @@ private boolean validateAggregationDesc(AggregationDesc aggDesc, GroupByDesc.Mod
     }
 
     // The planner seems to pull this one out.
-    if (aggDesc.getDistinct()) {
+    if (groupByMode != GroupByDesc.Mode.HASH && aggDesc.getDistinct()) {
       setExpressionIssue("Aggregation Function", "DISTINCT not supported");
       return false;
     }
 
+    if (isGroupingSetsPresent && aggDesc.getDistinct()) {
+      setExpressionIssue("Aggregation Function", "DISTINCT with Groupingsets not supported");
+      return false;
+    }
+
     List<ExprNodeDesc> parameters = aggDesc.getParameters();
diff --git a/ql/src/test/queries/clientpositive/vectorized_distinct_gby.q b/ql/src/test/queries/clientpositive/vectorized_distinct_gby.q
index a64a60718f..c231cd2142 100644
--- a/ql/src/test/queries/clientpositive/vectorized_distinct_gby.q
+++ b/ql/src/test/queries/clientpositive/vectorized_distinct_gby.q
@@ -18,3 +18,14 @@ select sum(distinct a), count(distinct a) from dtest;
 explain vectorization detail
 select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc;
 select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc;
+
+explain vectorization detail
+select ctinyint, count(distinct cint), sum(( CASE WHEN ( ( cstring1 LIKE'test%1' )
+ OR ( cstring1 LIKE 'test%2' ) ) THEN 1 ELSE 0 END )) AS s,
+max(( CASE WHEN ( ( cstring1 LIKE 'test%3' ) OR ( cstring1 LIKE '%test%5' ) )
+THEN cstring1 ELSE 'XXXXX' END )) AS maxVal from alltypesorc group by ctinyint;
+
+select ctinyint, count(distinct cint), sum(( CASE WHEN ( ( cstring1 LIKE
+'test%1' ) OR ( cstring1 LIKE 'test%2' ) ) THEN 1 ELSE 0 END )) AS s,
+max(( CASE WHEN ( ( cstring1 LIKE 'test%3' ) OR ( cstring1 LIKE '%test%5' ) )
+THEN cstring1 ELSE 'XXXXX' END )) AS maxVal from alltypesorc group by ctinyint;
diff --git a/ql/src/test/results/clientpositive/auto_join18.q.out b/ql/src/test/results/clientpositive/auto_join18.q.out
index 272babd20b..0473494c2a 100644
--- a/ql/src/test/results/clientpositive/auto_join18.q.out
+++ b/ql/src/test/results/clientpositive/auto_join18.q.out
@@ -169,6 +169,7 @@ STAGE PLANS:
               sort order: ++
               Map-reduce partition columns: _col0 (type: string)
               Statistics: Num rows: 12 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE
+      Execution mode: vectorized
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(DISTINCT KEY._col1:0._col0)
diff --git a/ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out b/ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out
index 3656270e7b..961a320982 100644
--- a/ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out
@@ -171,6 +171,7 @@ STAGE PLANS:
               sort order: ++
               Map-reduce partition columns: _col0 (type: string)
               Statistics: Num rows: 12 Data size: 2292 Basic stats: COMPLETE Column stats: COMPLETE
+      Execution mode: vectorized
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0)
diff --git a/ql/src/test/results/clientpositive/auto_join32.q.out b/ql/src/test/results/clientpositive/auto_join32.q.out
index 124c2f63d6..f215592bac 100644
--- a/ql/src/test/results/clientpositive/auto_join32.q.out
+++ b/ql/src/test/results/clientpositive/auto_join32.q.out
@@ -95,6 +95,7 @@ STAGE PLANS:
               sort order: ++
               Map-reduce partition columns: _col0 (type: string)
               Statistics: Num rows: 1 Data size: 202 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
       Local Work:
         Map Reduce Local Work
       Reduce Operator Tree:
diff --git a/ql/src/test/results/clientpositive/distinct_stats.q.out b/ql/src/test/results/clientpositive/distinct_stats.q.out
index 6dd51cef69..c1ebfc3d57 100644
--- a/ql/src/test/results/clientpositive/distinct_stats.q.out
+++ b/ql/src/test/results/clientpositive/distinct_stats.q.out
@@ -64,6 +64,7 @@ STAGE PLANS:
               sort order: ++
               Map-reduce partition columns: _col0 (type: string)
               Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE
+      Execution mode: vectorized
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(DISTINCT KEY._col1:0._col0)
diff --git a/ql/src/test/results/clientpositive/groupby8_map_skew.q.out b/ql/src/test/results/clientpositive/groupby8_map_skew.q.out
index 819196315b..178fb1526b 100644
--- a/ql/src/test/results/clientpositive/groupby8_map_skew.q.out
+++ b/ql/src/test/results/clientpositive/groupby8_map_skew.q.out
@@ -83,6 +83,7 @@ STAGE PLANS:
                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Execution mode: vectorized
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(DISTINCT KEY._col1:0._col0)
diff --git a/ql/src/test/results/clientpositive/groupby9.q.out b/ql/src/test/results/clientpositive/groupby9.q.out
index d46ca78e96..6abb4b03f8 100644
--- a/ql/src/test/results/clientpositive/groupby9.q.out
+++ b/ql/src/test/results/clientpositive/groupby9.q.out
@@ -81,6 +81,7 @@ STAGE PLANS:
                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Execution mode: vectorized
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(DISTINCT KEY._col1:0._col0)
@@ -968,6 +969,7 @@ STAGE PLANS:
                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Execution mode: vectorized
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(DISTINCT KEY._col1:0._col0)
@@ -1855,6 +1857,7 @@ STAGE PLANS:
                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Execution mode: vectorized
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(DISTINCT KEY._col1:0._col0)
@@ -3632,6 +3635,7 @@ STAGE PLANS:
                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Execution mode: vectorized
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(DISTINCT KEY._col1:0._col0)
diff --git a/ql/src/test/results/clientpositive/groupby_distinct_samekey.q.out b/ql/src/test/results/clientpositive/groupby_distinct_samekey.q.out
index 1c926364a8..901d6378ff 100644
--- a/ql/src/test/results/clientpositive/groupby_distinct_samekey.q.out
+++ b/ql/src/test/results/clientpositive/groupby_distinct_samekey.q.out
@@ -54,6 +54,7 @@ STAGE PLANS:
               sort order: ++
               Map-reduce partition columns: _col0 (type: int)
               Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+      Execution mode: vectorized
       Reduce Operator Tree:
         Group By Operator
           aggregations: sum(DISTINCT KEY._col1:0._col0)
@@ -111,6 +112,7 @@ STAGE PLANS:
               sort order: ++
               Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: sum(DISTINCT KEY._col1:0._col0) diff --git a/ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out b/ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out index 37d60f1836..991f343394 100644 --- a/ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out +++ b/ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out @@ -81,6 +81,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) diff --git a/ql/src/test/results/clientpositive/groupby_nocolumnalign.q.out b/ql/src/test/results/clientpositive/groupby_nocolumnalign.q.out index 19ae1380fd..246bca9dc4 100644 --- a/ql/src/test/results/clientpositive/groupby_nocolumnalign.q.out +++ b/ql/src/test/results/clientpositive/groupby_nocolumnalign.q.out @@ -48,6 +48,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 185000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) diff --git a/ql/src/test/results/clientpositive/groupby_position.q.out b/ql/src/test/results/clientpositive/groupby_position.q.out index f52623a534..f4cc42e991 100644 --- a/ql/src/test/results/clientpositive/groupby_position.q.out +++ b/ql/src/test/results/clientpositive/groupby_position.q.out @@ -79,6 +79,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) @@ -372,6 +373,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) diff --git a/ql/src/test/results/clientpositive/join18.q.out b/ql/src/test/results/clientpositive/join18.q.out index e40427b16a..85af7a6a72 100644 --- a/ql/src/test/results/clientpositive/join18.q.out +++ b/ql/src/test/results/clientpositive/join18.q.out @@ -135,6 +135,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 12 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) diff --git a/ql/src/test/results/clientpositive/join18_multi_distinct.q.out b/ql/src/test/results/clientpositive/join18_multi_distinct.q.out index f40dd4bffe..9f7c063c8d 100644 --- a/ql/src/test/results/clientpositive/join18_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/join18_multi_distinct.q.out @@ -137,6 +137,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 12 
Data size: 2292 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) diff --git a/ql/src/test/results/clientpositive/llap/count.q.out b/ql/src/test/results/clientpositive/llap/count.q.out index 777e579c47..50981744fb 100644 --- a/ql/src/test/results/clientpositive/llap/count.q.out +++ b/ql/src/test/results/clientpositive/llap/count.q.out @@ -72,7 +72,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: bigint) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 Execution mode: llap diff --git a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out index 738bf44cf8..1ed2a7a24b 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out @@ -3570,11 +3570,11 @@ Stage-4 Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Union 3 [SIMPLE_EDGE] <-Map 8 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_63] + Reduce Output Operator [RS_64] PartitionCols:_col0, _col1 - Group By Operator [GBY_62] (rows=1 width=272) + Group By Operator [GBY_63] (rows=1 width=272) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_61] (rows=500 width=10) + Select Operator [SEL_62] (rows=500 width=10) Output:["_col0","_col1"] TableScan [TS_48] (rows=500 width=10) Output:["key","value"] @@ -3784,42 +3784,42 @@ Stage-4 Group By Operator [GBY_15] (rows=1 width=96) Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 <-Union 3 [SIMPLE_EDGE] - <-Map 8 [CONTAINS] llap - Reduce Output Operator [RS_52] + <-Map 8 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_73] PartitionCols:_col0 - Group By Operator [GBY_50] (rows=1 width=280) + Group By Operator [GBY_71] (rows=1 width=280) Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_48] (rows=500 width=10) + Select Operator [SEL_70] (rows=500 width=10) Output:["_col0","_col1"] TableScan [TS_47] (rows=500 width=10) Output:["key","value"] - Reduce Output Operator [RS_53] + Reduce Output Operator [RS_74] PartitionCols:_col0, _col1 - Group By Operator [GBY_51] (rows=1 width=464) + Group By Operator [GBY_72] (rows=1 width=464) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_48] - <-Map 9 [CONTAINS] llap - Reduce Output Operator [RS_59] + Please refer to the previous Select Operator [SEL_70] + <-Map 9 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_78] PartitionCols:_col0 - Group By Operator [GBY_57] (rows=1 width=280) + Group By Operator [GBY_76] (rows=1 width=280) Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_55] (rows=500 width=10) + Select Operator [SEL_75] (rows=500 width=10) Output:["_col0","_col1"] TableScan [TS_54] (rows=500 width=10) Output:["key","value"] - Reduce Output Operator [RS_60] + Reduce Output Operator [RS_79] PartitionCols:_col0, _col1 - Group By Operator [GBY_58] (rows=1 width=464) + Group By Operator [GBY_77] (rows=1 width=464) 
Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_55] - <-Reducer 2 [CONTAINS] llap - Reduce Output Operator [RS_45] + Please refer to the previous Select Operator [SEL_75] + <-Reducer 2 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_68] PartitionCols:_col0 - Group By Operator [GBY_43] (rows=1 width=280) + Group By Operator [GBY_66] (rows=1 width=280) Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_41] (rows=1 width=272) + Select Operator [SEL_65] (rows=1 width=272) Output:["_col0","_col1"] - Group By Operator [GBY_40] (rows=1 width=8) + Group By Operator [GBY_64] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap PARTITION_ONLY_SHUFFLE [RS_63] @@ -3828,11 +3828,11 @@ Stage-4 Select Operator [SEL_61] (rows=500 width=10) TableScan [TS_0] (rows=500 width=10) default@src,s1,Tbl:COMPLETE,Col:COMPLETE - Reduce Output Operator [RS_46] + Reduce Output Operator [RS_69] PartitionCols:_col0, _col1 - Group By Operator [GBY_44] (rows=1 width=464) + Group By Operator [GBY_67] (rows=1 width=464) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_41] + Please refer to the previous Select Operator [SEL_65] PARTITION_ONLY_SHUFFLE [RS_22] Group By Operator [GBY_21] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] @@ -3920,28 +3920,28 @@ Stage-4 Group By Operator [GBY_13] (rows=1 width=96) Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 <-Union 3 [SIMPLE_EDGE] - <-Map 8 [CONTAINS] llap - Reduce Output Operator [RS_50] + <-Map 8 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_64] PartitionCols:_col0 - Group By Operator [GBY_48] (rows=1 width=280) + Group By Operator [GBY_62] (rows=1 width=280) Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_46] (rows=500 width=10) + Select Operator [SEL_61] (rows=500 width=10) Output:["_col0","_col1"] TableScan [TS_45] (rows=500 width=10) Output:["key","value"] - Reduce Output Operator [RS_51] + Reduce Output Operator [RS_65] PartitionCols:_col0, _col1 - Group By Operator [GBY_49] (rows=1 width=464) + Group By Operator [GBY_63] (rows=1 width=464) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_46] - <-Reducer 2 [CONTAINS] llap - Reduce Output Operator [RS_43] + Please refer to the previous Select Operator [SEL_61] + <-Reducer 2 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_59] PartitionCols:_col0 - Group By Operator [GBY_41] (rows=1 width=280) + Group By Operator [GBY_57] (rows=1 width=280) Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_39] (rows=1 width=272) + Select Operator [SEL_56] (rows=1 width=272) Output:["_col0","_col1"] - Group By Operator [GBY_38] (rows=1 width=8) + Group By Operator [GBY_55] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap 
PARTITION_ONLY_SHUFFLE [RS_54] @@ -3950,11 +3950,11 @@ Stage-4 Select Operator [SEL_52] (rows=500 width=10) TableScan [TS_0] (rows=500 width=10) default@src,s1,Tbl:COMPLETE,Col:COMPLETE - Reduce Output Operator [RS_44] + Reduce Output Operator [RS_60] PartitionCols:_col0, _col1 - Group By Operator [GBY_42] (rows=1 width=464) + Group By Operator [GBY_58] (rows=1 width=464) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_39] + Please refer to the previous Select Operator [SEL_56] PARTITION_ONLY_SHUFFLE [RS_20] Group By Operator [GBY_19] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] diff --git a/ql/src/test/results/clientpositive/llap/metadataonly1.q.out b/ql/src/test/results/clientpositive/llap/metadataonly1.q.out index 2db37e43f8..54cd83ddee 100644 --- a/ql/src/test/results/clientpositive/llap/metadataonly1.q.out +++ b/ql/src/test/results/clientpositive/llap/metadataonly1.q.out @@ -305,7 +305,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 192 Basic stats: PARTIAL Column stats: COMPLETE tag: -1 auto parallelism: false - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Path -> Alias: nullscan://null/default.test1_n12/part_ds=1_ [test1_n12] @@ -1048,7 +1048,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 376 Basic stats: PARTIAL Column stats: COMPLETE tag: -1 auto parallelism: true - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Path -> Alias: nullscan://null/default.test2_n8/part_ds=1_hr=1_ [test2_n8] @@ -1830,7 +1830,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 376 Basic stats: PARTIAL Column stats: COMPLETE tag: -1 auto parallelism: true - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Path -> Alias: nullscan://null/default.test2_n8/part_ds=01_10_10_hr=01_ [test2_n8] diff --git a/ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out b/ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out index 7b6bd1f8cb..57885f7cf5 100644 --- a/ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out @@ -123,7 +123,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map 9 Map Operator Tree: @@ -160,10 +160,10 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1102,7 +1102,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map 7 Map Operator Tree: @@ -1159,7 +1159,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 1001 Data 
size: 464464 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Reducer 3 Execution mode: llap @@ -1264,7 +1264,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 8 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2097,7 +2097,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Map 7 Map Operator Tree: @@ -2154,7 +2154,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Reducer 3 Execution mode: llap @@ -2259,7 +2259,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 8 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -3103,10 +3103,10 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 501 Data size: 232464 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/llap/vector_count.q.out b/ql/src/test/results/clientpositive/llap/vector_count.q.out index 65f21b5699..1b196ce667 100644 --- a/ql/src/test/results/clientpositive/llap/vector_count.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_count.q.out @@ -72,12 +72,26 @@ STAGE PLANS: TableScan alias: abcd Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true Select Operator expressions: a (type: int), b (type: int), c (type: int), d (type: int) outputColumnNames: a, b, c, d + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT b), count(DISTINCT c), sum(d) + Group By Vectorization: + aggregators: VectorUDAFCount(col 1:int) -> bigint, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFSumLong(col 3:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int, col 1:int, col 2:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0, 1, 2] keys: a (type: int), b (type: int), c (type: int) minReductionHashAggr: 0.14285713 mode: hash @@ -88,16 +102,24 @@ STAGE PLANS: null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS 
true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No DISTINCT columns IS false Statistics: Num rows: 3 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: bigint) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Vectorization: @@ -190,7 +212,7 @@ STAGE PLANS: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported + notVectorizedReason: GROUPBY operator: Aggregations with > 1 parameter are not supported count([Column[a], Column[b]]) vectorized: false Reducer 2 Execution mode: llap diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out index c30f7ff2c1..96c83be6b2 100644 --- a/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out @@ -548,7 +548,7 @@ STAGE PLANS: enabled: true enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT with Groupingsets not supported vectorized: false Reducer 2 Execution mode: llap @@ -867,7 +867,7 @@ STAGE PLANS: enabled: true enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT with Groupingsets not supported vectorized: false Reducer 2 Execution mode: llap diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out index ce27c9d147..e6ae542cbe 100644 --- a/ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out @@ -243,7 +243,7 @@ STAGE PLANS: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported + notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT with Groupingsets not supported vectorized: false Reducer 2 Execution mode: llap @@ -556,7 +556,7 @@ STAGE PLANS: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported 
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT with Groupingsets not supported vectorized: false Reducer 2 Execution mode: llap diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out index df7a903eb7..e64d08517c 100644 --- a/ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out @@ -230,7 +230,7 @@ STAGE PLANS: enabled: true enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported + notVectorizedReason: GROUPBY operator: Vector aggregation : "sum" for input type: "BYTES" and output type: "DOUBLE" and mode: PARTIAL1 not supported for evaluator GenericUDAFSumDouble vectorized: false Reducer 2 Execution mode: llap @@ -328,7 +328,7 @@ STAGE PLANS: enabled: true enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported + notVectorizedReason: GROUPBY operator: Vector aggregation : "sum" for input type: "BYTES" and output type: "DOUBLE" and mode: PARTIAL1 not supported for evaluator GenericUDAFSumDouble vectorized: false Reducer 2 Execution mode: llap @@ -436,7 +436,7 @@ STAGE PLANS: enabled: true enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported + notVectorizedReason: GROUPBY operator: Vector aggregation : "sum" for input type: "BYTES" and output type: "DOUBLE" and mode: PARTIAL1 not supported for evaluator GenericUDAFSumDouble vectorized: false Reducer 2 Execution mode: llap diff --git a/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out b/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out index 60cd2e3268..ca71b6e8fe 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out @@ -371,3 +371,266 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -3482841611 6082 -572647.4204209142 6.153814687328988E8 +PREHOOK: query: explain vectorization detail +select ctinyint, count(distinct cint), sum(( CASE WHEN ( ( cstring1 LIKE'test%1' ) + OR ( cstring1 LIKE 'test%2' ) ) THEN 1 ELSE 0 END )) AS s, +max(( CASE WHEN ( ( cstring1 LIKE 'test%3' ) OR ( cstring1 LIKE '%test%5' ) ) +THEN cstring1 ELSE 'XXXXX' END )) AS maxVal from alltypesorc group by ctinyint +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization detail +select ctinyint, count(distinct cint), sum(( CASE WHEN ( ( cstring1 LIKE'test%1' ) + OR ( cstring1 LIKE 'test%2' ) ) THEN 1 ELSE 0 END )) AS s, +max(( CASE WHEN ( ( cstring1 LIKE 'test%3' ) OR ( cstring1 LIKE '%test%5' ) ) +THEN cstring1 ELSE 'XXXXX' END )) AS maxVal from alltypesorc group by ctinyint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 935842 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] + Select Operator + expressions: ctinyint (type: tinyint), cint (type: int), CASE WHEN (((cstring1 like 'test%1') or (cstring1 like 'test%2'))) THEN (1) ELSE (0) END (type: int), CASE WHEN (((cstring1 like 'test%3') or (cstring1 like '%test%5'))) THEN (cstring1) ELSE ('XXXXX') END (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 2, 16, 20] + selectExpressions: IfExprLongScalarLongScalar(col 15:boolean, val 1, val 0)(children: ColOrCol(col 13:boolean, col 14:boolean)(children: SelectStringColLikeStringScalar(col 6:string) -> 13:boolean, SelectStringColLikeStringScalar(col 6:string) -> 14:boolean) -> 15:boolean) -> 16:int, IfExprStringGroupColumnStringScalar(col 19:boolean, col 6:string, val XXXXX)(children: ColOrCol(col 17:boolean, col 18:boolean)(children: SelectStringColLikeStringScalar(col 6:string) -> 17:boolean, SelectStringColLikeStringScalar(col 6:string) -> 18:boolean) -> 19:boolean) -> 20:string + Statistics: Num rows: 12288 Data size: 935842 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(DISTINCT _col1), sum(_col2), max(_col3) + Group By Vectorization: + aggregators: VectorUDAFCount(col 2:int) -> bigint, VectorUDAFSumLong(col 16:int) -> bigint, VectorUDAFMaxString(col 20:string) -> string + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:tinyint, col 2:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0, 1, 2] + keys: _col0 (type: tinyint), _col1 (type: int) + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 12288 Data size: 2530992 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No DISTINCT columns IS false + Statistics: Num rows: 12288 Data size: 2530992 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: bigint), _col4 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 2, 6] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint, bigint, bigint, string] + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: GROUPBY operator: DISTINCT not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1), max(VALUE._col2) + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 131 Data size: 26596 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 131 Data size: 26596 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select ctinyint, count(distinct cint), sum(( CASE WHEN ( ( cstring1 LIKE +'test%1' ) OR ( cstring1 LIKE 'test%2' ) ) THEN 1 ELSE 0 END )) AS s, +max(( CASE WHEN ( ( cstring1 LIKE 'test%3' ) OR ( cstring1 LIKE '%test%5' ) ) +THEN cstring1 ELSE 'XXXXX' END )) AS maxVal from alltypesorc group by ctinyint +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select ctinyint, count(distinct cint), sum(( CASE WHEN ( ( cstring1 LIKE +'test%1' ) OR ( cstring1 LIKE 'test%2' ) ) THEN 1 ELSE 0 END )) AS s, +max(( CASE WHEN ( ( cstring1 LIKE 'test%3' ) OR ( cstring1 LIKE '%test%5' ) ) +THEN cstring1 ELSE 'XXXXX' END )) AS maxVal from alltypesorc group by ctinyint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-64 3 0 XXXXX +-63 3 0 XXXXX +-62 3 0 XXXXX +-61 3 0 XXXXX +-60 3 0 XXXXX +-59 3 0 XXXXX +-58 3 0 XXXXX +-57 3 0 XXXXX +-56 3 0 XXXXX +-55 3 0 XXXXX +-54 3 0 XXXXX +-53 3 0 XXXXX +-52 3 0 XXXXX +-51 1009 0 XXXXX +-50 3 0 XXXXX +-49 3 0 XXXXX +-48 3 0 XXXXX +-47 3 0 XXXXX +-46 3 0 XXXXX +-45 3 0 XXXXX +-44 3 0 XXXXX +-43 3 0 XXXXX +-42 3 0 XXXXX +-41 3 0 XXXXX +-40 3 0 XXXXX +-39 3 0 XXXXX +-38 3 0 XXXXX +-37 3 0 XXXXX +-36 3 0 XXXXX +-35 3 0 XXXXX +-34 3 0 XXXXX +-33 3 0 XXXXX +-32 3 0 XXXXX +-31 3 0 XXXXX +-30 3 0 XXXXX +-29 3 0 XXXXX +-28 3 0 XXXXX +-27 3 0 XXXXX +-26 3 0 XXXXX +-25 3 0 XXXXX +-24 3 0 XXXXX +-23 3 0 XXXXX +-22 3 0 XXXXX +-21 3 0 XXXXX +-20 3 0 XXXXX +-19 3 0 XXXXX +-18 3 0 XXXXX +-17 3 0 XXXXX +-16 3 0 XXXXX +-15 3 0 XXXXX +-14 3 0 XXXXX +-13 3 0 XXXXX +-12 3 0 XXXXX +-11 3 0 XXXXX +-10 3 0 XXXXX +-9 3 0 XXXXX +-8 3 0 XXXXX +-7 3 0 XXXXX +-6 3 0 XXXXX +-5 3 0 XXXXX +-4 3 0 XXXXX +-3 3 0 XXXXX +-2 3 0 XXXXX +-1 3 0 XXXXX +0 3 0 XXXXX +1 3 0 XXXXX +2 3 0 XXXXX +3 3 0 XXXXX +4 3 0 XXXXX +5 3 0 XXXXX +6 3 0 XXXXX +7 3 0 XXXXX +8 1011 0 XXXXX +9 3 0 XXXXX +10 3 0 XXXXX +11 1011 0 XXXXX +12 3 0 XXXXX +13 3 0 XXXXX +14 3 0 XXXXX +15 3 
0 XXXXX +16 3 0 XXXXX +17 3 0 XXXXX +18 3 0 XXXXX +19 3 0 XXXXX +20 3 0 XXXXX +21 3 0 XXXXX +22 3 0 XXXXX +23 3 0 XXXXX +24 3 0 XXXXX +25 3 0 XXXXX +26 3 0 XXXXX +27 3 0 XXXXX +28 3 0 XXXXX +29 3 0 XXXXX +30 3 0 XXXXX +31 3 0 XXXXX +32 3 0 XXXXX +33 3 0 XXXXX +34 3 0 XXXXX +35 3 0 XXXXX +36 3 0 XXXXX +37 3 0 XXXXX +38 3 0 XXXXX +39 3 0 XXXXX +40 3 0 XXXXX +41 3 0 XXXXX +42 3 0 XXXXX +43 3 0 XXXXX +44 3 0 XXXXX +45 3 0 XXXXX +46 3 0 XXXXX +47 3 0 XXXXX +48 3 0 XXXXX +49 3 0 XXXXX +50 3 0 XXXXX +51 3 0 XXXXX +52 3 0 XXXXX +53 3 0 XXXXX +54 3 0 XXXXX +55 3 0 XXXXX +56 3 0 XXXXX +57 3 0 XXXXX +58 3 0 XXXXX +59 3 0 XXXXX +60 3 0 XXXXX +61 3 0 XXXXX +62 3 0 XXXXX +NULL 3066 0 XXXXX diff --git a/ql/src/test/results/clientpositive/multi_insert_distinct.q.out b/ql/src/test/results/clientpositive/multi_insert_distinct.q.out index 4d0fabfbed..eefa1e1197 100644 --- a/ql/src/test/results/clientpositive/multi_insert_distinct.q.out +++ b/ql/src/test/results/clientpositive/multi_insert_distinct.q.out @@ -118,6 +118,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) @@ -293,6 +294,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) @@ -462,6 +464,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) diff --git a/ql/src/test/results/clientpositive/multi_insert_gby3.q.out b/ql/src/test/results/clientpositive/multi_insert_gby3.q.out index 4aba936edf..23518f7ac2 100644 --- a/ql/src/test/results/clientpositive/multi_insert_gby3.q.out +++ b/ql/src/test/results/clientpositive/multi_insert_gby3.q.out @@ -93,6 +93,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) @@ -333,6 +334,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -1885,6 +1887,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) @@ -2146,6 +2149,7 @@ STAGE PLANS: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) diff --git a/ql/src/test/results/clientpositive/nullgroup4.q.out b/ql/src/test/results/clientpositive/nullgroup4.q.out index 1d5642deb3..8797fa0ee2 100644 --- a/ql/src/test/results/clientpositive/nullgroup4.q.out +++ b/ql/src/test/results/clientpositive/nullgroup4.q.out @@ -42,6 +42,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 125 Data size: 13375 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) + Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), count(DISTINCT KEY._col0:0._col0) diff --git a/ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out b/ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out index 318e694d4d..5c204001b4 100644 --- a/ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out @@ -40,6 +40,7 @@ STAGE PLANS: sort order: ++ Statistics: Num rows: 125 Data size: 37375 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) + Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0) diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out index 0d5a2ba9ff..1cdecf164a 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out @@ -476,12 +476,26 @@ STAGE PLANS: TableScan alias: alltypesparquet Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true Select Operator expressions: ctinyint (type: tinyint), cdouble (type: double) outputColumnNames: ctinyint, cdouble + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 5] Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT cdouble) + Group By Vectorization: + aggregators: VectorUDAFCount(col 5:double) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:tinyint, col 5:double + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] keys: ctinyint (type: tinyint), cdouble (type: double) minReductionHashAggr: 0.99 mode: hash @@ -492,14 +506,23 @@ STAGE PLANS: null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No DISTINCT columns IS false Statistics: Num rows: 6144 Data size: 104204 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 + Execution mode: vectorized Map Vectorization: enabled: true enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true diff --git a/ql/src/test/results/clientpositive/spark/count.q.out b/ql/src/test/results/clientpositive/spark/count.q.out index dd2383518b..4688f6ede4 100644 --- a/ql/src/test/results/clientpositive/spark/count.q.out +++ b/ql/src/test/results/clientpositive/spark/count.q.out @@ -71,6 +71,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 780 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: bigint) + Execution mode: vectorized Reducer 2 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out b/ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out index b491458d5b..d599f948f5 100644 --- a/ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out @@ -68,6 +68,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Reducer 2 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/groupby9.q.out b/ql/src/test/results/clientpositive/spark/groupby9.q.out index 1f16390483..6a9c997edf 100644 --- a/ql/src/test/results/clientpositive/spark/groupby9.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby9.q.out @@ -67,6 +67,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map 5 Map Operator Tree: TableScan @@ -89,6 +90,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Reducer 2 Reduce Operator Tree: Group By Operator @@ -864,6 +866,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map 5 Map Operator Tree: TableScan @@ -886,6 +889,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Reducer 2 Reduce Operator Tree: Group By Operator @@ -1661,6 +1665,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map 5 Map Operator Tree: TableScan @@ -1683,6 +1688,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Reducer 2 Reduce Operator Tree: Group By Operator @@ -3261,6 +3267,7 @@ STAGE PLANS: sort order: ++ 
Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map 5 Map Operator Tree: TableScan @@ -3283,6 +3290,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Reducer 2 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out index 58b1359bb3..50e4345e5a 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out @@ -67,6 +67,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map 5 Map Operator Tree: TableScan @@ -89,6 +90,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Reducer 2 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/groupby_position.q.out b/ql/src/test/results/clientpositive/spark/groupby_position.q.out index 8749fd5be8..c5b5771ca0 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_position.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_position.q.out @@ -66,6 +66,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map 5 Map Operator Tree: TableScan @@ -87,6 +88,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Reducer 2 Reduce Operator Tree: Group By Operator @@ -269,6 +271,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map 5 Map Operator Tree: TableScan @@ -290,6 +293,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Reducer 2 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out b/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out index 04b6d25fe6..1af3f15242 100644 --- a/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out +++ b/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out @@ -83,6 +83,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map 5 Map Operator Tree: TableScan @@ -262,6 +263,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Reducer 2 
Execution mode: vectorized Reduce Operator Tree: @@ -1703,6 +1705,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map 5 Map Operator Tree: TableScan @@ -1725,6 +1728,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Reducer 2 Reduce Operator Tree: Group By Operator @@ -1864,6 +1868,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map 6 Map Operator Tree: TableScan @@ -1910,6 +1915,7 @@ STAGE PLANS: sort order: ++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: double), _col2 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Reducer 2 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/nullgroup4.q.out b/ql/src/test/results/clientpositive/spark/nullgroup4.q.out index 80a55cfb57..15ee622471 100644 --- a/ql/src/test/results/clientpositive/spark/nullgroup4.q.out +++ b/ql/src/test/results/clientpositive/spark/nullgroup4.q.out @@ -47,6 +47,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + Execution mode: vectorized Reducer 2 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/nullgroup4_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/nullgroup4_multi_distinct.q.out index 875bef0b61..aca5200db0 100644 --- a/ql/src/test/results/clientpositive/spark/nullgroup4_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/nullgroup4_multi_distinct.q.out @@ -45,6 +45,7 @@ STAGE PLANS: sort order: ++ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) + Execution mode: vectorized Reducer 2 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/union17.q.out b/ql/src/test/results/clientpositive/spark/union17.q.out index c645207eb0..1157a014d0 100644 --- a/ql/src/test/results/clientpositive/spark/union17.q.out +++ b/ql/src/test/results/clientpositive/spark/union17.q.out @@ -92,6 +92,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL + Execution mode: vectorized Map 7 Map Operator Tree: TableScan @@ -114,6 +115,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: PARTIAL + Execution mode: vectorized Reducer 3 Reduce Operator Tree: Group By Operator @@ -155,6 +157,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2_n17 Reducer 8 + Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -179,6 +182,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL Reducer 9 + Execution mode: vectorized Reduce 
diff --git a/ql/src/test/results/clientpositive/spark/union17.q.out b/ql/src/test/results/clientpositive/spark/union17.q.out
index c645207eb0..1157a014d0 100644
--- a/ql/src/test/results/clientpositive/spark/union17.q.out
+++ b/ql/src/test/results/clientpositive/spark/union17.q.out
@@ -92,6 +92,7 @@ STAGE PLANS:
                       sort order: ++
                       Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL
+            Execution mode: vectorized
         Map 7
             Map Operator Tree:
                 TableScan
@@ -114,6 +115,7 @@ STAGE PLANS:
                       sort order: +++
                       Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
                       Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: PARTIAL
+            Execution mode: vectorized
         Reducer 3
             Reduce Operator Tree:
               Group By Operator
@@ -155,6 +157,7 @@ STAGE PLANS:
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.dest2_n17
         Reducer 8
+            Execution mode: vectorized
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
@@ -179,6 +182,7 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL
         Reducer 9
+            Execution mode: vectorized
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
diff --git a/ql/src/test/results/clientpositive/vector_count.q.out b/ql/src/test/results/clientpositive/vector_count.q.out
index dea87e9b9e..24f5b7d324 100644
--- a/ql/src/test/results/clientpositive/vector_count.q.out
+++ b/ql/src/test/results/clientpositive/vector_count.q.out
@@ -66,12 +66,26 @@ STAGE PLANS:
           TableScan
             alias: abcd
             Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
+            TableScan Vectorization:
+                native: true
             Select Operator
               expressions: a (type: int), b (type: int), c (type: int), d (type: int)
               outputColumnNames: a, b, c, d
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumnNums: [0, 1, 2, 3]
               Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
               Group By Operator
                 aggregations: count(DISTINCT b), count(DISTINCT c), sum(d)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCount(col 1:int) -> bigint, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFSumLong(col 3:int) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: HASH
+                    keyExpressions: col 0:int, col 1:int, col 2:int
+                    native: false
+                    vectorProcessingMode: HASH
+                    projectedOutputColumnNums: [0, 1, 2]
                 keys: a (type: int), b (type: int), c (type: int)
                 minReductionHashAggr: 0.99
                 mode: hash
@@ -82,14 +96,23 @@ STAGE PLANS:
                   null sort order: zzz
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: int)
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No DISTINCT columns IS false
                   Statistics: Num rows: 3 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col5 (type: bigint)
+      Execution mode: vectorized
       Map Vectorization:
           enabled: true
           enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFormatFeatureSupport: [DECIMAL_64]
+          featureSupportInUse: [DECIMAL_64]
          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-          notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
-          vectorized: false
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -171,7 +194,7 @@ STAGE PLANS:
           enabled: true
           enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
           inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-          notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+          notVectorizedReason: GROUPBY operator: Aggregations with > 1 parameter are not supported count([Column[a], Column[b]])
           vectorized: false
       Reduce Vectorization:
           enabled: false
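
Note on the vector_count.q.out change above: a GROUP BY carrying DISTINCT aggregates can now be vectorized on the map side because, in HASH mode, the planner has already folded every DISTINCT expression into the grouping keys (keyExpressions: col 0:int, col 1:int, col 2:int for keys a, b, c above). No distinct value can be merged away before the shuffle, so the reducer still performs the real de-duplication via count(DISTINCT KEY._col1:0._col0). A minimal sketch of that two-phase argument in plain Java; this is an illustrative model, not Hive's operator code, and the class name and sample data are invented:

    import java.util.*;

    public class DistinctHashModel {
        public static void main(String[] args) {
            int[][] rows = { {1, 10}, {1, 10}, {1, 20}, {2, 30} };  // (a, b) pairs

            // Map side: hash aggregation keyed on (a, b). Because the DISTINCT
            // column b is part of the key, duplicate rows collapse but no
            // distinct (a, b) combination is lost before the shuffle.
            Set<List<Integer>> mapOutput = new HashSet<>();
            for (int[] r : rows) {
                mapOutput.add(Arrays.asList(r[0], r[1]));
            }

            // Reduce side: group by a and count the distinct b keys that arrive,
            // mirroring count(DISTINCT KEY._col1:0._col0) in the plans above.
            Map<Integer, Set<Integer>> distinctPerGroup = new HashMap<>();
            for (List<Integer> key : mapOutput) {
                distinctPerGroup.computeIfAbsent(key.get(0), k -> new HashSet<>())
                                .add(key.get(1));
            }
            // Prints (in some order): a=1 count(distinct b)=2, a=2 count(distinct b)=1
            distinctPerGroup.forEach((a, bs) ->
                System.out.println("a=" + a + " count(distinct b)=" + bs.size()));
        }
    }

This is also why only the map side reports "Execution mode: vectorized" in these plans, while the reduce side, which does the actual DISTINCT merge, stays non-vectorized on the MR engine.
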
diff --git a/ql/src/test/results/clientpositive/vectorization_limit.q.out b/ql/src/test/results/clientpositive/vectorization_limit.q.out
index 15b697ab46..c121d9d96c 100644
--- a/ql/src/test/results/clientpositive/vectorization_limit.q.out
+++ b/ql/src/test/results/clientpositive/vectorization_limit.q.out
@@ -564,12 +564,27 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc
             Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE
+            TableScan Vectorization:
+                native: true
+                vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]
             Select Operator
               expressions: ctinyint (type: tinyint), cdouble (type: double)
               outputColumnNames: ctinyint, cdouble
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumnNums: [0, 5]
               Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE
               Group By Operator
                 aggregations: count(DISTINCT cdouble)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCount(col 5:double) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: HASH
+                    keyExpressions: col 0:tinyint, col 5:double
+                    native: false
+                    vectorProcessingMode: HASH
+                    projectedOutputColumnNums: [0]
                 keys: ctinyint (type: tinyint), cdouble (type: double)
                 minReductionHashAggr: 0.99
                 mode: hash
@@ -580,13 +595,28 @@ STAGE PLANS:
                   null sort order: zz
                   sort order: ++
                   Map-reduce partition columns: _col0 (type: tinyint)
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No DISTINCT columns IS false
                   Statistics: Num rows: 6144 Data size: 104204 Basic stats: COMPLETE Column stats: COMPLETE
+      Execution mode: vectorized
       Map Vectorization:
           enabled: true
           enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFormatFeatureSupport: [DECIMAL_64]
+          featureSupportInUse: [DECIMAL_64]
           inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-          notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
-          vectorized: false
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 12
+              includeColumns: [0, 5]
+              dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+              partitionColumnCount: 0
+              scratchColumnTypeNames: []
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
diff --git a/ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out b/ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out
index 1a9c3bd7fa..5aecbe8563 100644
--- a/ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out
@@ -41,13 +41,28 @@ STAGE PLANS:
           TableScan
             alias: dtest
             Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+            TableScan Vectorization:
+                native: true
+                vectorizationSchemaColumns: [0:a:int, 1:b:int, 2:ROW__ID:struct]
             Select Operator
               expressions: a (type: int)
               outputColumnNames: a
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumnNums: [0]
               Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
               Group By Operator
                 aggregations: sum(DISTINCT a), count(DISTINCT a)
                 bucketGroup: true
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 0:int) -> bigint, VectorUDAFCount(col 0:int) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: HASH
+                    keyExpressions: col 0:int
+                    native: false
+                    vectorProcessingMode: HASH
+                    projectedOutputColumnNums: [0, 1]
                 keys: a (type: int)
                 minReductionHashAggr: 0.99
                 mode: hash
@@ -57,13 +72,28 @@ STAGE PLANS:
                   key expressions: _col0 (type: int)
                   null sort order: z
                   sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No DISTINCT columns IS false
                   Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+      Execution mode: vectorized
       Map Vectorization:
           enabled: true
           enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFormatFeatureSupport: [DECIMAL_64]
+          featureSupportInUse: [DECIMAL_64]
           inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-          notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
-          vectorized: false
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0]
+              dataColumns: a:int, b:int
+              partitionColumnCount: 0
+              scratchColumnTypeNames: []
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -122,12 +152,28 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc
             Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
+            TableScan Vectorization:
+                native: true
+                vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]
             Select Operator
               expressions: cint (type: int), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double)
              outputColumnNames: _col0, _col1, _col2
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumnNums: [2, 13, 16]
+                  selectExpressions: CastLongToDouble(col 2:int) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 2:int) -> 14:double, CastLongToDouble(col 2:int) -> 15:double) -> 16:double
               Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
               Group By Operator
                 aggregations: sum(DISTINCT _col0), count(DISTINCT _col0), sum(DISTINCT _col2), sum(DISTINCT _col1)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 13:double) -> double
+                    className: VectorGroupByOperator
+                    groupByMode: HASH
+                    keyExpressions: col 2:int, col 16:double, col 13:double
+                    native: false
+                    vectorProcessingMode: HASH
+                    projectedOutputColumnNums: [0, 1, 2, 3]
                 keys: _col0 (type: int), _col2 (type: double), _col1 (type: double)
                 minReductionHashAggr: 0.99
                 mode: hash
@@ -137,13 +183,28 @@ STAGE PLANS:
                   key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: double)
                   null sort order: zzz
                   sort order: +++
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No DISTINCT columns IS false
                   Statistics: Num rows: 12288 Data size: 601608 Basic stats: COMPLETE Column stats: COMPLETE
+      Execution mode: vectorized
       Map Vectorization:
           enabled: true
           enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFormatFeatureSupport: [DECIMAL_64]
+          featureSupportInUse: [DECIMAL_64]
           inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-          notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
-          vectorized: false
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 12
+              includeColumns: [2]
+              dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+              partitionColumnCount: 0
+              scratchColumnTypeNames: [double, double, double, double]
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -181,3 +242,256 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -3482841611	6082	-572647.4204209142	6.153814687328982E8
+PREHOOK: query: explain vectorization detail
+select ctinyint, count(distinct cint), sum(( CASE WHEN ( ( cstring1 LIKE'test%1' )
+ OR ( cstring1 LIKE 'test%2' ) ) THEN 1 ELSE 0 END )) AS s,
+max(( CASE WHEN ( ( cstring1 LIKE 'test%3' ) OR ( cstring1 LIKE '%test%5' ) )
+THEN cstring1 ELSE 'XXXXX' END )) AS maxVal from alltypesorc group by ctinyint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: explain vectorization detail
+select ctinyint, count(distinct cint), sum(( CASE WHEN ( ( cstring1 LIKE'test%1' )
+ OR ( cstring1 LIKE 'test%2' ) ) THEN 1 ELSE 0 END )) AS s,
+max(( CASE WHEN ( ( cstring1 LIKE 'test%3' ) OR ( cstring1 LIKE '%test%5' ) )
+THEN cstring1 ELSE 'XXXXX' END )) AS maxVal from alltypesorc group by ctinyint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesorc
+            Statistics: Num rows: 12288 Data size: 935842 Basic stats: COMPLETE Column stats: COMPLETE
+            TableScan Vectorization:
+                native: true
+                vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]
+            Select Operator
+              expressions: ctinyint (type: tinyint), cint (type: int), CASE WHEN (((cstring1 like 'test%1') or (cstring1 like 'test%2'))) THEN (1) ELSE (0) END (type: int), CASE WHEN (((cstring1 like 'test%3') or (cstring1 like '%test%5'))) THEN (cstring1) ELSE ('XXXXX') END (type: string)
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumnNums: [0, 2, 16, 20]
+                  selectExpressions: IfExprLongScalarLongScalar(col 15:boolean, val 1, val 0)(children: ColOrCol(col 13:boolean, col 14:boolean)(children: SelectStringColLikeStringScalar(col 6:string) -> 13:boolean, SelectStringColLikeStringScalar(col 6:string) -> 14:boolean) -> 15:boolean) -> 16:int, IfExprStringGroupColumnStringScalar(col 19:boolean, col 6:string, val XXXXX)(children: ColOrCol(col 17:boolean, col 18:boolean)(children: SelectStringColLikeStringScalar(col 6:string) -> 17:boolean, SelectStringColLikeStringScalar(col 6:string) -> 18:boolean) -> 19:boolean) -> 20:string
+              Statistics: Num rows: 12288 Data size: 935842 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                aggregations: count(DISTINCT _col1), sum(_col2), max(_col3)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCount(col 2:int) -> bigint, VectorUDAFSumLong(col 16:int) -> bigint, VectorUDAFMaxString(col 20:string) -> string
+                    className: VectorGroupByOperator
+                    groupByMode: HASH
+                    keyExpressions: col 0:tinyint, col 2:int
+                    native: false
+                    vectorProcessingMode: HASH
+                    projectedOutputColumnNums: [0, 1, 2]
+                keys: _col0 (type: tinyint), _col1 (type: int)
+                minReductionHashAggr: 0.99
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 6144 Data size: 1265496 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: tinyint), _col1 (type: int)
+                  null sort order: zz
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: tinyint)
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No DISTINCT columns IS false
+                  Statistics: Num rows: 6144 Data size: 1265496 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col3 (type: bigint), _col4 (type: string)
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFormatFeatureSupport: [DECIMAL_64]
+          featureSupportInUse: [DECIMAL_64]
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 12
+              includeColumns: [0, 2, 6]
+              dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+              partitionColumnCount: 0
+              scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint, bigint, bigint, string]
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1), max(VALUE._col2)
+          keys: KEY._col0 (type: tinyint)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 131 Data size: 26596 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 131 Data size: 26596 Basic stats: COMPLETE Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select ctinyint, count(distinct cint), sum(( CASE WHEN ( ( cstring1 LIKE
+'test%1' ) OR ( cstring1 LIKE 'test%2' ) ) THEN 1 ELSE 0 END )) AS s,
+max(( CASE WHEN ( ( cstring1 LIKE 'test%3' ) OR ( cstring1 LIKE '%test%5' ) )
+THEN cstring1 ELSE 'XXXXX' END )) AS maxVal from alltypesorc group by ctinyint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ctinyint, count(distinct cint), sum(( CASE WHEN ( ( cstring1 LIKE
+'test%1' ) OR ( cstring1 LIKE 'test%2' ) ) THEN 1 ELSE 0 END )) AS s,
+max(( CASE WHEN ( ( cstring1 LIKE 'test%3' ) OR ( cstring1 LIKE '%test%5' ) )
+THEN cstring1 ELSE 'XXXXX' END )) AS maxVal from alltypesorc group by ctinyint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-64	3	0	XXXXX
+-63	3	0	XXXXX
+-62	3	0	XXXXX
+-61	3	0	XXXXX
+-60	3	0	XXXXX
+-59	3	0	XXXXX
+-58	3	0	XXXXX
+-57	3	0	XXXXX
+-56	3	0	XXXXX
+-55	3	0	XXXXX
+-54	3	0	XXXXX
+-53	3	0	XXXXX
+-52	3	0	XXXXX
+-51	1009	0	XXXXX
+-50	3	0	XXXXX
+-49	3	0	XXXXX
+-48	3	0	XXXXX
+-47	3	0	XXXXX
+-46	3	0	XXXXX
+-45	3	0	XXXXX
+-44	3	0	XXXXX
+-43	3	0	XXXXX
+-42	3	0	XXXXX
+-41	3	0	XXXXX
+-40	3	0	XXXXX
+-39	3	0	XXXXX
+-38	3	0	XXXXX
+-37	3	0	XXXXX
+-36	3	0	XXXXX
+-35	3	0	XXXXX
+-34	3	0	XXXXX
+-33	3	0	XXXXX
+-32	3	0	XXXXX
+-31	3	0	XXXXX
+-30	3	0	XXXXX
+-29	3	0	XXXXX
+-28	3	0	XXXXX
+-27	3	0	XXXXX
+-26	3	0	XXXXX
+-25	3	0	XXXXX
+-24	3	0	XXXXX
+-23	3	0	XXXXX
+-22	3	0	XXXXX
+-21	3	0	XXXXX
+-20	3	0	XXXXX
+-19	3	0	XXXXX
+-18	3	0	XXXXX
+-17	3	0	XXXXX
+-16	3	0	XXXXX
+-15	3	0	XXXXX
+-14	3	0	XXXXX
+-13	3	0	XXXXX
+-12	3	0	XXXXX
+-11	3	0	XXXXX
+-10	3	0	XXXXX
+-9	3	0	XXXXX
+-8	3	0	XXXXX
+-7	3	0	XXXXX
+-6	3	0	XXXXX
+-5	3	0	XXXXX
+-4	3	0	XXXXX
+-3	3	0	XXXXX
+-2	3	0	XXXXX
+-1	3	0	XXXXX
+0	3	0	XXXXX
+1	3	0	XXXXX
+2	3	0	XXXXX
+3	3	0	XXXXX
+4	3	0	XXXXX
+5	3	0	XXXXX
+6	3	0	XXXXX
+7	3	0	XXXXX
+8	1011	0	XXXXX
+9	3	0	XXXXX
+10	3	0	XXXXX
+11	1011	0	XXXXX
+12	3	0	XXXXX
+13	3	0	XXXXX
+14	3	0	XXXXX
+15	3	0	XXXXX
+16	3	0	XXXXX
+17	3	0	XXXXX
+18	3	0	XXXXX
+19	3	0	XXXXX
+20	3	0	XXXXX
+21	3	0	XXXXX
+22	3	0	XXXXX
+23	3	0	XXXXX
+24	3	0	XXXXX
+25	3	0	XXXXX
+26	3	0	XXXXX
+27	3	0	XXXXX
+28	3	0	XXXXX
+29	3	0	XXXXX
+30	3	0	XXXXX
+31	3	0	XXXXX
+32	3	0	XXXXX
+33	3	0	XXXXX
+34	3	0	XXXXX
+35	3	0	XXXXX
+36	3	0	XXXXX
+37	3	0	XXXXX
+38	3	0	XXXXX
+39	3	0	XXXXX
+40	3	0	XXXXX
+41	3	0	XXXXX
+42	3	0	XXXXX
+43	3	0	XXXXX
+44	3	0	XXXXX
+45	3	0	XXXXX
+46	3	0	XXXXX
+47	3	0	XXXXX
+48	3	0	XXXXX
+49	3	0	XXXXX
+50	3	0	XXXXX
+51	3	0	XXXXX
+52	3	0	XXXXX
+53	3	0	XXXXX
+54	3	0	XXXXX
+55	3	0	XXXXX
+56	3	0	XXXXX
+57	3	0	XXXXX
+58	3	0	XXXXX
+59	3	0	XXXXX
+60	3	0	XXXXX
+61	3	0	XXXXX
+62	3	0	XXXXX
+NULL	3066	0	XXXXX
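
The plans above consistently report "Reduce Vectorization: enabled: false" with "enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false": these q-file runs use plain MapReduce, so only the map-side hash aggregation is vectorized. A hedged sketch of how one could inspect the same vectorization report from a client; the HiveServer2 URL and credentials below are placeholder assumptions, not part of this patch:

    // Hypothetical client-side check via Hive JDBC; requires the hive-jdbc
    // driver on the classpath and a reachable HiveServer2 instance.
    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.Statement;

    public class ExplainVectorization {
        public static void main(String[] args) throws Exception {
            Class.forName("org.apache.hive.jdbc.HiveDriver");
            try (Connection conn = DriverManager.getConnection(
                     "jdbc:hive2://localhost:10000/default", "hive", "");
                 Statement stmt = conn.createStatement()) {
                // These session settings correspond to the conditions named in
                // the plan output above; reduce-side vectorization additionally
                // requires the Tez or Spark execution engine.
                stmt.execute("set hive.vectorized.execution.enabled=true");
                stmt.execute("set hive.vectorized.execution.reduce.enabled=true");
                try (ResultSet rs = stmt.executeQuery(
                        "explain vectorization detail "
                        + "select ctinyint, count(distinct cint) "
                        + "from alltypesorc group by ctinyint")) {
                    while (rs.next()) {
                        System.out.println(rs.getString(1));  // one plan line per row
                    }
                }
            }
        }
    }

On Tez or Spark with hive.vectorized.execution.reduce.enabled set, the reduce side of these plans would become eligible for vectorization as well, subject to the same per-operator validation that produced the map-side reports above.
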