diff --git data/files/vector_ptf_part_simple.txt data/files/vector_ptf_part_simple.txt new file mode 100644 index 0000000..2bcc7a6 --- /dev/null +++ data/files/vector_ptf_part_simple.txt @@ -0,0 +1,40 @@ +Manufacturer#2 almond aquamarine rose maroon antique 900.66 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 +Manufacturer#1 almond aquamarine pink moccasin thistle \N +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 +Manufacturer#5 almond antique medium spring khaki 1611.66 +Manufacturer#5 almond antique blue firebrick mint 1789.69 +Manufacturer#1 almond antique burnished rose metallic 1173.15 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 +Manufacturer#3 almond antique forest lavender goldenrod \N +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 +Manufacturer#4 almond antique violet mint lemon 1375.42 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 +Manufacturer#5 almond antique sky peru orange 1788.73 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 +Manufacturer#3 almond antique chartreuse khaki white 99.68 +Manufacturer#4 almond antique gainsboro frosted violet \N +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 +Manufacturer#3 almond antique olive coral navajo 1337.29 +Manufacturer#5 almond antique medium spring khaki 1611.66 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 +Manufacturer#3 almond antique misty red olive 1922.98 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 +Manufacturer#4 almond aquamarine floral ivory bisque \N +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 +Manufacturer#3 almond antique metallic orange dim 55.39 +Manufacturer#1 almond antique burnished rose metallic 1173.15 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index d684ba8..a53fc1a 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -637,6 +637,7 @@ minillaplocal.query.files=acid_globallimit.q,\ vector_leftsemi_mapjoin.q,\ vector_number_compare_projection.q,\ vector_partitioned_date_time.q,\ + vector_ptf_part_simple.q,\ vector_udf1.q,\ vectorization_short_regress.q,\ vectorized_dynamic_partition_pruning.q,\ diff --git ql/src/test/queries/clientpositive/vector_ptf_part_simple.q ql/src/test/queries/clientpositive/vector_ptf_part_simple.q new file mode 100644 index 0000000..4f3a538 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_ptf_part_simple.q @@ -0,0 +1,268 @@ +set hive.cli.print.header=true; +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +create table vector_ptf_part_simple_text(p_mfgr string, p_name string, p_retailprice double) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '\t' + STORED AS TEXTFILE; +LOAD DATA LOCAL INPATH '../../data/files/vector_ptf_part_simple.txt' OVERWRITE INTO TABLE vector_ptf_part_simple_text; + +create table vector_ptf_part_simple_orc(p_mfgr string, p_name string, p_retailprice double) stored as orc; +INSERT INTO TABLE vector_ptf_part_simple_orc SELECT * FROM vector_ptf_part_simple_text; + +select * from vector_ptf_part_simple_orc; + + +explain vectorization detail +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr) as rn, +rank() over(partition by p_mfgr) as r, +dense_rank() over(partition by p_mfgr) as dr, +first_value(p_retailprice) over(partition by p_mfgr) as fv, +last_value(p_retailprice) over(partition by p_mfgr) as lv, +count(p_retailprice) over(partition by p_mfgr) as c, +count(*) over(partition by p_mfgr) as cs +from vector_ptf_part_simple_orc; + +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr) as rn, +rank() over(partition by p_mfgr) as r, +dense_rank() over(partition by p_mfgr) as dr, +first_value(p_retailprice) over(partition by p_mfgr) as fv, +last_value(p_retailprice) over(partition by p_mfgr) as lv, +count(p_retailprice) over(partition by p_mfgr) as c, +count(*) over(partition by p_mfgr) as cs +from vector_ptf_part_simple_orc; + + +explain vectorization detail +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name) as rn, +rank() over(partition by p_mfgr order by p_name) as r, +dense_rank() over(partition by p_mfgr order by p_name) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name) as c, +count(*) over(partition by p_mfgr order by p_name) as cs +from vector_ptf_part_simple_orc; + +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name) as rn, +rank() over(partition by p_mfgr order by p_name) as r, +dense_rank() over(partition by p_mfgr order by p_name) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name) as c, +count(*) over(partition by p_mfgr order by p_name) as cs +from vector_ptf_part_simple_orc; + + +explain vectorization detail +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name range between unbounded preceding and unbounded following) as rn, +rank() over(partition by p_mfgr order by p_name range between unbounded preceding and unbounded following) as r, +dense_rank() over(partition by p_mfgr order by p_name range between unbounded preceding and unbounded following) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as c, +count(*) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as cs +from vector_ptf_part_simple_orc; + +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name range between unbounded preceding and unbounded following) as rn, +rank() over(partition by p_mfgr order by p_name range between unbounded preceding and unbounded following) as r, +dense_rank() over(partition by p_mfgr order by p_name range between unbounded preceding and unbounded following) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as c, +count(*) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as cs +from vector_ptf_part_simple_orc; + + +explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc; + +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc; + + +explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc; + +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc; + + +explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as av +from vector_ptf_part_simple_orc; + +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as av +from vector_ptf_part_simple_orc; + + +-- +-- ROW +-- +explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as av +from vector_ptf_part_simple_orc; + +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as av +from vector_ptf_part_simple_orc; + + +create table vector_ptf_part_simple_text_decimal(p_mfgr string, p_name string, p_retailprice decimal(38,18)) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '\t' + STORED AS TEXTFILE; +LOAD DATA LOCAL INPATH '../../data/files/vector_ptf_part_simple.txt' OVERWRITE INTO TABLE vector_ptf_part_simple_text_decimal; + +create table vector_ptf_part_simple_orc_decimal(p_mfgr string, p_name string, p_retailprice decimal(38,18)) stored as orc; +INSERT INTO TABLE vector_ptf_part_simple_orc_decimal SELECT * FROM vector_ptf_part_simple_text_decimal; + +explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_decimal; + +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_decimal; + + +explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_decimal; + +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_decimal; + + + + +create table vector_ptf_part_simple_orc_long(p_mfgr string, p_name string, p_bigint bigint) stored as orc; +INSERT INTO TABLE vector_ptf_part_simple_orc_long SELECT p_mfgr, p_name, cast(p_retailprice * 100 as bigint) FROM vector_ptf_part_simple_text_decimal; + +explain vectorization detail +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr) as s, +min(p_bigint) over(partition by p_mfgr) as mi, +max(p_bigint) over(partition by p_mfgr) as ma, +avg(p_bigint) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_long; + +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr) as s, +min(p_bigint) over(partition by p_mfgr) as mi, +max(p_bigint) over(partition by p_mfgr) as ma, +avg(p_bigint) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_long; + + +explain vectorization detail +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr order by p_name) as s, +min(p_bigint) over(partition by p_mfgr order by p_name) as mi, +max(p_bigint) over(partition by p_mfgr order by p_name) as ma, +avg(p_bigint) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_long; + +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr order by p_name) as s, +min(p_bigint) over(partition by p_mfgr order by p_name) as mi, +max(p_bigint) over(partition by p_mfgr order by p_name) as ma, +avg(p_bigint) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_long; + + +-- Omit p_name columns + +explain vectorization detail +select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr) as r +from vector_ptf_part_simple_orc; + +select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr) as r +from vector_ptf_part_simple_orc; + + +explain vectorization detail +select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr order by p_name) as r +from vector_ptf_part_simple_orc; + +select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr order by p_name) as r +from vector_ptf_part_simple_orc; + + +-- Calculated partition key + +explain vectorization detail +select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end) as r +from vector_ptf_part_simple_orc; + +select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end) as r +from vector_ptf_part_simple_orc; + +explain vectorization detail +select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r +from vector_ptf_part_simple_orc; + +select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r +from vector_ptf_part_simple_orc; diff --git ql/src/test/queries/clientpositive/vectorized_ptf.q ql/src/test/queries/clientpositive/vectorized_ptf.q index 232aa11..dbc7ca6 100644 --- ql/src/test/queries/clientpositive/vectorized_ptf.q +++ ql/src/test/queries/clientpositive/vectorized_ptf.q @@ -43,7 +43,7 @@ insert into table part_orc select * from part_staging; --1. test1 -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -64,7 +64,7 @@ from noop(on part_orc -- 2. testJoinWithNoop -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j @@ -81,7 +81,7 @@ sort by j.p_name) -- 3. testOnlyPTF -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr @@ -94,7 +94,7 @@ order by p_name); -- 4. testPTFAlias -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -115,7 +115,7 @@ from noop(on part_orc -- 5. testPTFAndWhereWithWindowing -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -138,7 +138,7 @@ from noop(on part_orc -- 6. testSWQAndPTFAndGBy -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -163,7 +163,7 @@ group by p_mfgr, p_name, p_size -- 7. testJoin -explain vectorization extended +explain vectorization detail select abc.* from noop(on part_orc partition by p_mfgr @@ -178,7 +178,7 @@ order by p_name -- 8. testJoinRight -explain vectorization extended +explain vectorization detail select abc.* from part_orc p1 join noop(on part_orc partition by p_mfgr @@ -193,7 +193,7 @@ order by p_name -- 9. testNoopWithMap -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part_orc @@ -208,7 +208,7 @@ order by p_name, p_size desc); -- 10. testNoopWithMapWithWindowing -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -227,7 +227,7 @@ from noopwithmap(on part_orc -- 11. testHavingWithWindowingPTFNoGBY -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -248,7 +248,7 @@ order by p_name) -- 12. testFunctionChain -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -269,7 +269,7 @@ order by p_mfgr, p_name -- 13. testPTFAndWindowingInSubQ -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -296,7 +296,7 @@ window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 -- 14. testPTFJoinWithWindowingWithCount -explain vectorization extended +explain vectorization detail select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -323,7 +323,7 @@ order by p_name -- 15. testDistinctInSelectWithPTF -explain vectorization extended +explain vectorization detail select DISTINCT p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr @@ -342,7 +342,7 @@ round(sum(p_retailprice),2) as s from part_orc group by p_mfgr, p_brand; -explain vectorization extended +explain vectorization detail select p_mfgr, p_brand, s, round(sum(s) over w1,2) as s1 from noop(on mfgr_price_view @@ -376,7 +376,7 @@ dr INT, cud DOUBLE, fv1 INT); -explain vectorization extended +explain vectorization detail from noop(on part_orc partition by p_mfgr order by p_name) @@ -413,7 +413,7 @@ select * from part_5; -- 18. testMulti2OperatorsFunctionChainWithMap -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -448,7 +448,7 @@ from noop(on -- 19. testMulti3OperatorsFunctionChain -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -483,7 +483,7 @@ from noop(on -- 20. testMultiOperatorChainWithNoWindowing -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -515,7 +515,7 @@ from noop(on -- 21. testMultiOperatorChainEndsWithNoopMap -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -550,7 +550,7 @@ from noopwithmap(on -- 22. testMultiOperatorChainWithDiffPartitionForWindow1 -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -583,7 +583,7 @@ from noop(on -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, diff --git ql/src/test/queries/clientpositive/windowing_windowspec.q ql/src/test/queries/clientpositive/windowing_windowspec.q index 08b7d5c..c37aed3 100644 --- ql/src/test/queries/clientpositive/windowing_windowspec.q +++ ql/src/test/queries/clientpositive/windowing_windowspec.q @@ -31,6 +31,8 @@ select s, sum(i) over(partition by ts order by s) from over10k limit 100; select f, sum(f) over (partition by ts order by f range between unbounded preceding and current row) from over10k limit 100; +select f, sum(f) over (partition by ts order by f rows between 2 preceding and 1 preceding) from over10k limit 100; + select s, i, round(avg(d) over (partition by s order by i) / 10.0 , 2) from over10k limit 7; select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),2) from over10k window w1 as (partition by s order by i) limit 7; diff --git ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out new file mode 100644 index 0000000..9929550 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out @@ -0,0 +1,3032 @@ +PREHOOK: query: create table vector_ptf_part_simple_text(p_mfgr string, p_name string, p_retailprice double) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '\t' + STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vector_ptf_part_simple_text +POSTHOOK: query: create table vector_ptf_part_simple_text(p_mfgr string, p_name string, p_retailprice double) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '\t' + STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vector_ptf_part_simple_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vector_ptf_part_simple.txt' OVERWRITE INTO TABLE vector_ptf_part_simple_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@vector_ptf_part_simple_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vector_ptf_part_simple.txt' OVERWRITE INTO TABLE vector_ptf_part_simple_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@vector_ptf_part_simple_text +PREHOOK: query: create table vector_ptf_part_simple_orc(p_mfgr string, p_name string, p_retailprice double) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vector_ptf_part_simple_orc +POSTHOOK: query: create table vector_ptf_part_simple_orc(p_mfgr string, p_name string, p_retailprice double) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vector_ptf_part_simple_orc +PREHOOK: query: INSERT INTO TABLE vector_ptf_part_simple_orc SELECT * FROM vector_ptf_part_simple_text +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_text +PREHOOK: Output: default@vector_ptf_part_simple_orc +POSTHOOK: query: INSERT INTO TABLE vector_ptf_part_simple_orc SELECT * FROM vector_ptf_part_simple_text +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_text +POSTHOOK: Output: default@vector_ptf_part_simple_orc +POSTHOOK: Lineage: vector_ptf_part_simple_orc.p_mfgr SIMPLE [(vector_ptf_part_simple_text)vector_ptf_part_simple_text.FieldSchema(name:p_mfgr, type:string, comment:null), ] +POSTHOOK: Lineage: vector_ptf_part_simple_orc.p_name SIMPLE [(vector_ptf_part_simple_text)vector_ptf_part_simple_text.FieldSchema(name:p_name, type:string, comment:null), ] +POSTHOOK: Lineage: vector_ptf_part_simple_orc.p_retailprice SIMPLE [(vector_ptf_part_simple_text)vector_ptf_part_simple_text.FieldSchema(name:p_retailprice, type:double, comment:null), ] +vector_ptf_part_simple_text.p_mfgr vector_ptf_part_simple_text.p_name vector_ptf_part_simple_text.p_retailprice +PREHOOK: query: select * from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select * from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +vector_ptf_part_simple_orc.p_mfgr vector_ptf_part_simple_orc.p_name vector_ptf_part_simple_orc.p_retailprice +Manufacturer#2 almond aquamarine rose maroon antique 900.66 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 +Manufacturer#5 almond antique medium spring khaki 1611.66 +Manufacturer#5 almond antique blue firebrick mint 1789.69 +Manufacturer#1 almond antique burnished rose metallic 1173.15 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 +Manufacturer#3 almond antique forest lavender goldenrod NULL +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 +Manufacturer#4 almond antique violet mint lemon 1375.42 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 +Manufacturer#5 almond antique sky peru orange 1788.73 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 +Manufacturer#3 almond antique chartreuse khaki white 99.68 +Manufacturer#4 almond antique gainsboro frosted violet NULL +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 +Manufacturer#3 almond antique olive coral navajo 1337.29 +Manufacturer#5 almond antique medium spring khaki 1611.66 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 +Manufacturer#3 almond antique misty red olive 1922.98 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 +Manufacturer#4 almond aquamarine floral ivory bisque NULL +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 +Manufacturer#3 almond antique metallic orange dim 55.39 +Manufacturer#1 almond antique burnished rose metallic 1173.15 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr) as rn, +rank() over(partition by p_mfgr) as r, +dense_rank() over(partition by p_mfgr) as dr, +first_value(p_retailprice) over(partition by p_mfgr) as fv, +last_value(p_retailprice) over(partition by p_mfgr) as lv, +count(p_retailprice) over(partition by p_mfgr) as c, +count(*) over(partition by p_mfgr) as cs +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr) as rn, +rank() over(partition by p_mfgr) as r, +dense_rank() over(partition by p_mfgr) as dr, +first_value(p_retailprice) over(partition by p_mfgr) as fv, +last_value(p_retailprice) over(partition by p_mfgr) as lv, +count(p_retailprice) over(partition by p_mfgr) as c, +count(*) over(partition by p_mfgr) as cs +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string) + sort order: + + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: row_number_window_0 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: rank_window_1 + arguments: _col0 + name: rank + window function: GenericUDAFRankEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_2 + arguments: _col0 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: first_value_window_3 + arguments: _col2 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: last_value_window_4 + arguments: _col2 + name: last_value + window function: GenericUDAFLastValueEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: count_window_5 + arguments: _col2 + name: count + window function: GenericUDAFCountEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: count_window_6 + name: count + window function: GenericUDAFCountEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + isStar: true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), row_number_window_0 (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), first_value_window_3 (type: double), last_value_window_4 (type: double), count_window_5 (type: bigint), count_window_6 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr) as rn, +rank() over(partition by p_mfgr) as r, +dense_rank() over(partition by p_mfgr) as dr, +first_value(p_retailprice) over(partition by p_mfgr) as fv, +last_value(p_retailprice) over(partition by p_mfgr) as lv, +count(p_retailprice) over(partition by p_mfgr) as c, +count(*) over(partition by p_mfgr) as cs +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr) as rn, +rank() over(partition by p_mfgr) as r, +dense_rank() over(partition by p_mfgr) as dr, +first_value(p_retailprice) over(partition by p_mfgr) as fv, +last_value(p_retailprice) over(partition by p_mfgr) as lv, +count(p_retailprice) over(partition by p_mfgr) as c, +count(*) over(partition by p_mfgr) as cs +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice rn r dr fv lv c cs +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 1 1 1 1290.35 1206.26 4 6 +Manufacturer#4 almond antique violet mint lemon 1375.42 2 1 1 1290.35 1206.26 4 6 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 3 1 1 1290.35 1206.26 4 6 +Manufacturer#4 almond antique gainsboro frosted violet NULL 4 1 1 1290.35 1206.26 4 6 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 5 1 1 1290.35 1206.26 4 6 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 6 1 1 1290.35 1206.26 4 6 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 1 1 1 1464.48 1788.73 6 6 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 2 1 1 1464.48 1788.73 6 6 +Manufacturer#5 almond antique medium spring khaki 1611.66 3 1 1 1464.48 1788.73 6 6 +Manufacturer#5 almond antique blue firebrick mint 1789.69 4 1 1 1464.48 1788.73 6 6 +Manufacturer#5 almond antique medium spring khaki 1611.66 5 1 1 1464.48 1788.73 6 6 +Manufacturer#5 almond antique sky peru orange 1788.73 6 1 1 1464.48 1788.73 6 6 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 1 1 1 900.66 1800.7 8 8 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 2 1 1 900.66 1800.7 8 8 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 3 1 1 900.66 1800.7 8 8 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 4 1 1 900.66 1800.7 8 8 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 5 1 1 900.66 1800.7 8 8 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 6 1 1 900.66 1800.7 8 8 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 7 1 1 900.66 1800.7 8 8 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 8 1 1 900.66 1800.7 8 8 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 1 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 2 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 3 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 4 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 5 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond antique burnished rose metallic 1173.15 6 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 7 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 8 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond antique burnished rose metallic 1173.15 9 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 10 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 11 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 12 1 1 1753.76 1632.66 11 12 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 1 1 1 590.27 99.68 7 8 +Manufacturer#3 almond antique metallic orange dim 55.39 2 1 1 590.27 99.68 7 8 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 3 1 1 590.27 99.68 7 8 +Manufacturer#3 almond antique olive coral navajo 1337.29 4 1 1 590.27 99.68 7 8 +Manufacturer#3 almond antique misty red olive 1922.98 5 1 1 590.27 99.68 7 8 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 6 1 1 590.27 99.68 7 8 +Manufacturer#3 almond antique forest lavender goldenrod NULL 7 1 1 590.27 99.68 7 8 +Manufacturer#3 almond antique chartreuse khaki white 99.68 8 1 1 590.27 99.68 7 8 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name) as rn, +rank() over(partition by p_mfgr order by p_name) as r, +dense_rank() over(partition by p_mfgr order by p_name) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name) as c, +count(*) over(partition by p_mfgr order by p_name) as cs +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name) as rn, +rank() over(partition by p_mfgr order by p_name) as r, +dense_rank() over(partition by p_mfgr order by p_name) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name) as c, +count(*) over(partition by p_mfgr order by p_name) as cs +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: row_number_window_0 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: rank_window_1 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_2 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: first_value_window_3 + arguments: _col2 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: PRECEDING(MAX)~CURRENT + window function definition + alias: last_value_window_4 + arguments: _col2 + name: last_value + window function: GenericUDAFLastValueEvaluator + window frame: PRECEDING(MAX)~CURRENT + window function definition + alias: count_window_5 + arguments: _col2 + name: count + window function: GenericUDAFCountEvaluator + window frame: PRECEDING(MAX)~CURRENT + window function definition + alias: count_window_6 + name: count + window function: GenericUDAFCountEvaluator + window frame: PRECEDING(MAX)~CURRENT + isStar: true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), row_number_window_0 (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), first_value_window_3 (type: double), last_value_window_4 (type: double), count_window_5 (type: bigint), count_window_6 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name) as rn, +rank() over(partition by p_mfgr order by p_name) as r, +dense_rank() over(partition by p_mfgr order by p_name) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name) as c, +count(*) over(partition by p_mfgr order by p_name) as cs +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name) as rn, +rank() over(partition by p_mfgr order by p_name) as r, +dense_rank() over(partition by p_mfgr order by p_name) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name) as c, +count(*) over(partition by p_mfgr order by p_name) as cs +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice rn r dr fv lv c cs +Manufacturer#1 almond antique burnished rose metallic 1173.15 1 1 1 1173.15 1173.15 2 2 +Manufacturer#1 almond antique burnished rose metallic 1173.15 2 1 1 1173.15 1173.15 2 2 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 3 2 1173.15 1753.76 6 6 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 4 3 2 1173.15 1753.76 6 6 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 5 3 2 1173.15 1753.76 6 6 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 6 3 2 1173.15 1753.76 6 6 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 7 7 3 1173.15 1602.59 7 7 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 8 8 4 1173.15 1414.42 8 8 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 9 5 1173.15 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 10 9 5 1173.15 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 11 9 5 1173.15 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 12 9 5 1173.15 1632.66 11 12 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 1 1 1 1690.68 1690.68 1 1 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 2 2 2 1690.68 1800.7 4 4 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 3 2 2 1690.68 1800.7 4 4 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 4 2 2 1690.68 1800.7 4 4 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 5 5 3 1690.68 2031.98 5 5 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 6 6 4 1690.68 1698.66 7 7 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 7 6 4 1690.68 1698.66 7 7 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 8 8 5 1690.68 1000.6 8 8 +Manufacturer#3 almond antique chartreuse khaki white 99.68 1 1 1 99.68 99.68 1 1 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 2 2 2 99.68 1190.27 4 5 +Manufacturer#3 almond antique forest lavender goldenrod NULL 3 2 2 99.68 1190.27 4 5 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 4 2 2 99.68 1190.27 4 5 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 5 2 2 99.68 1190.27 4 5 +Manufacturer#3 almond antique metallic orange dim 55.39 6 6 3 99.68 55.39 5 6 +Manufacturer#3 almond antique misty red olive 1922.98 7 7 4 99.68 1922.98 6 7 +Manufacturer#3 almond antique olive coral navajo 1337.29 8 8 5 99.68 1337.29 7 8 +Manufacturer#4 almond antique gainsboro frosted violet NULL 1 1 1 NULL NULL 0 1 +Manufacturer#4 almond antique violet mint lemon 1375.42 2 2 2 NULL 1375.42 1 2 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 3 3 3 NULL 1206.26 2 4 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 4 3 3 NULL 1206.26 2 4 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 5 5 4 NULL 1844.92 3 5 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 6 6 5 NULL 1290.35 4 6 +Manufacturer#5 almond antique blue firebrick mint 1789.69 1 1 1 1789.69 1789.69 1 1 +Manufacturer#5 almond antique medium spring khaki 1611.66 2 2 2 1789.69 1611.66 3 3 +Manufacturer#5 almond antique medium spring khaki 1611.66 3 2 2 1789.69 1611.66 3 3 +Manufacturer#5 almond antique sky peru orange 1788.73 4 4 3 1789.69 1788.73 4 4 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 5 5 4 1789.69 1018.1 5 5 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 6 6 5 1789.69 1464.48 6 6 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name range between unbounded preceding and unbounded following) as rn, +rank() over(partition by p_mfgr order by p_name range between unbounded preceding and unbounded following) as r, +dense_rank() over(partition by p_mfgr order by p_name range between unbounded preceding and unbounded following) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as c, +count(*) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as cs +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name range between unbounded preceding and unbounded following) as rn, +rank() over(partition by p_mfgr order by p_name range between unbounded preceding and unbounded following) as r, +dense_rank() over(partition by p_mfgr order by p_name range between unbounded preceding and unbounded following) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as c, +count(*) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as cs +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: row_number_window_0 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: rank_window_1 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_2 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: first_value_window_3 + arguments: _col2 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: PRECEDING(MAX)~CURRENT + window function definition + alias: last_value_window_4 + arguments: _col2 + name: last_value + window function: GenericUDAFLastValueEvaluator + window frame: PRECEDING(MAX)~CURRENT + window function definition + alias: count_window_5 + arguments: _col2 + name: count + window function: GenericUDAFCountEvaluator + window frame: PRECEDING(MAX)~CURRENT + window function definition + alias: count_window_6 + name: count + window function: GenericUDAFCountEvaluator + window frame: PRECEDING(MAX)~CURRENT + isStar: true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), row_number_window_0 (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), first_value_window_3 (type: double), last_value_window_4 (type: double), count_window_5 (type: bigint), count_window_6 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name range between unbounded preceding and unbounded following) as rn, +rank() over(partition by p_mfgr order by p_name range between unbounded preceding and unbounded following) as r, +dense_rank() over(partition by p_mfgr order by p_name range between unbounded preceding and unbounded following) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as c, +count(*) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as cs +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name range between unbounded preceding and unbounded following) as rn, +rank() over(partition by p_mfgr order by p_name range between unbounded preceding and unbounded following) as r, +dense_rank() over(partition by p_mfgr order by p_name range between unbounded preceding and unbounded following) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as c, +count(*) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as cs +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice rn r dr fv lv c cs +Manufacturer#1 almond antique burnished rose metallic 1173.15 1 1 1 1173.15 1173.15 2 2 +Manufacturer#1 almond antique burnished rose metallic 1173.15 2 1 1 1173.15 1173.15 2 2 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 3 2 1173.15 1753.76 6 6 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 4 3 2 1173.15 1753.76 6 6 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 5 3 2 1173.15 1753.76 6 6 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 6 3 2 1173.15 1753.76 6 6 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 7 7 3 1173.15 1602.59 7 7 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 8 8 4 1173.15 1414.42 8 8 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 9 5 1173.15 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 10 9 5 1173.15 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 11 9 5 1173.15 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 12 9 5 1173.15 1632.66 11 12 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 1 1 1 1690.68 1690.68 1 1 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 2 2 2 1690.68 1800.7 4 4 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 3 2 2 1690.68 1800.7 4 4 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 4 2 2 1690.68 1800.7 4 4 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 5 5 3 1690.68 2031.98 5 5 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 6 6 4 1690.68 1698.66 7 7 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 7 6 4 1690.68 1698.66 7 7 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 8 8 5 1690.68 1000.6 8 8 +Manufacturer#3 almond antique chartreuse khaki white 99.68 1 1 1 99.68 99.68 1 1 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 2 2 2 99.68 1190.27 4 5 +Manufacturer#3 almond antique forest lavender goldenrod NULL 3 2 2 99.68 1190.27 4 5 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 4 2 2 99.68 1190.27 4 5 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 5 2 2 99.68 1190.27 4 5 +Manufacturer#3 almond antique metallic orange dim 55.39 6 6 3 99.68 55.39 5 6 +Manufacturer#3 almond antique misty red olive 1922.98 7 7 4 99.68 1922.98 6 7 +Manufacturer#3 almond antique olive coral navajo 1337.29 8 8 5 99.68 1337.29 7 8 +Manufacturer#4 almond antique gainsboro frosted violet NULL 1 1 1 NULL NULL 0 1 +Manufacturer#4 almond antique violet mint lemon 1375.42 2 2 2 NULL 1375.42 1 2 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 3 3 3 NULL 1206.26 2 4 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 4 3 3 NULL 1206.26 2 4 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 5 5 4 NULL 1844.92 3 5 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 6 6 5 NULL 1290.35 4 6 +Manufacturer#5 almond antique blue firebrick mint 1789.69 1 1 1 1789.69 1789.69 1 1 +Manufacturer#5 almond antique medium spring khaki 1611.66 2 2 2 1789.69 1611.66 3 3 +Manufacturer#5 almond antique medium spring khaki 1611.66 3 2 2 1789.69 1611.66 3 3 +Manufacturer#5 almond antique sky peru orange 1788.73 4 4 3 1789.69 1788.73 4 4 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 5 5 4 1789.69 1018.1 5 5 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 6 6 5 1789.69 1464.48 6 6 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string) + sort order: + + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumDouble + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice s mi ma av +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 5716.950000000001 1206.26 1844.92 1429.2375000000002 +Manufacturer#4 almond antique violet mint lemon 1375.42 5716.950000000001 1206.26 1844.92 1429.2375000000002 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 5716.950000000001 1206.26 1844.92 1429.2375000000002 +Manufacturer#4 almond antique gainsboro frosted violet NULL 5716.950000000001 1206.26 1844.92 1429.2375000000002 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 5716.950000000001 1206.26 1844.92 1429.2375000000002 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 5716.950000000001 1206.26 1844.92 1429.2375000000002 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 9284.32 1018.1 1789.69 1547.3866666666665 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 9284.32 1018.1 1789.69 1547.3866666666665 +Manufacturer#5 almond antique medium spring khaki 1611.66 9284.32 1018.1 1789.69 1547.3866666666665 +Manufacturer#5 almond antique blue firebrick mint 1789.69 9284.32 1018.1 1789.69 1547.3866666666665 +Manufacturer#5 almond antique medium spring khaki 1611.66 9284.32 1018.1 1789.69 1547.3866666666665 +Manufacturer#5 almond antique sky peru orange 1788.73 9284.32 1018.1 1789.69 1547.3866666666665 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 12724.68 900.66 2031.98 1590.585 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 12724.68 900.66 2031.98 1590.585 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 12724.68 900.66 2031.98 1590.585 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 12724.68 900.66 2031.98 1590.585 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 12724.68 900.66 2031.98 1590.585 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 12724.68 900.66 2031.98 1590.585 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 12724.68 900.66 2031.98 1590.585 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 12724.68 900.66 2031.98 1590.585 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond antique burnished rose metallic 1173.15 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond antique burnished rose metallic 1173.15 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 6386.15 55.39 1922.98 912.3071428571428 +Manufacturer#3 almond antique metallic orange dim 55.39 6386.15 55.39 1922.98 912.3071428571428 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 6386.15 55.39 1922.98 912.3071428571428 +Manufacturer#3 almond antique olive coral navajo 1337.29 6386.15 55.39 1922.98 912.3071428571428 +Manufacturer#3 almond antique misty red olive 1922.98 6386.15 55.39 1922.98 912.3071428571428 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 6386.15 55.39 1922.98 912.3071428571428 +Manufacturer#3 almond antique forest lavender goldenrod NULL 6386.15 55.39 1922.98 912.3071428571428 +Manufacturer#3 almond antique chartreuse khaki white 99.68 6386.15 55.39 1922.98 912.3071428571428 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumDouble + window frame: PRECEDING(MAX)~CURRENT + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: PRECEDING(MAX)~CURRENT + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: PRECEDING(MAX)~CURRENT + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: PRECEDING(MAX)~CURRENT + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice s mi ma av +Manufacturer#1 almond antique burnished rose metallic 1173.15 2346.3 1173.15 1173.15 1173.15 +Manufacturer#1 almond antique burnished rose metallic 1173.15 2346.3 1173.15 1173.15 1173.15 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 9361.34 1173.15 1753.76 1560.2233333333334 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 9361.34 1173.15 1753.76 1560.2233333333334 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 9361.34 1173.15 1753.76 1560.2233333333334 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 9361.34 1173.15 1753.76 1560.2233333333334 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 10963.93 1173.15 1753.76 1566.2757142857142 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 12378.35 1173.15 1753.76 1547.29375 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 1690.68 1690.68 1690.68 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 7092.780000000001 1690.68 1800.7 1773.1950000000002 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 7092.780000000001 1690.68 1800.7 1773.1950000000002 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 7092.780000000001 1690.68 1800.7 1773.1950000000002 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 9124.76 1690.68 2031.98 1824.952 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 11724.08 900.66 2031.98 1674.8685714285714 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 11724.08 900.66 2031.98 1674.8685714285714 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 12724.68 900.66 2031.98 1590.585 +Manufacturer#3 almond antique chartreuse khaki white 99.68 99.68 99.68 99.68 99.68 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 3070.49 99.68 1190.27 767.6225 +Manufacturer#3 almond antique forest lavender goldenrod NULL 3070.49 99.68 1190.27 767.6225 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 3070.49 99.68 1190.27 767.6225 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 3070.49 99.68 1190.27 767.6225 +Manufacturer#3 almond antique metallic orange dim 55.39 3125.8799999999997 55.39 1190.27 625.1759999999999 +Manufacturer#3 almond antique misty red olive 1922.98 5048.86 55.39 1922.98 841.4766666666666 +Manufacturer#3 almond antique olive coral navajo 1337.29 6386.15 55.39 1922.98 912.3071428571428 +Manufacturer#4 almond antique gainsboro frosted violet NULL NULL NULL NULL NULL +Manufacturer#4 almond antique violet mint lemon 1375.42 1375.42 1375.42 1375.42 1375.42 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 2581.6800000000003 1206.26 1375.42 1290.8400000000001 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 2581.6800000000003 1206.26 1375.42 1290.8400000000001 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 4426.6 1206.26 1844.92 1475.5333333333335 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 5716.950000000001 1206.26 1844.92 1429.2375000000002 +Manufacturer#5 almond antique blue firebrick mint 1789.69 1789.69 1789.69 1789.69 1789.69 +Manufacturer#5 almond antique medium spring khaki 1611.66 5013.01 1611.66 1789.69 1671.0033333333333 +Manufacturer#5 almond antique medium spring khaki 1611.66 5013.01 1611.66 1789.69 1671.0033333333333 +Manufacturer#5 almond antique sky peru orange 1788.73 6801.74 1611.66 1789.69 1700.435 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 7819.84 1018.1 1789.69 1563.968 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 9284.32 1018.1 1789.69 1547.3866666666665 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as av +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as av +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumDouble + window frame: PRECEDING(MAX)~CURRENT + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: PRECEDING(MAX)~CURRENT + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: PRECEDING(MAX)~CURRENT + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: PRECEDING(MAX)~CURRENT + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as av +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as av +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice s mi ma av +Manufacturer#1 almond antique burnished rose metallic 1173.15 2346.3 1173.15 1173.15 1173.15 +Manufacturer#1 almond antique burnished rose metallic 1173.15 2346.3 1173.15 1173.15 1173.15 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 9361.34 1173.15 1753.76 1560.2233333333334 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 9361.34 1173.15 1753.76 1560.2233333333334 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 9361.34 1173.15 1753.76 1560.2233333333334 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 9361.34 1173.15 1753.76 1560.2233333333334 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 10963.93 1173.15 1753.76 1566.2757142857142 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 12378.35 1173.15 1753.76 1547.29375 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 1690.68 1690.68 1690.68 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 7092.780000000001 1690.68 1800.7 1773.1950000000002 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 7092.780000000001 1690.68 1800.7 1773.1950000000002 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 7092.780000000001 1690.68 1800.7 1773.1950000000002 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 9124.76 1690.68 2031.98 1824.952 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 11724.08 900.66 2031.98 1674.8685714285714 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 11724.08 900.66 2031.98 1674.8685714285714 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 12724.68 900.66 2031.98 1590.585 +Manufacturer#3 almond antique chartreuse khaki white 99.68 99.68 99.68 99.68 99.68 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 3070.49 99.68 1190.27 767.6225 +Manufacturer#3 almond antique forest lavender goldenrod NULL 3070.49 99.68 1190.27 767.6225 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 3070.49 99.68 1190.27 767.6225 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 3070.49 99.68 1190.27 767.6225 +Manufacturer#3 almond antique metallic orange dim 55.39 3125.8799999999997 55.39 1190.27 625.1759999999999 +Manufacturer#3 almond antique misty red olive 1922.98 5048.86 55.39 1922.98 841.4766666666666 +Manufacturer#3 almond antique olive coral navajo 1337.29 6386.15 55.39 1922.98 912.3071428571428 +Manufacturer#4 almond antique gainsboro frosted violet NULL NULL NULL NULL NULL +Manufacturer#4 almond antique violet mint lemon 1375.42 1375.42 1375.42 1375.42 1375.42 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 2581.6800000000003 1206.26 1375.42 1290.8400000000001 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 2581.6800000000003 1206.26 1375.42 1290.8400000000001 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 4426.6 1206.26 1844.92 1475.5333333333335 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 5716.950000000001 1206.26 1844.92 1429.2375000000002 +Manufacturer#5 almond antique blue firebrick mint 1789.69 1789.69 1789.69 1789.69 1789.69 +Manufacturer#5 almond antique medium spring khaki 1611.66 5013.01 1611.66 1789.69 1671.0033333333333 +Manufacturer#5 almond antique medium spring khaki 1611.66 5013.01 1611.66 1789.69 1671.0033333333333 +Manufacturer#5 almond antique sky peru orange 1788.73 6801.74 1611.66 1789.69 1700.435 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 7819.84 1018.1 1789.69 1563.968 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 9284.32 1018.1 1789.69 1547.3866666666665 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as av +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as av +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumDouble + window frame: PRECEDING(MAX)~CURRENT + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: PRECEDING(MAX)~CURRENT + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: PRECEDING(MAX)~CURRENT + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: PRECEDING(MAX)~CURRENT + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as av +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as av +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice s mi ma av +Manufacturer#1 almond antique burnished rose metallic 1173.15 1173.15 1173.15 1173.15 1173.15 +Manufacturer#1 almond antique burnished rose metallic 1173.15 2346.3 1173.15 1173.15 1173.15 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 4100.06 1173.15 1753.76 1366.6866666666667 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 5853.820000000001 1173.15 1753.76 1463.4550000000002 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 7607.580000000001 1173.15 1753.76 1521.516 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 9361.34 1173.15 1753.76 1560.2233333333334 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 10963.93 1173.15 1753.76 1566.2757142857142 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 12378.35 1173.15 1753.76 1547.29375 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 14011.01 1173.15 1753.76 1556.778888888889 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 14011.01 1173.15 1753.76 1556.778888888889 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 15643.67 1173.15 1753.76 1564.367 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 1690.68 1690.68 1690.68 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 3491.38 1690.68 1800.7 1745.69 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 5292.08 1690.68 1800.7 1764.0266666666666 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 7092.78 1690.68 1800.7 1773.195 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 9124.76 1690.68 2031.98 1824.952 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 10025.42 900.66 2031.98 1670.9033333333334 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 11724.08 900.66 2031.98 1674.8685714285714 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 12724.68 900.66 2031.98 1590.585 +Manufacturer#3 almond antique chartreuse khaki white 99.68 99.68 99.68 99.68 99.68 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 689.95 99.68 590.27 344.975 +Manufacturer#3 almond antique forest lavender goldenrod NULL 689.95 99.68 590.27 344.975 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 1880.22 99.68 1190.27 626.74 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 3070.49 99.68 1190.27 767.6225 +Manufacturer#3 almond antique metallic orange dim 55.39 3125.8799999999997 55.39 1190.27 625.1759999999999 +Manufacturer#3 almond antique misty red olive 1922.98 5048.86 55.39 1922.98 841.4766666666666 +Manufacturer#3 almond antique olive coral navajo 1337.29 6386.15 55.39 1922.98 912.3071428571428 +Manufacturer#4 almond antique gainsboro frosted violet NULL NULL NULL NULL NULL +Manufacturer#4 almond antique violet mint lemon 1375.42 1375.42 1375.42 1375.42 1375.42 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 1375.42 1375.42 1375.42 1375.42 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 2581.6800000000003 1206.26 1375.42 1290.8400000000001 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 4426.6 1206.26 1844.92 1475.5333333333335 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 5716.950000000001 1206.26 1844.92 1429.2375000000002 +Manufacturer#5 almond antique blue firebrick mint 1789.69 1789.69 1789.69 1789.69 1789.69 +Manufacturer#5 almond antique medium spring khaki 1611.66 3401.3500000000004 1611.66 1789.69 1700.6750000000002 +Manufacturer#5 almond antique medium spring khaki 1611.66 5013.01 1611.66 1789.69 1671.0033333333333 +Manufacturer#5 almond antique sky peru orange 1788.73 6801.74 1611.66 1789.69 1700.435 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 7819.84 1018.1 1789.69 1563.968 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 9284.32 1018.1 1789.69 1547.3866666666665 +PREHOOK: query: create table vector_ptf_part_simple_text_decimal(p_mfgr string, p_name string, p_retailprice decimal(38,18)) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '\t' + STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vector_ptf_part_simple_text_decimal +POSTHOOK: query: create table vector_ptf_part_simple_text_decimal(p_mfgr string, p_name string, p_retailprice decimal(38,18)) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '\t' + STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vector_ptf_part_simple_text_decimal +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vector_ptf_part_simple.txt' OVERWRITE INTO TABLE vector_ptf_part_simple_text_decimal +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@vector_ptf_part_simple_text_decimal +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vector_ptf_part_simple.txt' OVERWRITE INTO TABLE vector_ptf_part_simple_text_decimal +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@vector_ptf_part_simple_text_decimal +PREHOOK: query: create table vector_ptf_part_simple_orc_decimal(p_mfgr string, p_name string, p_retailprice decimal(38,18)) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vector_ptf_part_simple_orc_decimal +POSTHOOK: query: create table vector_ptf_part_simple_orc_decimal(p_mfgr string, p_name string, p_retailprice decimal(38,18)) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vector_ptf_part_simple_orc_decimal +PREHOOK: query: INSERT INTO TABLE vector_ptf_part_simple_orc_decimal SELECT * FROM vector_ptf_part_simple_text_decimal +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_text_decimal +PREHOOK: Output: default@vector_ptf_part_simple_orc_decimal +POSTHOOK: query: INSERT INTO TABLE vector_ptf_part_simple_orc_decimal SELECT * FROM vector_ptf_part_simple_text_decimal +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_text_decimal +POSTHOOK: Output: default@vector_ptf_part_simple_orc_decimal +POSTHOOK: Lineage: vector_ptf_part_simple_orc_decimal.p_mfgr SIMPLE [(vector_ptf_part_simple_text_decimal)vector_ptf_part_simple_text_decimal.FieldSchema(name:p_mfgr, type:string, comment:null), ] +POSTHOOK: Lineage: vector_ptf_part_simple_orc_decimal.p_name SIMPLE [(vector_ptf_part_simple_text_decimal)vector_ptf_part_simple_text_decimal.FieldSchema(name:p_name, type:string, comment:null), ] +POSTHOOK: Lineage: vector_ptf_part_simple_orc_decimal.p_retailprice SIMPLE [(vector_ptf_part_simple_text_decimal)vector_ptf_part_simple_text_decimal.FieldSchema(name:p_retailprice, type:decimal(38,18), comment:null), ] +vector_ptf_part_simple_text_decimal.p_mfgr vector_ptf_part_simple_text_decimal.p_name vector_ptf_part_simple_text_decimal.p_retailprice +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_decimal +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_decimal +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc_decimal + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string) + sort order: + + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_retailprice (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:decimal(38,18) + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: decimal(38,18) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDecimal + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: decimal(38,18)), sum_window_0 (type: decimal(38,18)), min_window_1 (type: decimal(38,18)), max_window_2 (type: decimal(38,18)), avg_window_3 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_decimal +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc_decimal +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_decimal +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc_decimal +#### A masked pattern was here #### +p_mfgr p_name p_retailprice s mi ma av +Manufacturer#4 almond azure aquamarine papaya violet 1290.350000000000000000 5716.950000000000000000 1206.260000000000000000 1844.920000000000000000 1429.237500000000000000 +Manufacturer#4 almond antique violet mint lemon 1375.420000000000000000 5716.950000000000000000 1206.260000000000000000 1844.920000000000000000 1429.237500000000000000 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 5716.950000000000000000 1206.260000000000000000 1844.920000000000000000 1429.237500000000000000 +Manufacturer#4 almond antique gainsboro frosted violet NULL 5716.950000000000000000 1206.260000000000000000 1844.920000000000000000 1429.237500000000000000 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.920000000000000000 5716.950000000000000000 1206.260000000000000000 1844.920000000000000000 1429.237500000000000000 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.260000000000000000 5716.950000000000000000 1206.260000000000000000 1844.920000000000000000 1429.237500000000000000 +Manufacturer#5 almond azure blanched chiffon midnight 1464.480000000000000000 9284.320000000000000000 1018.100000000000000000 1789.690000000000000000 1547.386666666666666667 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.100000000000000000 9284.320000000000000000 1018.100000000000000000 1789.690000000000000000 1547.386666666666666667 +Manufacturer#5 almond antique medium spring khaki 1611.660000000000000000 9284.320000000000000000 1018.100000000000000000 1789.690000000000000000 1547.386666666666666667 +Manufacturer#5 almond antique blue firebrick mint 1789.690000000000000000 9284.320000000000000000 1018.100000000000000000 1789.690000000000000000 1547.386666666666666667 +Manufacturer#5 almond antique medium spring khaki 1611.660000000000000000 9284.320000000000000000 1018.100000000000000000 1789.690000000000000000 1547.386666666666666667 +Manufacturer#5 almond antique sky peru orange 1788.730000000000000000 9284.320000000000000000 1018.100000000000000000 1789.690000000000000000 1547.386666666666666667 +Manufacturer#2 almond aquamarine rose maroon antique 900.660000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#2 almond aquamarine rose maroon antique 1698.660000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#2 almond antique violet turquoise frosted 1800.700000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#2 almond antique violet chocolate turquoise 1690.680000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#2 almond antique violet turquoise frosted 1800.700000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.600000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#2 almond aquamarine midnight light salmon 2031.980000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#2 almond antique violet turquoise frosted 1800.700000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.760000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.660000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.660000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.760000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond aquamarine burnished black steel 1414.420000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond antique burnished rose metallic 1173.150000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.590000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.760000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond antique burnished rose metallic 1173.150000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.760000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.660000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#3 almond antique forest lavender goldenrod 590.270000000000000000 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#3 almond antique metallic orange dim 55.390000000000000000 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#3 almond antique forest lavender goldenrod 1190.270000000000000000 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#3 almond antique olive coral navajo 1337.290000000000000000 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#3 almond antique misty red olive 1922.980000000000000000 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#3 almond antique forest lavender goldenrod 1190.270000000000000000 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#3 almond antique forest lavender goldenrod NULL 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#3 almond antique chartreuse khaki white 99.680000000000000000 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_decimal +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_decimal +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc_decimal + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + value expressions: p_retailprice (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:decimal(38,18) + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: decimal(38,18) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: PRECEDING(MAX)~CURRENT + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: PRECEDING(MAX)~CURRENT + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: PRECEDING(MAX)~CURRENT + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDecimal + window frame: PRECEDING(MAX)~CURRENT + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: decimal(38,18)), sum_window_0 (type: decimal(38,18)), min_window_1 (type: decimal(38,18)), max_window_2 (type: decimal(38,18)), avg_window_3 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_decimal +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc_decimal +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_decimal +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc_decimal +#### A masked pattern was here #### +p_mfgr p_name p_retailprice s mi ma av +Manufacturer#1 almond antique burnished rose metallic 1173.150000000000000000 2346.300000000000000000 1173.150000000000000000 1173.150000000000000000 1173.150000000000000000 +Manufacturer#1 almond antique burnished rose metallic 1173.150000000000000000 2346.300000000000000000 1173.150000000000000000 1173.150000000000000000 1173.150000000000000000 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.760000000000000000 9361.340000000000000000 1173.150000000000000000 1753.760000000000000000 1560.223333333333333333 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.760000000000000000 9361.340000000000000000 1173.150000000000000000 1753.760000000000000000 1560.223333333333333333 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.760000000000000000 9361.340000000000000000 1173.150000000000000000 1753.760000000000000000 1560.223333333333333333 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.760000000000000000 9361.340000000000000000 1173.150000000000000000 1753.760000000000000000 1560.223333333333333333 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.590000000000000000 10963.930000000000000000 1173.150000000000000000 1753.760000000000000000 1566.275714285714285714 +Manufacturer#1 almond aquamarine burnished black steel 1414.420000000000000000 12378.350000000000000000 1173.150000000000000000 1753.760000000000000000 1547.293750000000000000 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.660000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.660000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.660000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#2 almond antique violet chocolate turquoise 1690.680000000000000000 1690.680000000000000000 1690.680000000000000000 1690.680000000000000000 1690.680000000000000000 +Manufacturer#2 almond antique violet turquoise frosted 1800.700000000000000000 7092.780000000000000000 1690.680000000000000000 1800.700000000000000000 1773.195000000000000000 +Manufacturer#2 almond antique violet turquoise frosted 1800.700000000000000000 7092.780000000000000000 1690.680000000000000000 1800.700000000000000000 1773.195000000000000000 +Manufacturer#2 almond antique violet turquoise frosted 1800.700000000000000000 7092.780000000000000000 1690.680000000000000000 1800.700000000000000000 1773.195000000000000000 +Manufacturer#2 almond aquamarine midnight light salmon 2031.980000000000000000 9124.760000000000000000 1690.680000000000000000 2031.980000000000000000 1824.952000000000000000 +Manufacturer#2 almond aquamarine rose maroon antique 900.660000000000000000 11724.080000000000000000 900.660000000000000000 2031.980000000000000000 1674.868571428571428571 +Manufacturer#2 almond aquamarine rose maroon antique 1698.660000000000000000 11724.080000000000000000 900.660000000000000000 2031.980000000000000000 1674.868571428571428571 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.600000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#3 almond antique chartreuse khaki white 99.680000000000000000 99.680000000000000000 99.680000000000000000 99.680000000000000000 99.680000000000000000 +Manufacturer#3 almond antique forest lavender goldenrod 590.270000000000000000 3070.490000000000000000 99.680000000000000000 1190.270000000000000000 767.622500000000000000 +Manufacturer#3 almond antique forest lavender goldenrod NULL 3070.490000000000000000 99.680000000000000000 1190.270000000000000000 767.622500000000000000 +Manufacturer#3 almond antique forest lavender goldenrod 1190.270000000000000000 3070.490000000000000000 99.680000000000000000 1190.270000000000000000 767.622500000000000000 +Manufacturer#3 almond antique forest lavender goldenrod 1190.270000000000000000 3070.490000000000000000 99.680000000000000000 1190.270000000000000000 767.622500000000000000 +Manufacturer#3 almond antique metallic orange dim 55.390000000000000000 3125.880000000000000000 55.390000000000000000 1190.270000000000000000 625.176000000000000000 +Manufacturer#3 almond antique misty red olive 1922.980000000000000000 5048.860000000000000000 55.390000000000000000 1922.980000000000000000 841.476666666666666667 +Manufacturer#3 almond antique olive coral navajo 1337.290000000000000000 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#4 almond antique gainsboro frosted violet NULL NULL NULL NULL NULL +Manufacturer#4 almond antique violet mint lemon 1375.420000000000000000 1375.420000000000000000 1375.420000000000000000 1375.420000000000000000 1375.420000000000000000 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 2581.680000000000000000 1206.260000000000000000 1375.420000000000000000 1290.840000000000000000 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.260000000000000000 2581.680000000000000000 1206.260000000000000000 1375.420000000000000000 1290.840000000000000000 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.920000000000000000 4426.600000000000000000 1206.260000000000000000 1844.920000000000000000 1475.533333333333333333 +Manufacturer#4 almond azure aquamarine papaya violet 1290.350000000000000000 5716.950000000000000000 1206.260000000000000000 1844.920000000000000000 1429.237500000000000000 +Manufacturer#5 almond antique blue firebrick mint 1789.690000000000000000 1789.690000000000000000 1789.690000000000000000 1789.690000000000000000 1789.690000000000000000 +Manufacturer#5 almond antique medium spring khaki 1611.660000000000000000 5013.010000000000000000 1611.660000000000000000 1789.690000000000000000 1671.003333333333333333 +Manufacturer#5 almond antique medium spring khaki 1611.660000000000000000 5013.010000000000000000 1611.660000000000000000 1789.690000000000000000 1671.003333333333333333 +Manufacturer#5 almond antique sky peru orange 1788.730000000000000000 6801.740000000000000000 1611.660000000000000000 1789.690000000000000000 1700.435000000000000000 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.100000000000000000 7819.840000000000000000 1018.100000000000000000 1789.690000000000000000 1563.968000000000000000 +Manufacturer#5 almond azure blanched chiffon midnight 1464.480000000000000000 9284.320000000000000000 1018.100000000000000000 1789.690000000000000000 1547.386666666666666667 +PREHOOK: query: create table vector_ptf_part_simple_orc_long(p_mfgr string, p_name string, p_bigint bigint) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vector_ptf_part_simple_orc_long +POSTHOOK: query: create table vector_ptf_part_simple_orc_long(p_mfgr string, p_name string, p_bigint bigint) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vector_ptf_part_simple_orc_long +PREHOOK: query: INSERT INTO TABLE vector_ptf_part_simple_orc_long SELECT p_mfgr, p_name, cast(p_retailprice * 100 as bigint) FROM vector_ptf_part_simple_text_decimal +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_text_decimal +PREHOOK: Output: default@vector_ptf_part_simple_orc_long +POSTHOOK: query: INSERT INTO TABLE vector_ptf_part_simple_orc_long SELECT p_mfgr, p_name, cast(p_retailprice * 100 as bigint) FROM vector_ptf_part_simple_text_decimal +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_text_decimal +POSTHOOK: Output: default@vector_ptf_part_simple_orc_long +POSTHOOK: Lineage: vector_ptf_part_simple_orc_long.p_bigint EXPRESSION [(vector_ptf_part_simple_text_decimal)vector_ptf_part_simple_text_decimal.FieldSchema(name:p_retailprice, type:decimal(38,18), comment:null), ] +POSTHOOK: Lineage: vector_ptf_part_simple_orc_long.p_mfgr SIMPLE [(vector_ptf_part_simple_text_decimal)vector_ptf_part_simple_text_decimal.FieldSchema(name:p_mfgr, type:string, comment:null), ] +POSTHOOK: Lineage: vector_ptf_part_simple_orc_long.p_name SIMPLE [(vector_ptf_part_simple_text_decimal)vector_ptf_part_simple_text_decimal.FieldSchema(name:p_name, type:string, comment:null), ] +p_mfgr p_name _c2 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr) as s, +min(p_bigint) over(partition by p_mfgr) as mi, +max(p_bigint) over(partition by p_mfgr) as ma, +avg(p_bigint) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_long +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr) as s, +min(p_bigint) over(partition by p_mfgr) as mi, +max(p_bigint) over(partition by p_mfgr) as ma, +avg(p_bigint) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_long +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc_long + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string) + sort order: + + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_bigint (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_bigint:bigint + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: bigint + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumLong + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), sum_window_0 (type: bigint), min_window_1 (type: bigint), max_window_2 (type: bigint), avg_window_3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr) as s, +min(p_bigint) over(partition by p_mfgr) as mi, +max(p_bigint) over(partition by p_mfgr) as ma, +avg(p_bigint) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_long +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc_long +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr) as s, +min(p_bigint) over(partition by p_mfgr) as mi, +max(p_bigint) over(partition by p_mfgr) as ma, +avg(p_bigint) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_long +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc_long +#### A masked pattern was here #### +p_mfgr p_name p_bigint s mi ma av +Manufacturer#4 almond azure aquamarine papaya violet 129035 571695 120626 184492 142923.75 +Manufacturer#4 almond antique violet mint lemon 137542 571695 120626 184492 142923.75 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 571695 120626 184492 142923.75 +Manufacturer#4 almond antique gainsboro frosted violet NULL 571695 120626 184492 142923.75 +Manufacturer#4 almond aquamarine yellow dodger mint 184492 571695 120626 184492 142923.75 +Manufacturer#4 almond aquamarine floral ivory bisque 120626 571695 120626 184492 142923.75 +Manufacturer#5 almond azure blanched chiffon midnight 146448 928432 101810 178969 154738.66666666666 +Manufacturer#5 almond aquamarine dodger light gainsboro 101810 928432 101810 178969 154738.66666666666 +Manufacturer#5 almond antique medium spring khaki 161166 928432 101810 178969 154738.66666666666 +Manufacturer#5 almond antique blue firebrick mint 178969 928432 101810 178969 154738.66666666666 +Manufacturer#5 almond antique medium spring khaki 161166 928432 101810 178969 154738.66666666666 +Manufacturer#5 almond antique sky peru orange 178873 928432 101810 178969 154738.66666666666 +Manufacturer#2 almond aquamarine rose maroon antique 90066 1272468 90066 203198 159058.5 +Manufacturer#2 almond aquamarine rose maroon antique 169866 1272468 90066 203198 159058.5 +Manufacturer#2 almond antique violet turquoise frosted 180070 1272468 90066 203198 159058.5 +Manufacturer#2 almond antique violet chocolate turquoise 169068 1272468 90066 203198 159058.5 +Manufacturer#2 almond antique violet turquoise frosted 180070 1272468 90066 203198 159058.5 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 100060 1272468 90066 203198 159058.5 +Manufacturer#2 almond aquamarine midnight light salmon 203198 1272468 90066 203198 159058.5 +Manufacturer#2 almond antique violet turquoise frosted 180070 1272468 90066 203198 159058.5 +Manufacturer#1 almond antique chartreuse lavender yellow 175376 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond aquamarine pink moccasin thistle 163266 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond aquamarine pink moccasin thistle 163266 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond antique chartreuse lavender yellow 175376 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond aquamarine burnished black steel 141442 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond antique burnished rose metallic 117315 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond antique salmon chartreuse burlywood 160259 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond antique chartreuse lavender yellow 175376 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond antique burnished rose metallic 117315 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond antique chartreuse lavender yellow 175376 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond aquamarine pink moccasin thistle 163266 1727633 117315 175376 157057.54545454544 +Manufacturer#3 almond antique forest lavender goldenrod 59027 638615 5539 192298 91230.71428571429 +Manufacturer#3 almond antique metallic orange dim 5539 638615 5539 192298 91230.71428571429 +Manufacturer#3 almond antique forest lavender goldenrod 119027 638615 5539 192298 91230.71428571429 +Manufacturer#3 almond antique olive coral navajo 133729 638615 5539 192298 91230.71428571429 +Manufacturer#3 almond antique misty red olive 192298 638615 5539 192298 91230.71428571429 +Manufacturer#3 almond antique forest lavender goldenrod 119027 638615 5539 192298 91230.71428571429 +Manufacturer#3 almond antique forest lavender goldenrod NULL 638615 5539 192298 91230.71428571429 +Manufacturer#3 almond antique chartreuse khaki white 9968 638615 5539 192298 91230.71428571429 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr order by p_name) as s, +min(p_bigint) over(partition by p_mfgr order by p_name) as mi, +max(p_bigint) over(partition by p_mfgr order by p_name) as ma, +avg(p_bigint) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_long +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr order by p_name) as s, +min(p_bigint) over(partition by p_mfgr order by p_name) as mi, +max(p_bigint) over(partition by p_mfgr order by p_name) as ma, +avg(p_bigint) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_long +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc_long + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_bigint (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_bigint:bigint + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: bigint + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumLong + window frame: PRECEDING(MAX)~CURRENT + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: PRECEDING(MAX)~CURRENT + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: PRECEDING(MAX)~CURRENT + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: PRECEDING(MAX)~CURRENT + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), sum_window_0 (type: bigint), min_window_1 (type: bigint), max_window_2 (type: bigint), avg_window_3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr order by p_name) as s, +min(p_bigint) over(partition by p_mfgr order by p_name) as mi, +max(p_bigint) over(partition by p_mfgr order by p_name) as ma, +avg(p_bigint) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_long +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc_long +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr order by p_name) as s, +min(p_bigint) over(partition by p_mfgr order by p_name) as mi, +max(p_bigint) over(partition by p_mfgr order by p_name) as ma, +avg(p_bigint) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_long +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc_long +#### A masked pattern was here #### +p_mfgr p_name p_bigint s mi ma av +Manufacturer#1 almond antique burnished rose metallic 117315 234630 117315 117315 117315.0 +Manufacturer#1 almond antique burnished rose metallic 117315 234630 117315 117315 117315.0 +Manufacturer#1 almond antique chartreuse lavender yellow 175376 936134 117315 175376 156022.33333333334 +Manufacturer#1 almond antique chartreuse lavender yellow 175376 936134 117315 175376 156022.33333333334 +Manufacturer#1 almond antique chartreuse lavender yellow 175376 936134 117315 175376 156022.33333333334 +Manufacturer#1 almond antique chartreuse lavender yellow 175376 936134 117315 175376 156022.33333333334 +Manufacturer#1 almond antique salmon chartreuse burlywood 160259 1096393 117315 175376 156627.57142857142 +Manufacturer#1 almond aquamarine burnished black steel 141442 1237835 117315 175376 154729.375 +Manufacturer#1 almond aquamarine pink moccasin thistle 163266 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond aquamarine pink moccasin thistle 163266 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond aquamarine pink moccasin thistle 163266 1727633 117315 175376 157057.54545454544 +Manufacturer#2 almond antique violet chocolate turquoise 169068 169068 169068 169068 169068.0 +Manufacturer#2 almond antique violet turquoise frosted 180070 709278 169068 180070 177319.5 +Manufacturer#2 almond antique violet turquoise frosted 180070 709278 169068 180070 177319.5 +Manufacturer#2 almond antique violet turquoise frosted 180070 709278 169068 180070 177319.5 +Manufacturer#2 almond aquamarine midnight light salmon 203198 912476 169068 203198 182495.2 +Manufacturer#2 almond aquamarine rose maroon antique 90066 1172408 90066 203198 167486.85714285713 +Manufacturer#2 almond aquamarine rose maroon antique 169866 1172408 90066 203198 167486.85714285713 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 100060 1272468 90066 203198 159058.5 +Manufacturer#3 almond antique chartreuse khaki white 9968 9968 9968 9968 9968.0 +Manufacturer#3 almond antique forest lavender goldenrod 59027 307049 9968 119027 76762.25 +Manufacturer#3 almond antique forest lavender goldenrod NULL 307049 9968 119027 76762.25 +Manufacturer#3 almond antique forest lavender goldenrod 119027 307049 9968 119027 76762.25 +Manufacturer#3 almond antique forest lavender goldenrod 119027 307049 9968 119027 76762.25 +Manufacturer#3 almond antique metallic orange dim 5539 312588 5539 119027 62517.6 +Manufacturer#3 almond antique misty red olive 192298 504886 5539 192298 84147.66666666667 +Manufacturer#3 almond antique olive coral navajo 133729 638615 5539 192298 91230.71428571429 +Manufacturer#4 almond antique gainsboro frosted violet NULL NULL NULL NULL NULL +Manufacturer#4 almond antique violet mint lemon 137542 137542 137542 137542 137542.0 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 258168 120626 137542 129084.0 +Manufacturer#4 almond aquamarine floral ivory bisque 120626 258168 120626 137542 129084.0 +Manufacturer#4 almond aquamarine yellow dodger mint 184492 442660 120626 184492 147553.33333333334 +Manufacturer#4 almond azure aquamarine papaya violet 129035 571695 120626 184492 142923.75 +Manufacturer#5 almond antique blue firebrick mint 178969 178969 178969 178969 178969.0 +Manufacturer#5 almond antique medium spring khaki 161166 501301 161166 178969 167100.33333333334 +Manufacturer#5 almond antique medium spring khaki 161166 501301 161166 178969 167100.33333333334 +Manufacturer#5 almond antique sky peru orange 178873 680174 161166 178969 170043.5 +Manufacturer#5 almond aquamarine dodger light gainsboro 101810 781984 101810 178969 156396.8 +Manufacturer#5 almond azure blanched chiffon midnight 146448 928432 101810 178969 154738.66666666666 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string) + sort order: + + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col1 (type: double) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col0 + name: rank + window function: GenericUDAFRankEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: double), rank_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_retailprice r +Manufacturer#4 1290.35 1 +Manufacturer#4 1375.42 1 +Manufacturer#4 NULL 1 +Manufacturer#4 NULL 1 +Manufacturer#4 1844.92 1 +Manufacturer#4 1206.26 1 +Manufacturer#5 1464.48 1 +Manufacturer#5 1018.1 1 +Manufacturer#5 1611.66 1 +Manufacturer#5 1789.69 1 +Manufacturer#5 1611.66 1 +Manufacturer#5 1788.73 1 +Manufacturer#2 900.66 1 +Manufacturer#2 1698.66 1 +Manufacturer#2 1800.7 1 +Manufacturer#2 1690.68 1 +Manufacturer#2 1800.7 1 +Manufacturer#2 1000.6 1 +Manufacturer#2 2031.98 1 +Manufacturer#2 1800.7 1 +Manufacturer#1 1753.76 1 +Manufacturer#1 1632.66 1 +Manufacturer#1 1632.66 1 +Manufacturer#1 1753.76 1 +Manufacturer#1 1414.42 1 +Manufacturer#1 1173.15 1 +Manufacturer#1 1602.59 1 +Manufacturer#1 1753.76 1 +Manufacturer#1 1173.15 1 +Manufacturer#1 1753.76 1 +Manufacturer#1 NULL 1 +Manufacturer#1 1632.66 1 +Manufacturer#3 590.27 1 +Manufacturer#3 55.39 1 +Manufacturer#3 1190.27 1 +Manufacturer#3 1337.29 1 +Manufacturer#3 1922.98 1 +Manufacturer#3 1190.27 1 +Manufacturer#3 NULL 1 +Manufacturer#3 99.68 1 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr order by p_name) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr order by p_name) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: double), rank_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr order by p_name) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr order by p_name) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_retailprice r +Manufacturer#1 1173.15 1 +Manufacturer#1 1173.15 1 +Manufacturer#1 1753.76 3 +Manufacturer#1 1753.76 3 +Manufacturer#1 1753.76 3 +Manufacturer#1 1753.76 3 +Manufacturer#1 1602.59 7 +Manufacturer#1 1414.42 8 +Manufacturer#1 1632.66 9 +Manufacturer#1 NULL 9 +Manufacturer#1 1632.66 9 +Manufacturer#1 1632.66 9 +Manufacturer#2 1690.68 1 +Manufacturer#2 1800.7 2 +Manufacturer#2 1800.7 2 +Manufacturer#2 1800.7 2 +Manufacturer#2 2031.98 5 +Manufacturer#2 900.66 6 +Manufacturer#2 1698.66 6 +Manufacturer#2 1000.6 8 +Manufacturer#3 99.68 1 +Manufacturer#3 590.27 2 +Manufacturer#3 NULL 2 +Manufacturer#3 1190.27 2 +Manufacturer#3 1190.27 2 +Manufacturer#3 55.39 6 +Manufacturer#3 1922.98 7 +Manufacturer#3 1337.29 8 +Manufacturer#4 NULL 1 +Manufacturer#4 1375.42 2 +Manufacturer#4 NULL 3 +Manufacturer#4 1206.26 3 +Manufacturer#4 1844.92 5 +Manufacturer#4 1290.35 6 +Manufacturer#5 1789.69 1 +Manufacturer#5 1611.66 2 +Manufacturer#5 1611.66 2 +Manufacturer#5 1788.73 4 +Manufacturer#5 1018.1 5 +Manufacturer#5 1464.48 6 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (null) END (type: timestamp) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (null) END (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyExpressions: VectorUDFAdaptor(CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (null) END)(children: StringGroupColEqualStringScalar(col 0, val Manufacturer#2) -> 3:boolean) -> 4:timestamp + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, timestamp + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST, CASE WHEN ((_col0 = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (null) END ASC NULLS FIRST + partition by: _col0, CASE WHEN ((_col0 = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (null) END + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col0, CASE WHEN ((_col0 = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (null) END + name: rank + window function: GenericUDAFRankEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), rank_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice r +Manufacturer#1 almond aquamarine burnished black steel 1414.42 1 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 1 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 1 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 1 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 1 +Manufacturer#1 almond antique burnished rose metallic 1173.15 1 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 1 +Manufacturer#1 almond antique burnished rose metallic 1173.15 1 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 1 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 1 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 1 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 1 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 1 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 1 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 1 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 1 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 1 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 1 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 1 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 1 +Manufacturer#4 almond antique violet mint lemon 1375.42 1 +Manufacturer#4 almond antique gainsboro frosted violet NULL 1 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 1 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 1 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 1 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 1 +Manufacturer#3 almond antique misty red olive 1922.98 1 +Manufacturer#3 almond antique chartreuse khaki white 99.68 1 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 1 +Manufacturer#3 almond antique metallic orange dim 55.39 1 +Manufacturer#3 almond antique olive coral navajo 1337.29 1 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 1 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 1 +Manufacturer#3 almond antique forest lavender goldenrod NULL 1 +Manufacturer#5 almond antique medium spring khaki 1611.66 1 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 1 +Manufacturer#5 almond antique sky peru orange 1788.73 1 +Manufacturer#5 almond antique medium spring khaki 1611.66 1 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 1 +Manufacturer#5 almond antique blue firebrick mint 1789.69 1 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (null) END (type: timestamp), p_name (type: string) + sort order: +++ + Map-reduce partition columns: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (null) END (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyExpressions: VectorUDFAdaptor(CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (null) END)(children: StringGroupColEqualStringScalar(col 0, val Manufacturer#2) -> 3:boolean) -> 4:timestamp + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, timestamp, timestamp + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Operator (PTF) not supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0, CASE WHEN ((_col0 = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (null) END + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), rank_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice r +Manufacturer#1 almond antique burnished rose metallic 1173.15 1 +Manufacturer#1 almond antique burnished rose metallic 1173.15 1 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 7 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 8 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 9 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 1 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 2 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 2 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 2 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 5 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 6 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 6 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 8 +Manufacturer#3 almond antique chartreuse khaki white 99.68 1 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 2 +Manufacturer#3 almond antique forest lavender goldenrod NULL 2 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 2 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 2 +Manufacturer#3 almond antique metallic orange dim 55.39 6 +Manufacturer#3 almond antique misty red olive 1922.98 7 +Manufacturer#3 almond antique olive coral navajo 1337.29 8 +Manufacturer#4 almond antique gainsboro frosted violet NULL 1 +Manufacturer#4 almond antique violet mint lemon 1375.42 2 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 3 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 3 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 5 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 6 +Manufacturer#5 almond antique blue firebrick mint 1789.69 1 +Manufacturer#5 almond antique medium spring khaki 1611.66 2 +Manufacturer#5 almond antique medium spring khaki 1611.66 2 +Manufacturer#5 almond antique sky peru orange 1788.73 4 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 5 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 6 diff --git ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out index 6bd8a29..df4b0d8 100644 --- ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out @@ -104,7 +104,7 @@ POSTHOOK: Lineage: part_orc.p_partkey SIMPLE [(part_staging)part_staging.FieldSc POSTHOOK: Lineage: part_orc.p_retailprice SIMPLE [(part_staging)part_staging.FieldSchema(name:p_retailprice, type:double, comment:null), ] POSTHOOK: Lineage: part_orc.p_size SIMPLE [(part_staging)part_staging.FieldSchema(name:p_size, type:int, comment:null), ] POSTHOOK: Lineage: part_orc.p_type SIMPLE [(part_staging)part_staging.FieldSchema(name:p_type, type:string, comment:null), ] -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -114,7 +114,7 @@ from noop(on part_orc order by p_name ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -146,16 +146,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int), p_retailprice (type: double) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -166,60 +169,13 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -246,16 +202,12 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -306,26 +258,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:double - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -381,14 +318,14 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr sort by j.p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j @@ -418,20 +355,26 @@ STAGE PLANS: TableScan alias: p1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Filter Operator - isSamplingPred: false + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: p_partkey (type: int) - null sort order: a sort order: + Map-reduce partition columns: p_partkey (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 0 value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -442,75 +385,35 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [p1] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [0, 1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Map 5 Map Operator Tree: TableScan alias: p2 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Filter Operator - isSamplingPred: false + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: p_partkey (type: int) - null sort order: a sort order: + Map-reduce partition columns: p_partkey (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -521,60 +424,13 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [p2] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [0] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Operator Tree: Merge Join Operator condition map: @@ -583,20 +439,15 @@ STAGE PLANS: 0 p_partkey (type: int) 1 p_partkey (type: int) outputColumnNames: _col1, _col2, _col5 - Position of Big Table: 0 Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -623,16 +474,12 @@ STAGE PLANS: Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: true Reducer 4 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -670,26 +517,11 @@ STAGE PLANS: Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:int:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -741,13 +573,13 @@ Manufacturer#5 almond antique medium spring khaki 6 -25 Manufacturer#5 almond antique sky peru orange 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 -23 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr order by p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr @@ -774,16 +606,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -794,60 +629,13 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -878,26 +666,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string:string:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -945,7 +718,7 @@ Manufacturer#5 almond antique medium spring khaki 6 Manufacturer#5 almond antique sky peru orange 2 Manufacturer#5 almond aquamarine dodger light gainsboro 46 Manufacturer#5 almond azure blanched chiffon midnight 23 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -955,7 +728,7 @@ from noop(on part_orc order by p_name ) abc PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -987,16 +760,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int), p_retailprice (type: double) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1007,60 +783,13 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -1087,16 +816,12 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -1147,26 +872,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:double - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1222,7 +932,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1232,7 +942,7 @@ from noop(on part_orc order by p_name ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1264,16 +974,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1284,60 +997,13 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -1364,16 +1030,12 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -1425,26 +1087,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 - columns.types string:string:int:int:int:int:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1500,7 +1147,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 6 -25 Manufacturer#5 almond antique sky peru orange 2 3 3 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1511,7 +1158,7 @@ from noop(on part_orc ) group by p_mfgr, p_name, p_size PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1544,16 +1191,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1564,60 +1214,13 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -1653,15 +1256,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) - null sort order: aaa sort order: +++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -1714,26 +1313,11 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 - columns.types string:string:int:int:int:int:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1790,14 +1374,14 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 6 -25 Manufacturer#5 almond antique sky peru orange 2 3 3 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select abc.* from noop(on part_orc partition by p_mfgr order by p_name ) abc join part_orc p1 on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select abc.* from noop(on part_orc partition by p_mfgr @@ -1826,16 +1410,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1846,75 +1433,35 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Filter Operator - isSamplingPred: false + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: p_partkey (type: int) - null sort order: a sort order: + Map-reduce partition columns: p_partkey (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1925,60 +1472,13 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [p1] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [0] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -2004,21 +1504,16 @@ STAGE PLANS: raw input shape: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Filter Operator - isSamplingPred: false predicate: _col0 is not null (type: boolean) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) - null sort order: a sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 0 value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Operator Tree: Merge Join Operator condition map: @@ -2027,30 +1522,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 p_partkey (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Position of Big Table: 0 Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 - columns.types int:string:string:string:string:int:string:double:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -2102,14 +1581,14 @@ POSTHOOK: Input: default@part_orc 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select abc.* from part_orc p1 join noop(on part_orc partition by p_mfgr order by p_name ) abc on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select abc.* from part_orc p1 join noop(on part_orc partition by p_mfgr @@ -2138,19 +1617,25 @@ STAGE PLANS: TableScan alias: p1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Filter Operator - isSamplingPred: false + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: p_partkey (type: int) - null sort order: a sort order: + Map-reduce partition columns: p_partkey (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -2161,72 +1646,29 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [p1] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [0] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -2237,60 +1679,13 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Operator Tree: Merge Join Operator condition map: @@ -2299,7 +1694,6 @@ STAGE PLANS: 0 p_partkey (type: int) 1 _col0 (type: int) outputColumnNames: _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Position of Big Table: 1 Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) @@ -2307,29 +1701,13 @@ STAGE PLANS: Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 - columns.types int:string:string:string:string:int:string:double:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Reducer 4 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -2355,18 +1733,14 @@ STAGE PLANS: raw input shape: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Filter Operator - isSamplingPred: false predicate: _col0 is not null (type: boolean) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) - null sort order: a sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 1 value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - auto parallelism: true Stage: Stage-0 Fetch Operator @@ -2418,14 +1792,14 @@ POSTHOOK: Input: default@part_orc 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part_orc partition by p_mfgr order by p_name, p_size desc) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part_orc @@ -2454,7 +1828,6 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false PTF Operator Function definitions: Input definition @@ -2473,12 +1846,9 @@ STAGE PLANS: Map-side function: true Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int) - null sort order: aaz sort order: ++- Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - auto parallelism: true Execution mode: llap LLAP IO: all inputs Map Vectorization: @@ -2487,60 +1857,8 @@ STAGE PLANS: inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat notVectorizedReason: PTF Operator (PTF) not supported vectorized: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -2568,15 +1886,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) - null sort order: aaz sort order: ++- Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -2614,26 +1928,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:int:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -2683,7 +1982,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 Manufacturer#5 almond antique sky peru orange 2 3 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 Manufacturer#5 almond azure blanched chiffon midnight 23 5 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -2692,7 +1991,7 @@ from noopwithmap(on part_orc partition by p_mfgr order by p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -2723,7 +2022,6 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false PTF Operator Function definitions: Input definition @@ -2742,13 +2040,10 @@ STAGE PLANS: Map-side function: true Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int), p_retailprice (type: double) - auto parallelism: true Execution mode: llap LLAP IO: all inputs Map Vectorization: @@ -2757,60 +2052,8 @@ STAGE PLANS: inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat notVectorizedReason: PTF Operator (PTF) not supported vectorized: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -2838,16 +2081,12 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -2898,26 +2137,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:double - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -2971,7 +2195,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -2980,7 +2204,7 @@ from noop(on part_orc partition by p_mfgr order by p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -3011,16 +2235,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int), p_retailprice (type: double) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -3031,60 +2258,13 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -3111,16 +2291,12 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -3171,26 +2347,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:double - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -3244,7 +2405,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -3254,7 +2415,7 @@ partition by p_mfgr order by p_mfgr, p_name ))) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -3287,16 +2448,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int), p_retailprice (type: double) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -3307,60 +2471,13 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -3410,16 +2527,12 @@ STAGE PLANS: Map-side function: true Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -3454,16 +2567,12 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: true Reducer 4 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -3514,26 +2623,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:double - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -3589,7 +2683,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -3602,7 +2696,7 @@ order by p_name) window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) ) sub1 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -3637,16 +2731,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int), p_retailprice (type: double) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -3657,60 +2754,13 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -3737,16 +2787,12 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -3789,26 +2835,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:bigint:double - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -3870,7 +2901,7 @@ Manufacturer#5 almond antique medium spring khaki 2 6208.18 Manufacturer#5 almond antique sky peru orange 3 7672.66 Manufacturer#5 almond aquamarine dodger light gainsboro 4 5882.97 Manufacturer#5 almond azure blanched chiffon midnight 5 4271.31 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -3882,7 +2913,7 @@ partition by p_mfgr order by p_name ) abc join part_orc p1 on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -3917,16 +2948,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_partkey (type: int), p_size (type: int), p_retailprice (type: double) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -3937,75 +2971,35 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [0, 1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Map 5 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Filter Operator - isSamplingPred: false + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: p_partkey (type: int) - null sort order: a sort order: + Map-reduce partition columns: p_partkey (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -4016,60 +3010,13 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [p1] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [0] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -4095,21 +3042,16 @@ STAGE PLANS: raw input shape: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Filter Operator - isSamplingPred: false predicate: _col0 is not null (type: boolean) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) - null sort order: a sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 0 value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Operator Tree: Merge Join Operator condition map: @@ -4118,20 +3060,15 @@ STAGE PLANS: 0 _col0 (type: int) 1 p_partkey (type: int) outputColumnNames: _col1, _col2, _col5, _col7 - Position of Big Table: 0 Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: true Reducer 4 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -4195,26 +3132,11 @@ STAGE PLANS: Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 - columns.types string:string:int:int:bigint:double:double:int:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -4276,13 +3198,13 @@ Manufacturer#5 almond antique medium spring khaki 2 2 2 1611.66 3401.35 6 -25 Manufacturer#5 almond antique sky peru orange 3 3 3 1788.73 5190.08 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 4 1018.1 6208.18 46 44 Manufacturer#5 almond azure blanched chiffon midnight 5 5 5 1464.48 7672.66 23 -23 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select DISTINCT p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr order by p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select DISTINCT p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr @@ -4310,16 +3232,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -4330,60 +3255,13 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -4419,50 +3297,46 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) - null sort order: aaa sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - auto parallelism: true Reducer 3 Execution mode: vectorized, llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:int + partitionColumnCount: 0 Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1, col 2 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string:string:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -4530,7 +3404,7 @@ POSTHOOK: Output: default@mfgr_price_view POSTHOOK: Lineage: mfgr_price_view.p_brand SIMPLE [(part_orc)part_orc.FieldSchema(name:p_brand, type:string, comment:null), ] POSTHOOK: Lineage: mfgr_price_view.p_mfgr SIMPLE [(part_orc)part_orc.FieldSchema(name:p_mfgr, type:string, comment:null), ] POSTHOOK: Lineage: mfgr_price_view.s EXPRESSION [(part_orc)part_orc.FieldSchema(name:p_retailprice, type:double, comment:null), ] -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_brand, s, round(sum(s) over w1,2) as s1 from noop(on mfgr_price_view @@ -4538,7 +3412,7 @@ partition by p_mfgr order by p_mfgr) window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_brand, s, round(sum(s) over w1,2) as s1 from noop(on mfgr_price_view @@ -4568,26 +3442,40 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Select Operator expressions: p_mfgr (type: string), p_brand (type: string), p_retailprice (type: double) outputColumnNames: p_mfgr, p_brand, p_retailprice + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 7] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(p_retailprice) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 7) -> double + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 2, col 3 + native: false + projectedOutputColumns: [0] keys: p_mfgr (type: string), p_brand (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col2 (type: double) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -4598,60 +3486,13 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [2, 3, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -4684,16 +3525,12 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col2 (type: double) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -4730,26 +3567,11 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:double:double - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -4837,7 +3659,7 @@ fv1 INT) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@part_5 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail from noop(on part_orc partition by p_mfgr order by p_name) @@ -4853,7 +3675,7 @@ cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud, first_value(p_size, true) over w1 as fv1 window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail from noop(on part_orc partition by p_mfgr order by p_name) @@ -4897,16 +3719,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int), p_retailprice (type: double) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -4917,60 +3742,13 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -4997,25 +3775,18 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: true Reduce Output Operator key expressions: _col2 (type: string), _col5 (type: int) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col1 (type: string) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -5066,39 +3837,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_mfgr,p_name,p_size,r,dr,s - columns.comments - columns.types string:string:int:int:int:double -#### A masked pattern was here #### - name default.part_4 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.part_4 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false Reducer 4 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -5135,16 +3881,12 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: sum_window_0 (type: bigint), _col5 (type: int) - auto parallelism: true Reducer 5 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -5202,36 +3944,12 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 - columns.comments - columns.types string:string:int:int:int:int:double:int -#### A masked pattern was here #### - name default.part_5 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.part_5 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false Stage: Stage-3 Dependency Collection @@ -5240,65 +3958,27 @@ STAGE PLANS: Move Operator tables: replace: true -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_mfgr,p_name,p_size,r,dr,s - columns.comments - columns.types string:string:int:int:int:double -#### A masked pattern was here #### - name default.part_4 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.part_4 Stage: Stage-4 Stats-Aggr Operator -#### A masked pattern was here #### Stage: Stage-1 Move Operator tables: replace: true -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 - columns.comments - columns.types string:string:int:int:int:int:double:int -#### A masked pattern was here #### - name default.part_5 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.part_5 Stage: Stage-5 Stats-Aggr Operator -#### A masked pattern was here #### PREHOOK: query: from noop(on part_orc partition by p_mfgr @@ -5418,7 +4098,7 @@ Manufacturer#5 almond antique medium spring khaki 6 8 2 2 0.4 31 Manufacturer#5 almond antique sky peru orange 2 2 3 3 0.6 31 Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 4 4 0.8 6 Manufacturer#5 almond azure blanched chiffon midnight 23 23 5 5 1.0 2 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -5435,7 +4115,7 @@ from noop(on partition by p_mfgr,p_name order by p_mfgr,p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -5475,16 +4155,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string) - null sort order: a sort order: + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_name (type: string), p_size (type: int) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -5495,60 +4178,13 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -5605,16 +4241,12 @@ STAGE PLANS: Map-side function: true Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -5649,16 +4281,12 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: true Reducer 4 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -5709,26 +4337,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -5798,7 +4411,7 @@ Manufacturer#5 almond antique medium spring khaki 1 1 6 6 Manufacturer#5 almond antique sky peru orange 1 1 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -5815,7 +4428,7 @@ from noop(on partition by p_mfgr order by p_mfgr ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -5856,16 +4469,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string) - null sort order: a sort order: + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_name (type: string), p_size (type: int) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -5876,60 +4492,13 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -5963,16 +4532,12 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -5999,16 +4564,12 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string) - null sort order: a sort order: + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col1 (type: string), _col5 (type: int) - auto parallelism: true Reducer 4 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -6035,16 +4596,12 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: true Reducer 5 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -6095,26 +4652,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -6184,7 +4726,7 @@ Manufacturer#5 almond antique medium spring khaki 2 2 6 37 Manufacturer#5 almond antique sky peru orange 3 3 2 39 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -6199,7 +4741,7 @@ from noop(on partition by p_mfgr order by p_mfgr)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -6237,16 +4779,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -6257,60 +4802,13 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -6344,16 +4842,12 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string) - null sort order: a sort order: + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col1 (type: string), _col5 (type: int) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -6387,16 +4881,12 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: true Reducer 4 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -6447,26 +4937,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -6532,7 +5007,7 @@ Manufacturer#5 almond antique medium spring khaki 2 2 6 37 Manufacturer#5 almond antique sky peru orange 3 3 2 39 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -6549,7 +5024,7 @@ from noopwithmap(on partition by p_mfgr,p_name order by p_mfgr,p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -6590,16 +5065,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -6610,60 +5088,13 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -6697,16 +5128,12 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string) - null sort order: a sort order: + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col1 (type: string), _col5 (type: int) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -6749,16 +5176,12 @@ STAGE PLANS: Map-side function: true Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: true Reducer 4 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -6786,16 +5209,12 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: true Reducer 5 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -6846,26 +5265,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -6935,7 +5339,7 @@ Manufacturer#5 almond antique medium spring khaki 1 1 6 6 Manufacturer#5 almond antique sky peru orange 1 1 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -6951,7 +5355,7 @@ from noop(on order by p_mfgr )) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -6990,16 +5394,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -7010,60 +5417,13 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -7113,16 +5473,12 @@ STAGE PLANS: Map-side function: true Reduce Output Operator key expressions: _col2 (type: string) - null sort order: a sort order: + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col1 (type: string), _col5 (type: int) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -7157,16 +5513,12 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: true Reducer 4 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -7217,26 +5569,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 - columns.types string:string:int:int:int:bigint:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -7304,7 +5641,7 @@ Manufacturer#5 almond antique medium spring khaki 1 1 6 6 6 Manufacturer#5 almond antique sky peru orange 1 1 2 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 23 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -7318,7 +5655,7 @@ from noopwithmap(on order by p_mfgr, p_name) )) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -7355,16 +5692,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -7375,60 +5715,13 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -7478,16 +5771,12 @@ STAGE PLANS: Map-side function: true Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -7515,16 +5804,12 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: true Reducer 4 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -7575,26 +5860,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 - columns.types string:string:int:int:int:bigint:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out index 8b313c0..3e6a73f 100644 --- ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out @@ -104,7 +104,7 @@ POSTHOOK: Lineage: part_orc.p_partkey SIMPLE [(part_staging)part_staging.FieldSc POSTHOOK: Lineage: part_orc.p_retailprice SIMPLE [(part_staging)part_staging.FieldSchema(name:p_retailprice, type:double, comment:null), ] POSTHOOK: Lineage: part_orc.p_size SIMPLE [(part_staging)part_staging.FieldSchema(name:p_size, type:int, comment:null), ] POSTHOOK: Lineage: part_orc.p_type SIMPLE [(part_staging)part_staging.FieldSchema(name:p_type, type:string, comment:null), ] -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -114,7 +114,7 @@ from noop(on part_orc order by p_name ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -145,16 +145,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int), p_retailprice (type: double) - auto parallelism: false Execution mode: vectorized Map Vectorization: enabled: true @@ -164,59 +167,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -243,15 +199,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: false Reducer 3 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -302,26 +254,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:double - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -377,14 +314,14 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr sort by j.p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j @@ -413,20 +350,26 @@ STAGE PLANS: TableScan alias: p1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Filter Operator - isSamplingPred: false + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: p_partkey (type: int) - null sort order: a sort order: + Map-reduce partition columns: p_partkey (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 0 value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) - auto parallelism: false Execution mode: vectorized Map Vectorization: enabled: true @@ -436,75 +379,35 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [p1] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [0, 1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Map 5 Map Operator Tree: TableScan alias: p2 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Filter Operator - isSamplingPred: false + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: p_partkey (type: int) - null sort order: a sort order: + Map-reduce partition columns: p_partkey (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false Execution mode: vectorized Map Vectorization: enabled: true @@ -514,59 +417,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [p2] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [0] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 - Needs Tagging: true Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -583,15 +439,11 @@ STAGE PLANS: Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: false Reducer 3 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -618,15 +470,11 @@ STAGE PLANS: Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: false Reducer 4 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -664,26 +512,11 @@ STAGE PLANS: Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:int:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -735,13 +568,13 @@ Manufacturer#5 almond antique medium spring khaki 6 -25 Manufacturer#5 almond antique sky peru orange 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 -23 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr order by p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr @@ -767,16 +600,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int) - auto parallelism: false Execution mode: vectorized Map Vectorization: enabled: true @@ -786,59 +622,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -869,26 +658,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string:string:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -936,7 +710,7 @@ Manufacturer#5 almond antique medium spring khaki 6 Manufacturer#5 almond antique sky peru orange 2 Manufacturer#5 almond aquamarine dodger light gainsboro 46 Manufacturer#5 almond azure blanched chiffon midnight 23 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -946,7 +720,7 @@ from noop(on part_orc order by p_name ) abc PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -977,16 +751,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int), p_retailprice (type: double) - auto parallelism: false Execution mode: vectorized Map Vectorization: enabled: true @@ -996,59 +773,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -1075,15 +805,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: false Reducer 3 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -1134,26 +860,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:double - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1209,7 +920,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1219,7 +930,7 @@ from noop(on part_orc order by p_name ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1250,16 +961,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int) - auto parallelism: false Execution mode: vectorized Map Vectorization: enabled: true @@ -1269,59 +983,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -1348,15 +1015,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: false Reducer 3 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -1408,26 +1071,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 - columns.types string:string:int:int:int:int:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1483,7 +1131,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 6 -25 Manufacturer#5 almond antique sky peru orange 2 3 3 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1494,7 +1142,7 @@ from noop(on part_orc ) group by p_mfgr, p_name, p_size PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1526,16 +1174,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int) - auto parallelism: false Execution mode: vectorized Map Vectorization: enabled: true @@ -1545,59 +1196,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -1633,14 +1237,10 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) - null sort order: aaa sort order: +++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - auto parallelism: false Reducer 3 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -1693,26 +1293,11 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 - columns.types string:string:int:int:int:int:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1769,14 +1354,14 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 6 -25 Manufacturer#5 almond antique sky peru orange 2 3 3 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select abc.* from noop(on part_orc partition by p_mfgr order by p_name ) abc join part_orc p1 on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select abc.* from noop(on part_orc partition by p_mfgr @@ -1804,16 +1389,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - auto parallelism: false Execution mode: vectorized Map Vectorization: enabled: true @@ -1823,75 +1411,35 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Filter Operator - isSamplingPred: false + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: p_partkey (type: int) - null sort order: a sort order: + Map-reduce partition columns: p_partkey (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false Execution mode: vectorized Map Vectorization: enabled: true @@ -1901,59 +1449,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [p1] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [0] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -1979,20 +1480,15 @@ STAGE PLANS: raw input shape: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Filter Operator - isSamplingPred: false predicate: _col0 is not null (type: boolean) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) - null sort order: a sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 0 value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - auto parallelism: false Reducer 3 - Needs Tagging: true Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -2009,26 +1505,11 @@ STAGE PLANS: Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 - columns.types int:string:string:string:string:int:string:double:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -2080,14 +1561,14 @@ POSTHOOK: Input: default@part_orc 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select abc.* from part_orc p1 join noop(on part_orc partition by p_mfgr order by p_name ) abc on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select abc.* from part_orc p1 join noop(on part_orc partition by p_mfgr @@ -2115,19 +1596,25 @@ STAGE PLANS: TableScan alias: p1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Filter Operator - isSamplingPred: false + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: p_partkey (type: int) - null sort order: a sort order: + Map-reduce partition columns: p_partkey (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false Execution mode: vectorized Map Vectorization: enabled: true @@ -2137,72 +1624,29 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [p1] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [0] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - auto parallelism: false Execution mode: vectorized Map Vectorization: enabled: true @@ -2212,59 +1656,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 - Needs Tagging: true Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -2285,28 +1682,12 @@ STAGE PLANS: Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 - columns.types int:string:string:string:string:int:string:double:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Reducer 4 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -2332,18 +1713,14 @@ STAGE PLANS: raw input shape: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Filter Operator - isSamplingPred: false predicate: _col0 is not null (type: boolean) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) - null sort order: a sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 1 value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - auto parallelism: false Stage: Stage-0 Fetch Operator @@ -2395,14 +1772,14 @@ POSTHOOK: Input: default@part_orc 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part_orc partition by p_mfgr order by p_name, p_size desc) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part_orc @@ -2430,7 +1807,6 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false PTF Operator Function definitions: Input definition @@ -2449,71 +1825,16 @@ STAGE PLANS: Map-side function: true Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int) - null sort order: aaz sort order: ++- Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - auto parallelism: false Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat notVectorizedReason: PTF Operator (PTF) not supported vectorized: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] Reducer 2 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -2541,14 +1862,10 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) - null sort order: aaz sort order: ++- Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - auto parallelism: false Reducer 3 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -2586,26 +1903,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:int:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -2655,7 +1957,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 Manufacturer#5 almond antique sky peru orange 2 3 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 Manufacturer#5 almond azure blanched chiffon midnight 23 5 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -2664,7 +1966,7 @@ from noopwithmap(on part_orc partition by p_mfgr order by p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -2694,7 +1996,6 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false PTF Operator Function definitions: Input definition @@ -2713,72 +2014,17 @@ STAGE PLANS: Map-side function: true Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int), p_retailprice (type: double) - auto parallelism: false Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat notVectorizedReason: PTF Operator (PTF) not supported vectorized: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] Reducer 2 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -2806,15 +2052,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: false Reducer 3 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -2865,26 +2107,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:double - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -2938,7 +2165,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -2947,7 +2174,7 @@ from noop(on part_orc partition by p_mfgr order by p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -2977,16 +2204,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int), p_retailprice (type: double) - auto parallelism: false Execution mode: vectorized Map Vectorization: enabled: true @@ -2996,59 +2226,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -3075,15 +2258,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: false Reducer 3 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -3134,26 +2313,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:double - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -3207,7 +2371,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -3217,7 +2381,7 @@ partition by p_mfgr order by p_mfgr, p_name ))) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -3249,16 +2413,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int), p_retailprice (type: double) - auto parallelism: false Execution mode: vectorized Map Vectorization: enabled: true @@ -3268,59 +2435,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -3370,15 +2490,11 @@ STAGE PLANS: Map-side function: true Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: false Reducer 3 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -3413,15 +2529,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: false Reducer 4 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -3472,26 +2584,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:double - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -3547,7 +2644,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -3560,7 +2657,7 @@ order by p_name) window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) ) sub1 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -3594,16 +2691,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int), p_retailprice (type: double) - auto parallelism: false Execution mode: vectorized Map Vectorization: enabled: true @@ -3613,59 +2713,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -3692,15 +2745,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: false Reducer 3 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -3743,26 +2792,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:bigint:double - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -3824,7 +2858,7 @@ Manufacturer#5 almond antique medium spring khaki 2 6208.18 Manufacturer#5 almond antique sky peru orange 3 7672.66 Manufacturer#5 almond aquamarine dodger light gainsboro 4 5882.97 Manufacturer#5 almond azure blanched chiffon midnight 5 4271.31 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -3836,7 +2870,7 @@ partition by p_mfgr order by p_name ) abc join part_orc p1 on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -3870,16 +2904,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_partkey (type: int), p_size (type: int), p_retailprice (type: double) - auto parallelism: false Execution mode: vectorized Map Vectorization: enabled: true @@ -3889,75 +2926,35 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [0, 1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Map 5 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Filter Operator - isSamplingPred: false + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: p_partkey (type: int) - null sort order: a sort order: + Map-reduce partition columns: p_partkey (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false Execution mode: vectorized Map Vectorization: enabled: true @@ -3967,59 +2964,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [p1] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [0] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -4045,20 +2995,15 @@ STAGE PLANS: raw input shape: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Filter Operator - isSamplingPred: false predicate: _col0 is not null (type: boolean) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) - null sort order: a sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 0 value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - auto parallelism: false Reducer 3 - Needs Tagging: true Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -4075,15 +3020,11 @@ STAGE PLANS: Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: false Reducer 4 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -4147,26 +3088,11 @@ STAGE PLANS: Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 - columns.types string:string:int:int:bigint:double:double:int:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -4228,13 +3154,13 @@ Manufacturer#5 almond antique medium spring khaki 2 2 2 1611.66 3401.35 6 -25 Manufacturer#5 almond antique sky peru orange 3 3 3 1788.73 5190.08 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 4 1018.1 6208.18 46 44 Manufacturer#5 almond azure blanched chiffon midnight 5 5 5 1464.48 7672.66 23 -23 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select DISTINCT p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr order by p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select DISTINCT p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr @@ -4261,16 +3187,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int) - auto parallelism: false Execution mode: vectorized Map Vectorization: enabled: true @@ -4280,59 +3209,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -4368,50 +3250,46 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) - null sort order: aaa sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - auto parallelism: false Reducer 3 Execution mode: vectorized - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:int + partitionColumnCount: 0 Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1, col 2 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string:string:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -4479,7 +3357,7 @@ POSTHOOK: Output: default@mfgr_price_view POSTHOOK: Lineage: mfgr_price_view.p_brand SIMPLE [(part_orc)part_orc.FieldSchema(name:p_brand, type:string, comment:null), ] POSTHOOK: Lineage: mfgr_price_view.p_mfgr SIMPLE [(part_orc)part_orc.FieldSchema(name:p_mfgr, type:string, comment:null), ] POSTHOOK: Lineage: mfgr_price_view.s EXPRESSION [(part_orc)part_orc.FieldSchema(name:p_retailprice, type:double, comment:null), ] -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_brand, s, round(sum(s) over w1,2) as s1 from noop(on mfgr_price_view @@ -4487,7 +3365,7 @@ partition by p_mfgr order by p_mfgr) window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_brand, s, round(sum(s) over w1,2) as s1 from noop(on mfgr_price_view @@ -4516,26 +3394,40 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Select Operator expressions: p_mfgr (type: string), p_brand (type: string), p_retailprice (type: double) outputColumnNames: p_mfgr, p_brand, p_retailprice + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 7] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(p_retailprice) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 7) -> double + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 2, col 3 + native: false + projectedOutputColumns: [0] keys: p_mfgr (type: string), p_brand (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col2 (type: double) - auto parallelism: false Execution mode: vectorized Map Vectorization: enabled: true @@ -4545,59 +3437,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [2, 3, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -4630,15 +3475,11 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col2 (type: double) - auto parallelism: false Reducer 3 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -4675,26 +3516,11 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:double:double - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -4782,7 +3608,7 @@ fv1 INT) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@part_5 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail from noop(on part_orc partition by p_mfgr order by p_name) @@ -4798,7 +3624,7 @@ cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud, first_value(p_size, true) over w1 as fv1 window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail from noop(on part_orc partition by p_mfgr order by p_name) @@ -4841,16 +3667,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int), p_retailprice (type: double) - auto parallelism: false Execution mode: vectorized Map Vectorization: enabled: true @@ -4860,59 +3689,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 3 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -4963,38 +3745,13 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_mfgr,p_name,p_size,r,dr,s - columns.comments - columns.types string:string:int:int:int:double -#### A masked pattern was here #### - name default.part_4 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.part_4 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false Reducer 4 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -5031,15 +3788,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: sum_window_0 (type: bigint), _col5 (type: int) - auto parallelism: false Reducer 5 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -5097,38 +3850,13 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 - columns.comments - columns.types string:string:int:int:int:int:double:int -#### A masked pattern was here #### - name default.part_5 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.part_5 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false Reducer 6 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -5153,15 +3881,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: false Reducer 7 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -5186,77 +3910,36 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col5 (type: int) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col1 (type: string) - auto parallelism: false Stage: Stage-0 Move Operator tables: replace: true -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_mfgr,p_name,p_size,r,dr,s - columns.comments - columns.types string:string:int:int:int:double -#### A masked pattern was here #### - name default.part_4 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.part_4 Stage: Stage-3 Stats-Aggr Operator -#### A masked pattern was here #### Stage: Stage-1 Move Operator tables: replace: true -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 - columns.comments - columns.types string:string:int:int:int:int:double:int -#### A masked pattern was here #### - name default.part_5 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.part_5 Stage: Stage-4 Stats-Aggr Operator -#### A masked pattern was here #### PREHOOK: query: from noop(on part_orc partition by p_mfgr @@ -5376,7 +4059,7 @@ Manufacturer#5 almond antique medium spring khaki 6 8 2 2 0.4 31 Manufacturer#5 almond antique sky peru orange 2 2 3 3 0.6 31 Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 4 4 0.8 6 Manufacturer#5 almond azure blanched chiffon midnight 23 23 5 5 1.0 2 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -5393,7 +4076,7 @@ from noop(on partition by p_mfgr,p_name order by p_mfgr,p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -5432,16 +4115,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string) - null sort order: a sort order: + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_name (type: string), p_size (type: int) - auto parallelism: false Execution mode: vectorized Map Vectorization: enabled: true @@ -5451,59 +4137,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -5560,15 +4199,11 @@ STAGE PLANS: Map-side function: true Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: false Reducer 3 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -5603,15 +4238,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: false Reducer 4 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -5662,26 +4293,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -5751,7 +4367,7 @@ Manufacturer#5 almond antique medium spring khaki 1 1 6 6 Manufacturer#5 almond antique sky peru orange 1 1 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -5768,7 +4384,7 @@ from noop(on partition by p_mfgr order by p_mfgr ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -5808,16 +4424,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string) - null sort order: a sort order: + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_name (type: string), p_size (type: int) - auto parallelism: false Execution mode: vectorized Map Vectorization: enabled: true @@ -5827,59 +4446,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -5913,15 +4485,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: false Reducer 3 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -5948,15 +4516,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string) - null sort order: a sort order: + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col1 (type: string), _col5 (type: int) - auto parallelism: false Reducer 4 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -5983,15 +4547,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: false Reducer 5 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -6042,26 +4602,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -6131,7 +4676,7 @@ Manufacturer#5 almond antique medium spring khaki 2 2 6 37 Manufacturer#5 almond antique sky peru orange 3 3 2 39 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -6146,7 +4691,7 @@ from noop(on partition by p_mfgr order by p_mfgr)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -6183,16 +4728,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int) - auto parallelism: false Execution mode: vectorized Map Vectorization: enabled: true @@ -6202,59 +4750,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -6288,15 +4789,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string) - null sort order: a sort order: + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col1 (type: string), _col5 (type: int) - auto parallelism: false Reducer 3 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -6330,15 +4827,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: false Reducer 4 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -6389,26 +4882,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -6474,7 +4952,7 @@ Manufacturer#5 almond antique medium spring khaki 2 2 6 37 Manufacturer#5 almond antique sky peru orange 3 3 2 39 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -6491,7 +4969,7 @@ from noopwithmap(on partition by p_mfgr,p_name order by p_mfgr,p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -6531,16 +5009,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int) - auto parallelism: false Execution mode: vectorized Map Vectorization: enabled: true @@ -6550,59 +5031,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -6636,15 +5070,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string) - null sort order: a sort order: + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col1 (type: string), _col5 (type: int) - auto parallelism: false Reducer 3 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -6687,15 +5117,11 @@ STAGE PLANS: Map-side function: true Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: false Reducer 4 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -6723,15 +5149,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: false Reducer 5 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -6782,26 +5204,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -6871,7 +5278,7 @@ Manufacturer#5 almond antique medium spring khaki 1 1 6 6 Manufacturer#5 almond antique sky peru orange 1 1 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -6887,7 +5294,7 @@ from noop(on order by p_mfgr )) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -6925,16 +5332,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int) - auto parallelism: false Execution mode: vectorized Map Vectorization: enabled: true @@ -6944,59 +5354,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -7046,15 +5409,11 @@ STAGE PLANS: Map-side function: true Reduce Output Operator key expressions: _col2 (type: string) - null sort order: a sort order: + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col1 (type: string), _col5 (type: int) - auto parallelism: false Reducer 3 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -7089,15 +5448,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: false Reducer 4 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -7148,26 +5503,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 - columns.types string:string:int:int:int:bigint:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -7235,7 +5575,7 @@ Manufacturer#5 almond antique medium spring khaki 1 1 6 6 6 Manufacturer#5 almond antique sky peru orange 1 1 2 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 23 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -7249,7 +5589,7 @@ from noopwithmap(on order by p_mfgr, p_name) )) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -7285,16 +5625,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int) - auto parallelism: false Execution mode: vectorized Map Vectorization: enabled: true @@ -7304,59 +5647,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -7406,15 +5702,11 @@ STAGE PLANS: Map-side function: true Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: false Reducer 3 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -7442,15 +5734,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: false Reducer 4 - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true @@ -7501,26 +5789,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 - columns.types string:string:int:int:int:bigint:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/windowing_windowspec.q.out ql/src/test/results/clientpositive/windowing_windowspec.q.out index 52fe871..9da6183 100644 --- ql/src/test/results/clientpositive/windowing_windowspec.q.out +++ ql/src/test/results/clientpositive/windowing_windowspec.q.out @@ -800,6 +800,114 @@ POSTHOOK: Input: default@over10k 71.68 722.6499947607517 79.46 802.1099938452244 80.02 882.1299904882908 +PREHOOK: query: select f, sum(f) over (partition by ts order by f rows between 2 preceding and 1 preceding) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select f, sum(f) over (partition by ts order by f rows between 2 preceding and 1 preceding) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +3.17 NULL +10.89 3.1700000762939453 +14.54 14.0600004196167 +14.78 25.43000030517578 +17.85 29.31999969482422 +20.61 32.63000011444092 +28.69 38.46000099182129 +29.22 49.30000114440918 +31.17 57.90999984741211 +38.35 60.38999938964844 +38.61 69.51999855041504 +39.48 76.95999908447266 +40.54 78.09000015258789 +41.6 80.02000045776367 +46.08 82.13999938964844 +54.36 87.68000030517578 +56.94 100.44000244140625 +64.96 111.29999923706055 +73.52 121.89999771118164 +78.58 138.47999572753906 +81.41 152.0999984741211 +84.71 159.99000549316406 +87.43 166.12000274658203 +91.36 172.13999938964844 +92.96 178.79000091552734 +95.04 184.31999969482422 +0.83 NULL +1.99 0.8299999833106995 +3.73 2.8199999928474426 +8.86 5.7200000286102295 +10.62 12.589999675750732 +11.32 19.479999542236328 +12.83 21.9399995803833 +14.7 24.149999618530273 +14.96 27.52999973297119 +17.58 29.65999984741211 +19.1 32.53999996185303 +21.01 36.68000030517578 +26.95 40.11000061035156 +27.23 47.96000099182129 +29.07 54.18000030517578 +29.71 56.29999923706055 +31.84 58.779998779296875 +31.94 61.54999923706055 +35.32 63.78000068664551 +37.32 67.26000022888184 +38.5 72.63999938964844 +42.08 75.81999969482422 +44.3 80.58000183105469 +44.66 86.38000106811523 +46.84 88.95999908447266 +48.89 91.5 +49.64 95.72999954223633 +50.28 98.52999877929688 +52.09 99.91999816894531 +53.26 102.36999893188477 +54.09 105.3499984741211 +56.45 107.3499984741211 +56.76 110.54000091552734 +61.41 113.20999908447266 +61.88 118.16999816894531 +63.03 123.29000091552734 +64.55 124.90999984741211 +68.62 127.58000183105469 +76.13 133.17000579833984 +79.05 144.75 +80.43 155.18000030517578 +81.41 159.4800033569336 +82.85 161.84000396728516 +83.98 164.26000213623047 +84.21 166.8300018310547 +85.55 168.19000244140625 +87.93 169.76000213623047 +88.93 173.4800033569336 +94.27 176.86000061035156 +99.45 183.1999969482422 +0.36 NULL +0.48 0.36000001430511475 +0.79 0.8400000035762787 +1.27 1.270000010728836 +4.48 2.060000002384186 +9.0 5.75 +23.27 13.480000019073486 +25.13 32.27000045776367 +25.34 48.39999961853027 +25.91 50.46999931335449 +29.01 51.25 +30.47 54.920000076293945 +37.95 59.47999954223633 +39.3 68.42000007629395 +45.91 77.25 +52.44 85.20999908447266 +54.1 98.3499984741211 +56.7 106.53999710083008 +58.77 110.79999923706055 +62.09 115.47000122070312 +68.2 120.86000061035156 +71.68 130.28999710083008 +79.46 139.87999725341797 +80.02 151.13999938964844 PREHOOK: query: select s, i, round(avg(d) over (partition by s order by i) / 10.0 , 2) from over10k limit 7 PREHOOK: type: QUERY PREHOOK: Input: default@over10k