diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index fb50588fe7..5aadf2c8dd 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -159,6 +159,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
   join_emit_interval.q,\
   join46.q,\
   join_nullsafe.q,\
+  keep_uniform.q,\
   kill_query.q,\
   leftsemijoin.q,\
   limit_pushdown.q,\
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
index 1e3887b157..0cb3b21fd8 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
@@ -23,6 +23,7 @@ import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
+import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -82,6 +83,11 @@ import com.google.common.collect.Sets;
 import com.google.common.collect.TreeMultiset;
 
+import static org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.AUTOPARALLEL;
+import static org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.FIXED;
+import static org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.UNIFORM;
+import static org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.UNSET;
+
 /**
  * Shared computation optimizer.
  *
@@ -409,6 +415,8 @@ public ParseContext transform(ParseContext pctx) throws SemanticException {
           continue;
         }
 
+        deduplicateReduceTraits(retainableRsOp.getConf(), discardableRsOp.getConf());
+
         // We can merge
         Operator<?> lastRetainableOp = sr.retainableOps.get(sr.retainableOps.size() - 1);
         Operator<?> lastDiscardableOp = sr.discardableOps.get(sr.discardableOps.size() - 1);
@@ -1124,7 +1132,7 @@ private static boolean compareOperator(ParseContext pctx, Operator<?> op1, Opera
           op1Conf.getTag() == op2Conf.getTag() &&
           StringUtils.equals(op1Conf.getOrder(), op2Conf.getOrder()) &&
           op1Conf.getTopN() == op2Conf.getTopN() &&
-          op1Conf.isAutoParallel() == op2Conf.isAutoParallel()) {
+          canDeduplicateReduceTraits(op1Conf, op2Conf)) {
         return true;
       } else {
         return false;
@@ -1474,6 +1482,113 @@ private static void pushFilterToTopOfTableScan(
     }
   }
 
+  static boolean canDeduplicateReduceTraits(ReduceSinkDesc retainable, ReduceSinkDesc discardable) {
+    return deduplicateReduceTraits(retainable, discardable, false);
+  }
+
+  static boolean deduplicateReduceTraits(ReduceSinkDesc retainable, ReduceSinkDesc discardable) {
+    return deduplicateReduceTraits(retainable, discardable, true);
+  }
+
+  private static boolean deduplicateReduceTraits(ReduceSinkDesc retainable,
+      ReduceSinkDesc discardable, boolean apply) {
+
+    final EnumSet<ReduceSinkDesc.ReducerTraits> retainableTraits = retainable.getReducerTraits();
+    final EnumSet<ReduceSinkDesc.ReducerTraits> discardableTraits = discardable.getReducerTraits();
+
+    final boolean x1 = retainableTraits.contains(UNSET);
+    final boolean f1 = retainableTraits.contains(FIXED);
+    final boolean u1 = retainableTraits.contains(UNIFORM);
+    final boolean a1 = retainableTraits.contains(AUTOPARALLEL);
+    final int n1 = retainable.getNumReducers();
+
+    final boolean x2 = discardableTraits.contains(UNSET);
+    final boolean f2 = discardableTraits.contains(FIXED);
+    final boolean u2 = discardableTraits.contains(UNIFORM);
+    final boolean a2 = discardableTraits.contains(AUTOPARALLEL);
+    final int n2 = discardable.getNumReducers();
+
+    boolean dedup = false;
+    boolean x3 = false;
+    boolean f3 = false;
+    boolean u3 = false;
+    boolean a3 = false;
+    int n3 = n1;
+
+    // NOTE: UNSET is exclusive from the other traits, and so is FIXED.
+
+    if (x1 || x2) {
+      // UNSET + X = X
+      dedup = true;
+      n3 = Math.max(n1, n2);
+      x3 = x1 && x2;
+      f3 = f1 || f2;
+      u3 = u1 || u2;
+      a3 = a1 || a2;
+    } else if (f1 || f2) {
+      if (f1 && f2) {
+        // FIXED(x) + FIXED(x) = FIXED(x)
+        // FIXED(x) + FIXED(y) = no deduplication (where x != y)
+        if (n1 == n2) {
+          dedup = true;
+          f3 = true;
+        }
+      } else {
+        // FIXED(x) + others = FIXED(x)
+        dedup = true;
+        f3 = true;
+        if (f1) {
+          n3 = n1;
+        } else {
+          n3 = n2;
+        }
+      }
+    } else {
+      if (u1 && u2) {
+        // UNIFORM(x) + UNIFORM(y) = UNIFORM(max(x, y))
+        dedup = true;
+        u3 = true;
+        n3 = Math.max(n1, n2);
+      }
+      if (a1 && a2) {
+        // AUTOPARALLEL(x) + AUTOPARALLEL(y) = AUTOPARALLEL(max(x, y))
+        dedup = true;
+        a3 = true;
+        n3 = Math.max(n1, n2);
+      }
+    }
+
+    // Gather the results into the retainable object
+    if (apply && dedup) {
+      retainable.setNumReducers(n3);
+
+      if (x3) {
+        retainableTraits.add(UNSET);
+      } else {
+        retainableTraits.remove(UNSET);
+      }
+
+      if (f3) {
+        retainableTraits.add(FIXED);
+      } else {
+        retainableTraits.remove(FIXED);
+      }
+
+      if (u3) {
+        retainableTraits.add(UNIFORM);
+      } else {
+        retainableTraits.remove(UNIFORM);
+      }
+
+      if (a3) {
+        retainableTraits.add(AUTOPARALLEL);
+      } else {
+        retainableTraits.remove(AUTOPARALLEL);
+      }
+    }
+    return dedup;
+  }
+
   private static class SharedResult {
     final List<Operator<?>> retainableOps;
     final List<Operator<?>> discardableOps;
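The comments above define a small trait algebra: UNSET yields to whatever the other sink carries, FIXED dominates but two FIXED sinks only merge when their reducer counts match, and UNIFORM or AUTOPARALLEL survive a merge only when both sinks carry them, taking the larger reducer count so neither original plan loses parallelism. A minimal sketch of the helper in action (not part of the patch; it assumes the EnumSet and trait imports shown above and uses only the ReduceSinkDesc accessors that the new unit test below also relies on):

    // Merge a UNIFORM(10) sink with a {UNIFORM, AUTOPARALLEL}(40) sink.
    ReduceSinkDesc retained = new ReduceSinkDesc();
    retained.setReducerTraits(EnumSet.of(UNIFORM));
    retained.setNumReducers(10);

    ReduceSinkDesc discarded = new ReduceSinkDesc();
    discarded.setReducerTraits(EnumSet.of(UNIFORM, AUTOPARALLEL));
    discarded.setNumReducers(40);

    // Both sides are UNIFORM, so UNIFORM survives with max(10, 40) reducers;
    // AUTOPARALLEL is dropped because only the discardable side carried it.
    boolean merged = SharedWorkOptimizer.deduplicateReduceTraits(retained, discarded);
    // merged == true; retained now has traits {UNIFORM} and numReducers == 40.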
diff --git ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestSharedWorkOptimizer.java ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestSharedWorkOptimizer.java
new file mode 100644
index 0000000000..87cef8fcd7
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestSharedWorkOptimizer.java
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer;
+
+import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
+import org.junit.Test;
+
+import java.util.EnumSet;
+
+import static org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.AUTOPARALLEL;
+import static org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.FIXED;
+import static org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.UNIFORM;
+import static org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.UNSET;
+import static org.junit.Assert.*;
+
+public class TestSharedWorkOptimizer {
+
+  private static final EnumSet<ReduceSinkDesc.ReducerTraits> unset = EnumSet.of(UNSET);
+  private static final EnumSet<ReduceSinkDesc.ReducerTraits> fixed = EnumSet.of(FIXED);
+  private static final EnumSet<ReduceSinkDesc.ReducerTraits> uniform = EnumSet.of(UNIFORM);
+  private static final EnumSet<ReduceSinkDesc.ReducerTraits> autoparallel = EnumSet.of(AUTOPARALLEL);
+  private static final EnumSet<ReduceSinkDesc.ReducerTraits> uniformAutoparallel = EnumSet.of(UNIFORM, AUTOPARALLEL);
+
+  private void ensureDeduplicate(
+      EnumSet<ReduceSinkDesc.ReducerTraits> traits1, int numReducers1,
+      EnumSet<ReduceSinkDesc.ReducerTraits> traits2, int numReducers2,
+      EnumSet<ReduceSinkDesc.ReducerTraits> expectedTraits, int expectedNumReducers) {
+
+    ReduceSinkDesc rsConf1;
+    ReduceSinkDesc rsConf2;
+    boolean deduplicated;
+
+    rsConf1 = new ReduceSinkDesc();
+    rsConf1.setReducerTraits(traits1);
+    rsConf1.setNumReducers(numReducers1);
+    rsConf2 = new ReduceSinkDesc();
+    rsConf2.setReducerTraits(traits2);
+    rsConf2.setNumReducers(numReducers2);
+    deduplicated = SharedWorkOptimizer.deduplicateReduceTraits(rsConf1, rsConf2);
+    assertTrue(deduplicated);
+    assertEquals(expectedTraits, rsConf1.getReducerTraits());
+    assertEquals(expectedNumReducers, rsConf1.getNumReducers());
+
+    rsConf1 = new ReduceSinkDesc();
+    rsConf1.setReducerTraits(traits1);
+    rsConf1.setNumReducers(numReducers1);
+    rsConf2 = new ReduceSinkDesc();
+    rsConf2.setReducerTraits(traits2);
+    rsConf2.setNumReducers(numReducers2);
+    deduplicated = SharedWorkOptimizer.deduplicateReduceTraits(rsConf2, rsConf1);
+    assertTrue(deduplicated);
+    assertEquals(expectedTraits, rsConf2.getReducerTraits());
+    assertEquals(expectedNumReducers, rsConf2.getNumReducers());
+  }
+
+  private void ensureNotDeduplicate(
+      EnumSet<ReduceSinkDesc.ReducerTraits> traits1, int numReducers1,
+      EnumSet<ReduceSinkDesc.ReducerTraits> traits2, int numReducers2) {
+
+    ReduceSinkDesc rsConf1;
+    ReduceSinkDesc rsConf2;
+    boolean deduplicated;
+
+    rsConf1 = new ReduceSinkDesc();
+    rsConf1.setReducerTraits(traits1);
+    rsConf1.setNumReducers(numReducers1);
+    rsConf2 = new ReduceSinkDesc();
+    rsConf2.setReducerTraits(traits2);
+    rsConf2.setNumReducers(numReducers2);
+    deduplicated = SharedWorkOptimizer.deduplicateReduceTraits(rsConf1, rsConf2);
+    assertFalse(deduplicated);
+
+    rsConf1 = new ReduceSinkDesc();
+    rsConf1.setReducerTraits(traits1);
+    rsConf1.setNumReducers(numReducers1);
+    rsConf2 = new ReduceSinkDesc();
+    rsConf2.setReducerTraits(traits2);
+    rsConf2.setNumReducers(numReducers2);
+    deduplicated = SharedWorkOptimizer.deduplicateReduceTraits(rsConf2, rsConf1);
+    assertFalse(deduplicated);
+  }
+
+  @Test
+  public void testDeduplicate() {
+    // UNSET
+    ensureDeduplicate(unset, 0, unset, 0, unset, 0);
+    ensureDeduplicate(unset, 0, fixed, 1, fixed, 1);
+    ensureDeduplicate(unset, 0, uniform, 1, uniform, 1);
+    ensureDeduplicate(unset, 0, autoparallel, 1, autoparallel, 1);
+    ensureDeduplicate(unset, 0, uniformAutoparallel, 1, uniformAutoparallel, 1);
+
+    // FIXED
+    ensureDeduplicate(fixed, 1, fixed, 1, fixed, 1);
+    ensureNotDeduplicate(fixed, 1, fixed, 2);
+    ensureDeduplicate(fixed, 1, uniform, 1, fixed, 1);
+    ensureDeduplicate(fixed, 1, autoparallel, 2, fixed, 1);
+    ensureDeduplicate(fixed, 1, uniformAutoparallel, 2, fixed, 1);
+
+    // UNIFORM
+    ensureDeduplicate(uniform, 1, uniform, 2, uniform, 2);
+    ensureNotDeduplicate(uniform, 1, autoparallel, 2);
+    ensureDeduplicate(uniform, 1, uniformAutoparallel, 2, uniform, 2);
+
+    // AUTOPARALLEL
+    ensureDeduplicate(autoparallel, 1, uniformAutoparallel, 2, autoparallel, 2);
+
+    // UNIFORM and AUTOPARALLEL
+    ensureDeduplicate(uniformAutoparallel, 1, uniformAutoparallel, 2, uniformAutoparallel, 2);
+  }
+}
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/keep_uniform.q ql/src/test/queries/clientpositive/keep_uniform.q
new file mode 100644
index 0000000000..d4d5a66a62
--- /dev/null
+++ ql/src/test/queries/clientpositive/keep_uniform.q
@@ -0,0 +1,201 @@
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.mapred.mode=nonstrict;
+
+
+drop table if exists customer_address;
+create table customer_address
+(
+    ca_address_sk int,
+    ca_address_id string,
+    ca_street_number string,
+    ca_street_name string,
+    ca_street_type string,
+    ca_suite_number string,
+    ca_city string,
+    ca_county string,
+    ca_state string,
+    ca_zip string,
+    ca_country string,
+    ca_gmt_offset decimal(5,2),
+    ca_location_type string
+)
+row format delimited fields terminated by '\t'
+STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB");
+
+
+drop table if exists date_dim;
+create table date_dim
+(
+    d_date_sk int,
+    d_date_id string,
+    d_date string,
+    d_month_seq int,
+    d_week_seq int,
+    d_quarter_seq int,
+    d_year int,
+    d_dow int,
+    d_moy int,
+    d_dom int,
+    d_qoy int,
+    d_fy_year int,
+    d_fy_quarter_seq int,
+    d_fy_week_seq int,
+    d_day_name string,
+    d_quarter_name string,
+    d_holiday string,
+    d_weekend string,
+    d_following_holiday string,
+    d_first_dom int,
+    d_last_dom int,
+    d_same_day_ly int,
+    d_same_day_lq int,
+    d_current_day string,
+    d_current_week string,
+    d_current_month string,
+    d_current_quarter string,
+    d_current_year string
+)
+row format delimited fields terminated by '\t'
+STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB");
+
+
+drop table if exists web_returns;
+create table web_returns
+(
+    wr_returned_date_sk int,
+    wr_returned_time_sk int,
+    wr_item_sk int,
+    wr_refunded_customer_sk int,
+    wr_refunded_cdemo_sk int,
+    wr_refunded_hdemo_sk int,
+    wr_refunded_addr_sk int,
+    wr_returning_customer_sk int,
+    wr_returning_cdemo_sk int,
+    wr_returning_hdemo_sk int,
+    wr_returning_addr_sk int,
+    wr_web_page_sk int,
+    wr_reason_sk int,
+    wr_order_number int,
+    wr_return_quantity int,
+    wr_return_amt decimal(7,2),
+    wr_return_tax decimal(7,2),
+    wr_return_amt_inc_tax decimal(7,2),
+    wr_fee decimal(7,2),
+    wr_return_ship_cost decimal(7,2),
+    wr_refunded_cash decimal(7,2),
+    wr_reversed_charge decimal(7,2),
+    wr_account_credit decimal(7,2),
+    wr_net_loss decimal(7,2)
+)
+row format delimited fields terminated by '\t'
+STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB");
+
+
+drop table if exists web_sales;
+create table web_sales
+(
+    ws_sold_date_sk int,
+    ws_sold_time_sk int,
+    ws_ship_date_sk int,
+    ws_item_sk int,
+    ws_bill_customer_sk int,
+    ws_bill_cdemo_sk int,
+    ws_bill_hdemo_sk int,
+    ws_bill_addr_sk int,
+    ws_ship_customer_sk int,
+    ws_ship_cdemo_sk int,
+    ws_ship_hdemo_sk int,
+    ws_ship_addr_sk int,
+    ws_web_page_sk int,
+    ws_web_site_sk int,
+    ws_ship_mode_sk int,
+    ws_warehouse_sk int,
+    ws_promo_sk int,
+    ws_order_number int,
+    ws_quantity int,
+    ws_wholesale_cost decimal(7,2),
+
+    ws_list_price decimal(7,2),
+    ws_sales_price decimal(7,2),
+    ws_ext_discount_amt decimal(7,2),
+    ws_ext_sales_price decimal(7,2),
+    ws_ext_wholesale_cost decimal(7,2),
+    ws_ext_list_price decimal(7,2),
+    ws_ext_tax decimal(7,2),
+    ws_coupon_amt decimal(7,2),
+    ws_ext_ship_cost decimal(7,2),
+    ws_net_paid decimal(7,2),
+    ws_net_paid_inc_tax decimal(7,2),
+    ws_net_paid_inc_ship decimal(7,2),
+    ws_net_paid_inc_ship_tax decimal(7,2),
+    ws_net_profit decimal(7,2)
+)
+row format delimited fields terminated by '\t'
+STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB");
+
+
+drop table if exists web_site;
+create table web_site
+(
+    web_site_sk int,
+    web_site_id string,
+    web_rec_start_date string,
+    web_rec_end_date string,
+    web_name string,
+    web_open_date_sk int,
+    web_close_date_sk int,
+    web_class string,
+    web_manager string,
+    web_mkt_id int,
+    web_mkt_class string,
+    web_mkt_desc string,
+    web_market_manager string,
+    web_company_id int,
+    web_company_name string,
+    web_street_number string,
+    web_street_name string,
+    web_street_type string,
+    web_suite_number string,
+    web_city string,
+    web_county string,
+    web_state string,
+    web_zip string,
+    web_country string,
+    web_gmt_offset decimal(5,2),
+    web_tax_percentage decimal(5,2)
+)
+row format delimited fields terminated by '\t'
+STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB");
+
+
+explain
+vectorization detail
+with ws_wh as
+(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2
+ from web_sales ws1,web_sales ws2
+ where ws1.ws_order_number = ws2.ws_order_number
+   and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk)
+ select
+   count(distinct ws_order_number) as `order count`
+  ,sum(ws_ext_ship_cost) as `total shipping cost`
+  ,sum(ws_net_profit) as `total net profit`
+from
+   web_sales ws1
+  ,date_dim
+  ,customer_address
+  ,web_site
+where
+    d_date between '1999-5-01' and
+           (cast('1999-5-01' as date) + 60 days)
+and ws1.ws_ship_date_sk = d_date_sk
+and ws1.ws_ship_addr_sk = ca_address_sk
+and ca_state = 'TX'
+and ws1.ws_web_site_sk = web_site_sk
+and web_company_name = 'pri'
+and ws1.ws_order_number in (select ws_order_number
+                            from ws_wh)
+and ws1.ws_order_number in (select wr_order_number
+                            from web_returns,ws_wh
+                            where wr_order_number = ws_wh.ws_order_number)
+order by count(distinct ws_order_number)
+limit 100;
diff --git ql/src/test/results/clientpositive/keep_uniform.q.out ql/src/test/results/clientpositive/keep_uniform.q.out
new file mode 100644
index 0000000000..f182acc067
--- /dev/null
+++ ql/src/test/results/clientpositive/keep_uniform.q.out
@@ -0,0 +1,1118 @@
+PREHOOK: query: drop table if exists customer_address +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists customer_address +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table customer_address +( + ca_address_sk int, + ca_address_id string, + ca_street_number string, + ca_street_name string, + ca_street_type string, + ca_suite_number string, + ca_city string, + ca_county string, + ca_state string, + ca_zip string, + ca_country string, + ca_gmt_offset decimal(5,2), + ca_location_type string +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@customer_address +POSTHOOK: query: create table customer_address +( + ca_address_sk int, + ca_address_id string, + ca_street_number string, + ca_street_name string, + ca_street_type
string, + ca_suite_number string, + ca_city string, + ca_county string, + ca_state string, + ca_zip string, + ca_country string, + ca_gmt_offset decimal(5,2), + ca_location_type string +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@customer_address +PREHOOK: query: drop table if exists date_dim +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists date_dim +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table date_dim +( + d_date_sk int, + d_date_id string, + d_date string, + d_month_seq int, + d_week_seq int, + d_quarter_seq int, + d_year int, + d_dow int, + d_moy int, + d_dom int, + d_qoy int, + d_fy_year int, + d_fy_quarter_seq int, + d_fy_week_seq int, + d_day_name string, + d_quarter_name string, + d_holiday string, + d_weekend string, + d_following_holiday string, + d_first_dom int, + d_last_dom int, + d_same_day_ly int, + d_same_day_lq int, + d_current_day string, + d_current_week string, + d_current_month string, + d_current_quarter string, + d_current_year string +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@date_dim +POSTHOOK: query: create table date_dim +( + d_date_sk int, + d_date_id string, + d_date string, + d_month_seq int, + d_week_seq int, + d_quarter_seq int, + d_year int, + d_dow int, + d_moy int, + d_dom int, + d_qoy int, + d_fy_year int, + d_fy_quarter_seq int, + d_fy_week_seq int, + d_day_name string, + d_quarter_name string, + d_holiday string, + d_weekend string, + d_following_holiday string, + d_first_dom int, + d_last_dom int, + d_same_day_ly int, + d_same_day_lq int, + d_current_day string, + d_current_week string, + d_current_month string, + d_current_quarter string, + d_current_year string +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@date_dim +PREHOOK: query: drop table if exists web_returns +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists web_returns +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table web_returns +( + wr_returned_date_sk int, + wr_returned_time_sk int, + wr_item_sk int, + wr_refunded_customer_sk int, + wr_refunded_cdemo_sk int, + wr_refunded_hdemo_sk int, + wr_refunded_addr_sk int, + wr_returning_customer_sk int, + wr_returning_cdemo_sk int, + wr_returning_hdemo_sk int, + wr_returning_addr_sk int, + wr_web_page_sk int, + wr_reason_sk int, + wr_order_number int, + wr_return_quantity int, + wr_return_amt decimal(7,2), + wr_return_tax decimal(7,2), + wr_return_amt_inc_tax decimal(7,2), + wr_fee decimal(7,2), + wr_return_ship_cost decimal(7,2), + wr_refunded_cash decimal(7,2), + wr_reversed_charge decimal(7,2), + wr_account_credit decimal(7,2), + wr_net_loss decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@web_returns +POSTHOOK: query: create table web_returns +( + wr_returned_date_sk int, + wr_returned_time_sk int, + wr_item_sk int, + wr_refunded_customer_sk int, + wr_refunded_cdemo_sk int, + wr_refunded_hdemo_sk int, + 
wr_refunded_addr_sk int, + wr_returning_customer_sk int, + wr_returning_cdemo_sk int, + wr_returning_hdemo_sk int, + wr_returning_addr_sk int, + wr_web_page_sk int, + wr_reason_sk int, + wr_order_number int, + wr_return_quantity int, + wr_return_amt decimal(7,2), + wr_return_tax decimal(7,2), + wr_return_amt_inc_tax decimal(7,2), + wr_fee decimal(7,2), + wr_return_ship_cost decimal(7,2), + wr_refunded_cash decimal(7,2), + wr_reversed_charge decimal(7,2), + wr_account_credit decimal(7,2), + wr_net_loss decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@web_returns +PREHOOK: query: drop table if exists web_sales +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists web_sales +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table web_sales +( + ws_sold_date_sk int, + ws_sold_time_sk int, + ws_ship_date_sk int, + ws_item_sk int, + ws_bill_customer_sk int, + ws_bill_cdemo_sk int, + ws_bill_hdemo_sk int, + ws_bill_addr_sk int, + ws_ship_customer_sk int, + ws_ship_cdemo_sk int, + ws_ship_hdemo_sk int, + ws_ship_addr_sk int, + ws_web_page_sk int, + ws_web_site_sk int, + ws_ship_mode_sk int, + ws_warehouse_sk int, + ws_promo_sk int, + ws_order_number int, + ws_quantity int, + ws_wholesale_cost decimal(7,2), + ws_list_price decimal(7,2), + ws_sales_price decimal(7,2), + ws_ext_discount_amt decimal(7,2), + ws_ext_sales_price decimal(7,2), + ws_ext_wholesale_cost decimal(7,2), + ws_ext_list_price decimal(7,2), + ws_ext_tax decimal(7,2), + ws_coupon_amt decimal(7,2), + ws_ext_ship_cost decimal(7,2), + ws_net_paid decimal(7,2), + ws_net_paid_inc_tax decimal(7,2), + ws_net_paid_inc_ship decimal(7,2), + ws_net_paid_inc_ship_tax decimal(7,2), + ws_net_profit decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@web_sales +POSTHOOK: query: create table web_sales +( + ws_sold_date_sk int, + ws_sold_time_sk int, + ws_ship_date_sk int, + ws_item_sk int, + ws_bill_customer_sk int, + ws_bill_cdemo_sk int, + ws_bill_hdemo_sk int, + ws_bill_addr_sk int, + ws_ship_customer_sk int, + ws_ship_cdemo_sk int, + ws_ship_hdemo_sk int, + ws_ship_addr_sk int, + ws_web_page_sk int, + ws_web_site_sk int, + ws_ship_mode_sk int, + ws_warehouse_sk int, + ws_promo_sk int, + ws_order_number int, + ws_quantity int, + ws_wholesale_cost decimal(7,2), + ws_list_price decimal(7,2), + ws_sales_price decimal(7,2), + ws_ext_discount_amt decimal(7,2), + ws_ext_sales_price decimal(7,2), + ws_ext_wholesale_cost decimal(7,2), + ws_ext_list_price decimal(7,2), + ws_ext_tax decimal(7,2), + ws_coupon_amt decimal(7,2), + ws_ext_ship_cost decimal(7,2), + ws_net_paid decimal(7,2), + ws_net_paid_inc_tax decimal(7,2), + ws_net_paid_inc_ship decimal(7,2), + ws_net_paid_inc_ship_tax decimal(7,2), + ws_net_profit decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@web_sales +PREHOOK: query: drop table if exists web_site +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists web_site +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table web_site +( + web_site_sk int, + web_site_id string, + 
web_rec_start_date string, + web_rec_end_date string, + web_name string, + web_open_date_sk int, + web_close_date_sk int, + web_class string, + web_manager string, + web_mkt_id int, + web_mkt_class string, + web_mkt_desc string, + web_market_manager string, + web_company_id int, + web_company_name string, + web_street_number string, + web_street_name string, + web_street_type string, + web_suite_number string, + web_city string, + web_county string, + web_state string, + web_zip string, + web_country string, + web_gmt_offset decimal(5,2), + web_tax_percentage decimal(5,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@web_site +POSTHOOK: query: create table web_site +( + web_site_sk int, + web_site_id string, + web_rec_start_date string, + web_rec_end_date string, + web_name string, + web_open_date_sk int, + web_close_date_sk int, + web_class string, + web_manager string, + web_mkt_id int, + web_mkt_class string, + web_mkt_desc string, + web_market_manager string, + web_company_id int, + web_company_name string, + web_street_number string, + web_street_name string, + web_street_type string, + web_suite_number string, + web_city string, + web_county string, + web_state string, + web_zip string, + web_country string, + web_gmt_offset decimal(5,2), + web_tax_percentage decimal(5,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@web_site +PREHOOK: query: explain +vectorization detail +with ws_wh as +(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 + from web_sales ws1,web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) + select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '1999-5-01' and + (cast('1999-5-01' as date) + 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'TX' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and ws1.ws_order_number in (select ws_order_number + from ws_wh) +and ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number) +order by count(distinct ws_order_number) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Input: default@web_site +#### A masked pattern was here #### +POSTHOOK: query: explain +vectorization detail +with ws_wh as +(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 + from web_sales ws1,web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) + select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '1999-5-01' and + (cast('1999-5-01' as date) + 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and 
ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'TX' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and ws1.ws_order_number in (select ws_order_number + from ws_wh) +and ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number) +order by count(distinct ws_order_number) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Input: default@web_site +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2, Stage-11, Stage-14 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-6 + Stage-10 is a root stage + Stage-11 depends on stages: Stage-10 + Stage-15 is a root stage + Stage-13 depends on stages: Stage-15 + Stage-14 depends on stages: Stage-13 + Stage-0 depends on stages: Stage-7 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: ws1 + filterExpr: (ws_order_number is not null and ws_ship_date_sk is not null and ws_ship_addr_sk is not null and ws_web_site_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ws_order_number is not null and ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_web_site_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ws_ship_date_sk (type: int), ws_ship_addr_sk (type: int), ws_web_site_sk (type: int), ws_order_number (type: int), ws_ext_ship_cost (type: decimal(7,2)), ws_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + TableScan + alias: customer_address + filterExpr: ((ca_state = 'TX') and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((ca_state = 'TX') and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, 
spark] IS false + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + TableScan + alias: web_site + filterExpr: ((web_company_name = 'pri') and web_site_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((web_company_name = 'pri') and web_site_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: web_site_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + 
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + TableScan + alias: date_dim + filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: sum(_col4), sum(_col5) + keys: _col3 (type: int) + mode: hash + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:_col0:int, 1:_col2:decimal(17,2), 2:_col3:decimal(17,2)] + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + 
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: _col0:int, _col2:decimal(17,2), _col3:decimal(17,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: int) + mode: partial2 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count(_col0), sum(_col1), sum(_col2) + mode: partial2 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 344 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:_col0:bigint, 1:_col1:decimal(17,2), 2:_col2:decimal(17,2)] + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 344 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: _col0:bigint, _col1:decimal(17,2), _col2:decimal(17,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 344 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col0 (type: bigint) + 
outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 344 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:_col1:decimal(17,2), 1:_col2:decimal(17,2), 2:_col3:bigint] + Reduce Output Operator + key expressions: _col3 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 344 Basic stats: PARTIAL Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: _col1:decimal(17,2), _col2:decimal(17,2), _col3:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: decimal(17,2)), VALUE._col1 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 344 Basic stats: PARTIAL Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 1 Data size: 344 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 344 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + alias: ws1 + filterExpr: ws_order_number is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ws_order_number is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ws_warehouse_sk (type: int), ws_order_number (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int) + TableScan + alias: ws2 + filterExpr: ws_order_number is not null (type: 
boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ws_order_number is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ws_warehouse_sk (type: int), ws_order_number (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int) + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (_col0 <> _col2) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:_col0:int] + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: _col0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-15 + Map Reduce + Map Operator Tree: + TableScan + alias: ws1 + filterExpr: ws_order_number is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ws_order_number is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ws_warehouse_sk (type: int), ws_order_number (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int) + TableScan + alias: ws2 + filterExpr: ws_order_number is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ws_order_number is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ws_warehouse_sk (type: int), ws_order_number (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int) + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (_col0 <> _col2) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-13 + Map Reduce + Map Operator Tree: + TableScan + alias: web_returns + filterExpr: wr_order_number is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: wr_order_number is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: wr_order_number (type: int) + outputColumnNames: _col13 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output 
Operator + key expressions: _col13 (type: int) + sort order: + + Map-reduce partition columns: _col13 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col13 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col13 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + keys: _col13 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-14 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:_col0:int] + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: _col0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/llap/keep_uniform.q.out ql/src/test/results/clientpositive/llap/keep_uniform.q.out new file mode 100644 index 0000000000..27a48f4807 --- /dev/null +++ 
ql/src/test/results/clientpositive/llap/keep_uniform.q.out
@@ -0,0 +1,1191 @@
+PREHOOK: query: drop table if exists customer_address
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists customer_address
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table customer_address
+(
+    ca_address_sk int,
+    ca_address_id string,
+    ca_street_number string,
+    ca_street_name string,
+    ca_street_type string,
+    ca_suite_number string,
+    ca_city string,
+    ca_county string,
+    ca_state string,
+    ca_zip string,
+    ca_country string,
+    ca_gmt_offset decimal(5,2),
+    ca_location_type string
+)
+row format delimited fields terminated by '\t'
+STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@customer_address
+POSTHOOK: query: create table customer_address
+(
+    ca_address_sk int,
+    ca_address_id string,
+    ca_street_number string,
+    ca_street_name string,
+    ca_street_type string,
+    ca_suite_number string,
+    ca_city string,
+    ca_county string,
+    ca_state string,
+    ca_zip string,
+    ca_country string,
+    ca_gmt_offset decimal(5,2),
+    ca_location_type string
+)
+row format delimited fields terminated by '\t'
+STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@customer_address
+PREHOOK: query: drop table if exists date_dim
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists date_dim
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table date_dim
+(
+    d_date_sk int,
+    d_date_id string,
+    d_date string,
+    d_month_seq int,
+    d_week_seq int,
+    d_quarter_seq int,
+    d_year int,
+    d_dow int,
+    d_moy int,
+    d_dom int,
+    d_qoy int,
+    d_fy_year int,
+    d_fy_quarter_seq int,
+    d_fy_week_seq int,
+    d_day_name string,
+    d_quarter_name string,
+    d_holiday string,
+    d_weekend string,
+    d_following_holiday string,
+    d_first_dom int,
+    d_last_dom int,
+    d_same_day_ly int,
+    d_same_day_lq int,
+    d_current_day string,
+    d_current_week string,
+    d_current_month string,
+    d_current_quarter string,
+    d_current_year string
+)
+row format delimited fields terminated by '\t'
+STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@date_dim
+POSTHOOK: query: create table date_dim
+(
+    d_date_sk int,
+    d_date_id string,
+    d_date string,
+    d_month_seq int,
+    d_week_seq int,
+    d_quarter_seq int,
+    d_year int,
+    d_dow int,
+    d_moy int,
+    d_dom int,
+    d_qoy int,
+    d_fy_year int,
+    d_fy_quarter_seq int,
+    d_fy_week_seq int,
+    d_day_name string,
+    d_quarter_name string,
+    d_holiday string,
+    d_weekend string,
+    d_following_holiday string,
+    d_first_dom int,
+    d_last_dom int,
+    d_same_day_ly int,
+    d_same_day_lq int,
+    d_current_day string,
+    d_current_week string,
+    d_current_month string,
+    d_current_quarter string,
+    d_current_year string
+)
+row format delimited fields terminated by '\t'
+STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@date_dim
+PREHOOK: query: drop table if exists web_returns
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists web_returns
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table web_returns
+(
+    wr_returned_date_sk int,
+    wr_returned_time_sk int,
+    wr_item_sk int,
+    wr_refunded_customer_sk int,
+    wr_refunded_cdemo_sk int,
+    wr_refunded_hdemo_sk int,
+    wr_refunded_addr_sk int,
+    wr_returning_customer_sk int,
+    wr_returning_cdemo_sk int,
+    wr_returning_hdemo_sk int,
+    wr_returning_addr_sk int,
+    wr_web_page_sk int,
+    wr_reason_sk int,
+    wr_order_number int,
+    wr_return_quantity int,
+    wr_return_amt decimal(7,2),
+    wr_return_tax decimal(7,2),
+    wr_return_amt_inc_tax decimal(7,2),
+    wr_fee decimal(7,2),
+    wr_return_ship_cost decimal(7,2),
+    wr_refunded_cash decimal(7,2),
+    wr_reversed_charge decimal(7,2),
+    wr_account_credit decimal(7,2),
+    wr_net_loss decimal(7,2)
+)
+row format delimited fields terminated by '\t'
+STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@web_returns
+POSTHOOK: query: create table web_returns
+(
+    wr_returned_date_sk int,
+    wr_returned_time_sk int,
+    wr_item_sk int,
+    wr_refunded_customer_sk int,
+    wr_refunded_cdemo_sk int,
+    wr_refunded_hdemo_sk int,
+    wr_refunded_addr_sk int,
+    wr_returning_customer_sk int,
+    wr_returning_cdemo_sk int,
+    wr_returning_hdemo_sk int,
+    wr_returning_addr_sk int,
+    wr_web_page_sk int,
+    wr_reason_sk int,
+    wr_order_number int,
+    wr_return_quantity int,
+    wr_return_amt decimal(7,2),
+    wr_return_tax decimal(7,2),
+    wr_return_amt_inc_tax decimal(7,2),
+    wr_fee decimal(7,2),
+    wr_return_ship_cost decimal(7,2),
+    wr_refunded_cash decimal(7,2),
+    wr_reversed_charge decimal(7,2),
+    wr_account_credit decimal(7,2),
+    wr_net_loss decimal(7,2)
+)
+row format delimited fields terminated by '\t'
+STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@web_returns
+PREHOOK: query: drop table if exists web_sales
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists web_sales
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table web_sales
+(
+    ws_sold_date_sk int,
+    ws_sold_time_sk int,
+    ws_ship_date_sk int,
+    ws_item_sk int,
+    ws_bill_customer_sk int,
+    ws_bill_cdemo_sk int,
+    ws_bill_hdemo_sk int,
+    ws_bill_addr_sk int,
+    ws_ship_customer_sk int,
+    ws_ship_cdemo_sk int,
+    ws_ship_hdemo_sk int,
+    ws_ship_addr_sk int,
+    ws_web_page_sk int,
+    ws_web_site_sk int,
+    ws_ship_mode_sk int,
+    ws_warehouse_sk int,
+    ws_promo_sk int,
+    ws_order_number int,
+    ws_quantity int,
+    ws_wholesale_cost decimal(7,2),
+    ws_list_price decimal(7,2),
+    ws_sales_price decimal(7,2),
+    ws_ext_discount_amt decimal(7,2),
+    ws_ext_sales_price decimal(7,2),
+    ws_ext_wholesale_cost decimal(7,2),
+    ws_ext_list_price decimal(7,2),
+    ws_ext_tax decimal(7,2),
+    ws_coupon_amt decimal(7,2),
+    ws_ext_ship_cost decimal(7,2),
+    ws_net_paid decimal(7,2),
+    ws_net_paid_inc_tax decimal(7,2),
+    ws_net_paid_inc_ship decimal(7,2),
+    ws_net_paid_inc_ship_tax decimal(7,2),
+    ws_net_profit decimal(7,2)
+)
+row format delimited fields terminated by '\t'
+STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@web_sales
+POSTHOOK: query: create table web_sales
+(
+    ws_sold_date_sk int,
+    ws_sold_time_sk int,
+    ws_ship_date_sk int,
+    ws_item_sk int,
+    ws_bill_customer_sk int,
+    ws_bill_cdemo_sk int,
+    ws_bill_hdemo_sk int,
+    ws_bill_addr_sk int,
+    ws_ship_customer_sk int,
+    ws_ship_cdemo_sk int,
+    ws_ship_hdemo_sk int,
+    ws_ship_addr_sk int,
+    ws_web_page_sk int,
+    ws_web_site_sk int,
+    ws_ship_mode_sk int,
+    ws_warehouse_sk int,
+    ws_promo_sk int,
+    ws_order_number int,
+    ws_quantity int,
+    ws_wholesale_cost decimal(7,2),
+    ws_list_price decimal(7,2),
+    ws_sales_price decimal(7,2),
+    ws_ext_discount_amt decimal(7,2),
+    ws_ext_sales_price decimal(7,2),
+    ws_ext_wholesale_cost decimal(7,2),
+    ws_ext_list_price decimal(7,2),
+    ws_ext_tax decimal(7,2),
+    ws_coupon_amt decimal(7,2),
+    ws_ext_ship_cost decimal(7,2),
+    ws_net_paid decimal(7,2),
+    ws_net_paid_inc_tax decimal(7,2),
+    ws_net_paid_inc_ship decimal(7,2),
+    ws_net_paid_inc_ship_tax decimal(7,2),
+    ws_net_profit decimal(7,2)
+)
+row format delimited fields terminated by '\t'
+STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@web_sales
+PREHOOK: query: drop table if exists web_site
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists web_site
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table web_site
+(
+    web_site_sk int,
+    web_site_id string,
+    web_rec_start_date string,
+    web_rec_end_date string,
+    web_name string,
+    web_open_date_sk int,
+    web_close_date_sk int,
+    web_class string,
+    web_manager string,
+    web_mkt_id int,
+    web_mkt_class string,
+    web_mkt_desc string,
+    web_market_manager string,
+    web_company_id int,
+    web_company_name string,
+    web_street_number string,
+    web_street_name string,
+    web_street_type string,
+    web_suite_number string,
+    web_city string,
+    web_county string,
+    web_state string,
+    web_zip string,
+    web_country string,
+    web_gmt_offset decimal(5,2),
+    web_tax_percentage decimal(5,2)
+)
+row format delimited fields terminated by '\t'
+STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@web_site
+POSTHOOK: query: create table web_site
+(
+    web_site_sk int,
+    web_site_id string,
+    web_rec_start_date string,
+    web_rec_end_date string,
+    web_name string,
+    web_open_date_sk int,
+    web_close_date_sk int,
+    web_class string,
+    web_manager string,
+    web_mkt_id int,
+    web_mkt_class string,
+    web_mkt_desc string,
+    web_market_manager string,
+    web_company_id int,
+    web_company_name string,
+    web_street_number string,
+    web_street_name string,
+    web_street_type string,
+    web_suite_number string,
+    web_city string,
+    web_county string,
+    web_state string,
+    web_zip string,
+    web_country string,
+    web_gmt_offset decimal(5,2),
+    web_tax_percentage decimal(5,2)
+)
+row format delimited fields terminated by '\t'
+STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@web_site
+PREHOOK: query: explain
+vectorization detail
+with ws_wh as
+(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2
+ from web_sales ws1,web_sales ws2
+ where ws1.ws_order_number = ws2.ws_order_number
+   and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk)
+ select
+   count(distinct ws_order_number) as `order count`
+  ,sum(ws_ext_ship_cost) as `total shipping cost`
+  ,sum(ws_net_profit) as `total net profit`
+from
+   web_sales ws1
+  ,date_dim
+  ,customer_address
+  ,web_site
+where
+    d_date between '1999-5-01' and
+           (cast('1999-5-01' as date) + 60 days)
+and ws1.ws_ship_date_sk = d_date_sk
+and ws1.ws_ship_addr_sk = ca_address_sk
+and ca_state = 'TX'
+and ws1.ws_web_site_sk = web_site_sk
+and web_company_name = 'pri'
+and ws1.ws_order_number in (select ws_order_number
+                            from ws_wh)
+and ws1.ws_order_number in (select wr_order_number
+                            from web_returns,ws_wh
+                            where wr_order_number = ws_wh.ws_order_number)
+order by count(distinct ws_order_number)
+limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_address
+PREHOOK: Input: default@date_dim
+PREHOOK: Input: default@web_returns
+PREHOOK: Input: default@web_sales
+PREHOOK: Input: default@web_site
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+vectorization detail
+with ws_wh as
+(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2
+ from web_sales ws1,web_sales ws2
+ where ws1.ws_order_number = ws2.ws_order_number
+   and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk)
+ select
+   count(distinct ws_order_number) as `order count`
+  ,sum(ws_ext_ship_cost) as `total shipping cost`
+  ,sum(ws_net_profit) as `total net profit`
+from
+   web_sales ws1
+  ,date_dim
+  ,customer_address
+  ,web_site
+where
+    d_date between '1999-5-01' and
+           (cast('1999-5-01' as date) + 60 days)
+and ws1.ws_ship_date_sk = d_date_sk
+and ws1.ws_ship_addr_sk = ca_address_sk
+and ca_state = 'TX'
+and ws1.ws_web_site_sk = web_site_sk
+and web_company_name = 'pri'
+and ws1.ws_order_number in (select ws_order_number
+                            from ws_wh)
+and ws1.ws_order_number in (select wr_order_number
+                            from web_returns,ws_wh
+                            where wr_order_number = ws_wh.ws_order_number)
+order by count(distinct ws_order_number)
+limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_address
+POSTHOOK: Input: default@date_dim
+POSTHOOK: Input: default@web_returns
+POSTHOOK: Input: default@web_sales
+POSTHOOK: Input: default@web_site
+#### A masked pattern was here ####
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE)
+        Reducer 13 <- Reducer 12 (SIMPLE_EDGE)
+        Reducer 14 <- Map 17 (SIMPLE_EDGE), Reducer 12 (ONE_TO_ONE_EDGE)
+        Reducer 15 <- Reducer 14 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
+        Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 13 (ONE_TO_ONE_EDGE), Reducer 15 (ONE_TO_ONE_EDGE), Reducer 3 (SIMPLE_EDGE)
+        Reducer 5 <- Map 18 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+        Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+        Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
+        Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: ws1
+                  filterExpr: (ws_order_number is not null and ws_ship_date_sk is not null and ws_ship_addr_sk is not null and ws_web_site_sk is not null) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:ws_sold_date_sk:int, 1:ws_sold_time_sk:int, 2:ws_ship_date_sk:int, 3:ws_item_sk:int, 4:ws_bill_customer_sk:int, 5:ws_bill_cdemo_sk:int, 6:ws_bill_hdemo_sk:int, 7:ws_bill_addr_sk:int, 8:ws_ship_customer_sk:int, 9:ws_ship_cdemo_sk:int, 10:ws_ship_hdemo_sk:int, 11:ws_ship_addr_sk:int, 12:ws_web_page_sk:int, 13:ws_web_site_sk:int, 14:ws_ship_mode_sk:int, 15:ws_warehouse_sk:int, 16:ws_promo_sk:int, 17:ws_order_number:int, 18:ws_quantity:int, 19:ws_wholesale_cost:decimal(7,2)/DECIMAL_64, 20:ws_list_price:decimal(7,2)/DECIMAL_64, 21:ws_sales_price:decimal(7,2)/DECIMAL_64, 22:ws_ext_discount_amt:decimal(7,2)/DECIMAL_64, 23:ws_ext_sales_price:decimal(7,2)/DECIMAL_64, 24:ws_ext_wholesale_cost:decimal(7,2)/DECIMAL_64, 25:ws_ext_list_price:decimal(7,2)/DECIMAL_64, 26:ws_ext_tax:decimal(7,2)/DECIMAL_64, 27:ws_coupon_amt:decimal(7,2)/DECIMAL_64, 28:ws_ext_ship_cost:decimal(7,2)/DECIMAL_64, 29:ws_net_paid:decimal(7,2)/DECIMAL_64, 30:ws_net_paid_inc_tax:decimal(7,2)/DECIMAL_64, 31:ws_net_paid_inc_ship:decimal(7,2)/DECIMAL_64, 32:ws_net_paid_inc_ship_tax:decimal(7,2)/DECIMAL_64, 33:ws_net_profit:decimal(7,2)/DECIMAL_64, 34:ROW__ID:struct]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 17:int), SelectColumnIsNotNull(col 2:int), SelectColumnIsNotNull(col 11:int), SelectColumnIsNotNull(col 13:int))
+                    predicate: (ws_order_number is not null and ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_web_site_sk is not null) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: ws_ship_date_sk (type: int), ws_ship_addr_sk (type: int), ws_web_site_sk (type: int), ws_order_number (type: int), ws_ext_ship_cost (type: decimal(7,2)), ws_net_profit (type: decimal(7,2))
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [2, 11, 13, 17, 28, 33]
+                      Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col1 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col1 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            keyColumns: 11:int
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumns: 2:int, 13:int, 17:int, 28:decimal(7,2), 33:decimal(7,2)
+                        Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2))
+            Execution mode: vectorized, llap
+            LLAP IO: may be used (ACID table)
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 34
+                    includeColumns: [2, 11, 13, 17, 28, 33]
+                    dataColumns: ws_sold_date_sk:int, ws_sold_time_sk:int, ws_ship_date_sk:int, ws_item_sk:int, ws_bill_customer_sk:int, ws_bill_cdemo_sk:int, ws_bill_hdemo_sk:int, ws_bill_addr_sk:int, ws_ship_customer_sk:int, ws_ship_cdemo_sk:int, ws_ship_hdemo_sk:int, ws_ship_addr_sk:int, ws_web_page_sk:int, ws_web_site_sk:int, ws_ship_mode_sk:int, ws_warehouse_sk:int, ws_promo_sk:int, ws_order_number:int, ws_quantity:int, ws_wholesale_cost:decimal(7,2)/DECIMAL_64, ws_list_price:decimal(7,2)/DECIMAL_64, ws_sales_price:decimal(7,2)/DECIMAL_64, ws_ext_discount_amt:decimal(7,2)/DECIMAL_64, ws_ext_sales_price:decimal(7,2)/DECIMAL_64, ws_ext_wholesale_cost:decimal(7,2)/DECIMAL_64, ws_ext_list_price:decimal(7,2)/DECIMAL_64, ws_ext_tax:decimal(7,2)/DECIMAL_64, ws_coupon_amt:decimal(7,2)/DECIMAL_64, ws_ext_ship_cost:decimal(7,2)/DECIMAL_64, ws_net_paid:decimal(7,2)/DECIMAL_64, ws_net_paid_inc_tax:decimal(7,2)/DECIMAL_64, ws_net_paid_inc_ship:decimal(7,2)/DECIMAL_64, ws_net_paid_inc_ship_tax:decimal(7,2)/DECIMAL_64, ws_net_profit:decimal(7,2)/DECIMAL_64
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Map 10
+            Map Operator Tree:
+                TableScan
+                  alias: web_site
+                  filterExpr: ((web_company_name = 'pri') and web_site_sk is not null) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:web_site_sk:int, 1:web_site_id:string, 2:web_rec_start_date:string, 3:web_rec_end_date:string, 4:web_name:string, 5:web_open_date_sk:int, 6:web_close_date_sk:int, 7:web_class:string, 8:web_manager:string, 9:web_mkt_id:int, 10:web_mkt_class:string, 11:web_mkt_desc:string, 12:web_market_manager:string, 13:web_company_id:int, 14:web_company_name:string, 15:web_street_number:string, 16:web_street_name:string, 17:web_street_type:string, 18:web_suite_number:string, 19:web_city:string, 20:web_county:string, 21:web_state:string, 22:web_zip:string, 23:web_country:string, 24:web_gmt_offset:decimal(5,2)/DECIMAL_64, 25:web_tax_percentage:decimal(5,2)/DECIMAL_64, 26:ROW__ID:struct]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 14:string, val pri), SelectColumnIsNotNull(col 0:int))
+                    predicate: ((web_company_name = 'pri') and web_site_sk is not null) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: web_site_sk (type: int)
+                      outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0]
+                      Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            keyColumns: 0:int
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: may be used (ACID table)
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 26
+                    includeColumns: [0, 14]
+                    dataColumns: web_site_sk:int, web_site_id:string, web_rec_start_date:string, web_rec_end_date:string, web_name:string, web_open_date_sk:int, web_close_date_sk:int, web_class:string, web_manager:string, web_mkt_id:int, web_mkt_class:string, web_mkt_desc:string, web_market_manager:string, web_company_id:int, web_company_name:string, web_street_number:string, web_street_name:string, web_street_type:string, web_suite_number:string, web_city:string, web_county:string, web_state:string, web_zip:string, web_country:string, web_gmt_offset:decimal(5,2)/DECIMAL_64, web_tax_percentage:decimal(5,2)/DECIMAL_64
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Map 11
+            Map Operator Tree:
+                TableScan
+                  alias: ws1
+                  filterExpr: ws_order_number is not null (type: boolean)
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:ws_sold_date_sk:int, 1:ws_sold_time_sk:int, 2:ws_ship_date_sk:int, 3:ws_item_sk:int, 4:ws_bill_customer_sk:int, 5:ws_bill_cdemo_sk:int, 6:ws_bill_hdemo_sk:int, 7:ws_bill_addr_sk:int, 8:ws_ship_customer_sk:int, 9:ws_ship_cdemo_sk:int, 10:ws_ship_hdemo_sk:int, 11:ws_ship_addr_sk:int, 12:ws_web_page_sk:int, 13:ws_web_site_sk:int, 14:ws_ship_mode_sk:int, 15:ws_warehouse_sk:int, 16:ws_promo_sk:int, 17:ws_order_number:int, 18:ws_quantity:int, 19:ws_wholesale_cost:decimal(7,2)/DECIMAL_64, 20:ws_list_price:decimal(7,2)/DECIMAL_64, 21:ws_sales_price:decimal(7,2)/DECIMAL_64, 22:ws_ext_discount_amt:decimal(7,2)/DECIMAL_64, 23:ws_ext_sales_price:decimal(7,2)/DECIMAL_64, 24:ws_ext_wholesale_cost:decimal(7,2)/DECIMAL_64, 25:ws_ext_list_price:decimal(7,2)/DECIMAL_64, 26:ws_ext_tax:decimal(7,2)/DECIMAL_64, 27:ws_coupon_amt:decimal(7,2)/DECIMAL_64, 28:ws_ext_ship_cost:decimal(7,2)/DECIMAL_64, 29:ws_net_paid:decimal(7,2)/DECIMAL_64, 30:ws_net_paid_inc_tax:decimal(7,2)/DECIMAL_64, 31:ws_net_paid_inc_ship:decimal(7,2)/DECIMAL_64, 32:ws_net_paid_inc_ship_tax:decimal(7,2)/DECIMAL_64, 33:ws_net_profit:decimal(7,2)/DECIMAL_64, 34:ROW__ID:struct]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 17:int)
+                    predicate: ws_order_number is not null (type: boolean)
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: ws_warehouse_sk (type: int), ws_order_number (type: int)
+                      outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [15, 17]
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col1 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col1 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            keyColumns: 17:int
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumns: 15:int
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: int)
+            Execution mode: vectorized, llap
+            LLAP IO: may be used (ACID table)
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 34
+                    includeColumns: [15, 17]
+                    dataColumns: ws_sold_date_sk:int, ws_sold_time_sk:int, ws_ship_date_sk:int, ws_item_sk:int, ws_bill_customer_sk:int, ws_bill_cdemo_sk:int, ws_bill_hdemo_sk:int, ws_bill_addr_sk:int, ws_ship_customer_sk:int, ws_ship_cdemo_sk:int, ws_ship_hdemo_sk:int, ws_ship_addr_sk:int, ws_web_page_sk:int, ws_web_site_sk:int, ws_ship_mode_sk:int, ws_warehouse_sk:int, ws_promo_sk:int, ws_order_number:int, ws_quantity:int, ws_wholesale_cost:decimal(7,2)/DECIMAL_64, ws_list_price:decimal(7,2)/DECIMAL_64, ws_sales_price:decimal(7,2)/DECIMAL_64, ws_ext_discount_amt:decimal(7,2)/DECIMAL_64, ws_ext_sales_price:decimal(7,2)/DECIMAL_64, ws_ext_wholesale_cost:decimal(7,2)/DECIMAL_64, ws_ext_list_price:decimal(7,2)/DECIMAL_64, ws_ext_tax:decimal(7,2)/DECIMAL_64, ws_coupon_amt:decimal(7,2)/DECIMAL_64, ws_ext_ship_cost:decimal(7,2)/DECIMAL_64, ws_net_paid:decimal(7,2)/DECIMAL_64, ws_net_paid_inc_tax:decimal(7,2)/DECIMAL_64, ws_net_paid_inc_ship:decimal(7,2)/DECIMAL_64, ws_net_paid_inc_ship_tax:decimal(7,2)/DECIMAL_64, ws_net_profit:decimal(7,2)/DECIMAL_64
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Map 16
+            Map Operator Tree:
+                TableScan
+                  alias: ws2
+                  filterExpr: ws_order_number is not null (type: boolean)
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:ws_sold_date_sk:int, 1:ws_sold_time_sk:int, 2:ws_ship_date_sk:int, 3:ws_item_sk:int, 4:ws_bill_customer_sk:int, 5:ws_bill_cdemo_sk:int, 6:ws_bill_hdemo_sk:int, 7:ws_bill_addr_sk:int, 8:ws_ship_customer_sk:int, 9:ws_ship_cdemo_sk:int, 10:ws_ship_hdemo_sk:int, 11:ws_ship_addr_sk:int, 12:ws_web_page_sk:int, 13:ws_web_site_sk:int, 14:ws_ship_mode_sk:int, 15:ws_warehouse_sk:int, 16:ws_promo_sk:int, 17:ws_order_number:int, 18:ws_quantity:int, 19:ws_wholesale_cost:decimal(7,2)/DECIMAL_64, 20:ws_list_price:decimal(7,2)/DECIMAL_64, 21:ws_sales_price:decimal(7,2)/DECIMAL_64, 22:ws_ext_discount_amt:decimal(7,2)/DECIMAL_64, 23:ws_ext_sales_price:decimal(7,2)/DECIMAL_64, 24:ws_ext_wholesale_cost:decimal(7,2)/DECIMAL_64, 25:ws_ext_list_price:decimal(7,2)/DECIMAL_64, 26:ws_ext_tax:decimal(7,2)/DECIMAL_64, 27:ws_coupon_amt:decimal(7,2)/DECIMAL_64, 28:ws_ext_ship_cost:decimal(7,2)/DECIMAL_64, 29:ws_net_paid:decimal(7,2)/DECIMAL_64, 30:ws_net_paid_inc_tax:decimal(7,2)/DECIMAL_64, 31:ws_net_paid_inc_ship:decimal(7,2)/DECIMAL_64, 32:ws_net_paid_inc_ship_tax:decimal(7,2)/DECIMAL_64, 33:ws_net_profit:decimal(7,2)/DECIMAL_64, 34:ROW__ID:struct]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 17:int)
+                    predicate: ws_order_number is not null (type: boolean)
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: ws_warehouse_sk (type: int), ws_order_number (type: int)
+                      outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [15, 17]
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col1 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col1 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkObjectHashOperator
+                            keyColumns: 17:int
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            partitionColumns: 17:int
+                            valueColumns: 15:int
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: int)
+            Execution mode: vectorized, llap
+            LLAP IO: may be used (ACID table)
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 34
+                    includeColumns: [15, 17]
+                    dataColumns: ws_sold_date_sk:int, ws_sold_time_sk:int, ws_ship_date_sk:int, ws_item_sk:int, ws_bill_customer_sk:int, ws_bill_cdemo_sk:int, ws_bill_hdemo_sk:int, ws_bill_addr_sk:int, ws_ship_customer_sk:int, ws_ship_cdemo_sk:int, ws_ship_hdemo_sk:int, ws_ship_addr_sk:int, ws_web_page_sk:int, ws_web_site_sk:int, ws_ship_mode_sk:int, ws_warehouse_sk:int, ws_promo_sk:int, ws_order_number:int, ws_quantity:int, ws_wholesale_cost:decimal(7,2)/DECIMAL_64, ws_list_price:decimal(7,2)/DECIMAL_64, ws_sales_price:decimal(7,2)/DECIMAL_64, ws_ext_discount_amt:decimal(7,2)/DECIMAL_64, ws_ext_sales_price:decimal(7,2)/DECIMAL_64, ws_ext_wholesale_cost:decimal(7,2)/DECIMAL_64, ws_ext_list_price:decimal(7,2)/DECIMAL_64, ws_ext_tax:decimal(7,2)/DECIMAL_64, ws_coupon_amt:decimal(7,2)/DECIMAL_64, ws_ext_ship_cost:decimal(7,2)/DECIMAL_64, ws_net_paid:decimal(7,2)/DECIMAL_64, ws_net_paid_inc_tax:decimal(7,2)/DECIMAL_64, ws_net_paid_inc_ship:decimal(7,2)/DECIMAL_64, ws_net_paid_inc_ship_tax:decimal(7,2)/DECIMAL_64, ws_net_profit:decimal(7,2)/DECIMAL_64
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Map 17
+            Map Operator Tree:
+                TableScan
+                  alias: web_returns
+                  filterExpr: wr_order_number is not null (type: boolean)
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:wr_returned_date_sk:int, 1:wr_returned_time_sk:int, 2:wr_item_sk:int, 3:wr_refunded_customer_sk:int, 4:wr_refunded_cdemo_sk:int, 5:wr_refunded_hdemo_sk:int, 6:wr_refunded_addr_sk:int, 7:wr_returning_customer_sk:int, 8:wr_returning_cdemo_sk:int, 9:wr_returning_hdemo_sk:int, 10:wr_returning_addr_sk:int, 11:wr_web_page_sk:int, 12:wr_reason_sk:int, 13:wr_order_number:int, 14:wr_return_quantity:int, 15:wr_return_amt:decimal(7,2)/DECIMAL_64, 16:wr_return_tax:decimal(7,2)/DECIMAL_64, 17:wr_return_amt_inc_tax:decimal(7,2)/DECIMAL_64, 18:wr_fee:decimal(7,2)/DECIMAL_64, 19:wr_return_ship_cost:decimal(7,2)/DECIMAL_64, 20:wr_refunded_cash:decimal(7,2)/DECIMAL_64, 21:wr_reversed_charge:decimal(7,2)/DECIMAL_64, 22:wr_account_credit:decimal(7,2)/DECIMAL_64, 23:wr_net_loss:decimal(7,2)/DECIMAL_64, 24:ROW__ID:struct]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 13:int)
+                    predicate: wr_order_number is not null (type: boolean)
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: wr_order_number (type: int)
+                      outputColumnNames: _col13
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [13]
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col13 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col13 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkObjectHashOperator
+                            keyColumns: 13:int
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            partitionColumns: 13:int
+                        Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: may be used (ACID table)
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 24
+                    includeColumns: [13]
+                    dataColumns: wr_returned_date_sk:int, wr_returned_time_sk:int, wr_item_sk:int, wr_refunded_customer_sk:int, wr_refunded_cdemo_sk:int, wr_refunded_hdemo_sk:int, wr_refunded_addr_sk:int, wr_returning_customer_sk:int, wr_returning_cdemo_sk:int, wr_returning_hdemo_sk:int, wr_returning_addr_sk:int, wr_web_page_sk:int, wr_reason_sk:int, wr_order_number:int, wr_return_quantity:int, wr_return_amt:decimal(7,2)/DECIMAL_64, wr_return_tax:decimal(7,2)/DECIMAL_64, wr_return_amt_inc_tax:decimal(7,2)/DECIMAL_64, wr_fee:decimal(7,2)/DECIMAL_64, wr_return_ship_cost:decimal(7,2)/DECIMAL_64, wr_refunded_cash:decimal(7,2)/DECIMAL_64, wr_reversed_charge:decimal(7,2)/DECIMAL_64, wr_account_credit:decimal(7,2)/DECIMAL_64, wr_net_loss:decimal(7,2)/DECIMAL_64
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Map 18
+            Map Operator Tree:
+                TableScan
+                  alias: date_dim
+                  filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' and d_date_sk is not null) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:d_date_sk:int, 1:d_date_id:string, 2:d_date:string, 3:d_month_seq:int, 4:d_week_seq:int, 5:d_quarter_seq:int, 6:d_year:int, 7:d_dow:int, 8:d_moy:int, 9:d_dom:int, 10:d_qoy:int, 11:d_fy_year:int, 12:d_fy_quarter_seq:int, 13:d_fy_week_seq:int, 14:d_day_name:string, 15:d_quarter_name:string, 16:d_holiday:string, 17:d_weekend:string, 18:d_following_holiday:string, 19:d_first_dom:int, 20:d_last_dom:int, 21:d_same_day_ly:int, 22:d_same_day_lq:int, 23:d_current_day:string, 24:d_current_week:string, 25:d_current_month:string, 26:d_current_quarter:string, 27:d_current_year:string, 28:ROW__ID:struct]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterTimestampColumnBetween(col 29:timestamp, left 1999-04-30 17:00:00.0, right 1999-06-29 17:00:00.0)(children: CastStringToTimestamp(col 2:string) -> 29:timestamp), SelectColumnIsNotNull(col 0:int))
+                    predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' and d_date_sk is not null) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: d_date_sk (type: int)
+                      outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0]
+                      Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            keyColumns: 0:int
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: may be used (ACID table)
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 28
+                    includeColumns: [0, 2]
+                    dataColumns: d_date_sk:int, d_date_id:string, d_date:string, d_month_seq:int, d_week_seq:int, d_quarter_seq:int, d_year:int, d_dow:int, d_moy:int, d_dom:int, d_qoy:int, d_fy_year:int, d_fy_quarter_seq:int, d_fy_week_seq:int, d_day_name:string, d_quarter_name:string, d_holiday:string, d_weekend:string, d_following_holiday:string, d_first_dom:int, d_last_dom:int, d_same_day_ly:int, d_same_day_lq:int, d_current_day:string, d_current_week:string, d_current_month:string, d_current_quarter:string, d_current_year:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [timestamp]
+        Map 9
+            Map Operator Tree:
+                TableScan
+                  alias: customer_address
+                  filterExpr: ((ca_state = 'TX') and ca_address_sk is not null) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:ca_address_sk:int, 1:ca_address_id:string, 2:ca_street_number:string, 3:ca_street_name:string, 4:ca_street_type:string, 5:ca_suite_number:string, 6:ca_city:string, 7:ca_county:string, 8:ca_state:string, 9:ca_zip:string, 10:ca_country:string, 11:ca_gmt_offset:decimal(5,2)/DECIMAL_64, 12:ca_location_type:string, 13:ROW__ID:struct]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 8:string, val TX), SelectColumnIsNotNull(col 0:int))
+                    predicate: ((ca_state = 'TX') and ca_address_sk is not null) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: ca_address_sk (type: int)
+                      outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0]
+                      Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            keyColumns: 0:int
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: may be used (ACID table)
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 13
+                    includeColumns: [0, 8]
+                    dataColumns: ca_address_sk:int, ca_address_id:string, ca_street_number:string, ca_street_name:string, ca_street_type:string, ca_suite_number:string, ca_city:string, ca_county:string, ca_state:string, ca_zip:string, ca_country:string, ca_gmt_offset:decimal(5,2)/DECIMAL_64, ca_location_type:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Reducer 12
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col1 (type: int)
+                  1 _col1 (type: int)
+                outputColumnNames: _col0, _col1, _col2
+                residual filter predicates: {(_col0 <> _col2)}
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col1 (type: int)
+                  outputColumnNames: _col1
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    keys: _col1 (type: int)
+                    mode: hash
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col1 (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            MergeJoin Vectorization:
+                enabled: false
+                enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
+        Reducer 13
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: KEY._col0:int
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    keyExpressions: col 0:int
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumnNums: []
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkObjectHashOperator
+                      keyColumns: 0:int
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      partitionColumns: 0:int
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+        Reducer 14
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col13 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col13
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  keys: _col13 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            MergeJoin Vectorization:
+                enabled: false
+                enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
+        Reducer 15
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: KEY._col0:int
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    keyExpressions: col 0:int
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumnNums: []
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkObjectHashOperator
+                      keyColumns: 0:int
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      partitionColumns: 0:int
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col1 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col2 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col2 (type: int)
+                  Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2))
+            MergeJoin Vectorization:
+                enabled: false
+                enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
+        Reducer 3
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col2 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col3, _col4, _col5
+                Statistics: Num rows: 1 Data size: 290 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col3 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col3 (type: int)
+                  Statistics: Num rows: 1 Data size: 290 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2))
+            MergeJoin Vectorization:
+                enabled: false
+                enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
+        Reducer 4
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                     Inner Join 0 to 2
+                keys:
+                  0 _col3 (type: int)
+                  1 _col0 (type: int)
+                  2 _col0 (type: int)
+                outputColumnNames: _col0, _col3, _col4, _col5
+                Statistics: Num rows: 2 Data size: 638 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 2 Data size: 638 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2))
+            MergeJoin Vectorization:
+                enabled: false
+                enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
+        Reducer 5
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col3, _col4, _col5
+                Statistics: Num rows: 2 Data size: 701 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: sum(_col4), sum(_col5)
+                  keys: _col3 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col2, _col3
+                  Statistics: Num rows: 2 Data size: 701 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 2 Data size: 701 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2))
+            MergeJoin Vectorization:
+                enabled: false
+                enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
+        Reducer 6
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    dataColumns: KEY._col0:int, VALUE._col0:decimal(17,2), VALUE._col1:decimal(17,2)
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0), sum(VALUE._col1)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumDecimal(col 1:decimal(17,2)) -> decimal(17,2), VectorUDAFSumDecimal(col 2:decimal(17,2)) -> decimal(17,2)
+                    className: VectorGroupByOperator
+                    groupByMode: PARTIAL2
+                    keyExpressions: col 0:int
+                    native: false
+                    vectorProcessingMode: STREAMING
+                    projectedOutputColumnNums: [0, 1]
+                keys: KEY._col0 (type: int)
+                mode: partial2
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 2 Data size: 701 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(_col0), sum(_col1), sum(_col2)
+                  Group By Vectorization:
+                      aggregators: VectorUDAFCount(col 0:int) -> bigint, VectorUDAFSumDecimal(col 1:decimal(17,2)) -> decimal(17,2), VectorUDAFSumDecimal(col 2:decimal(17,2)) -> decimal(17,2)
+                      className: VectorGroupByOperator
+                      groupByMode: PARTIAL2
+                      native: false
+                      vectorProcessingMode: STREAMING
+                      projectedOutputColumnNums: [0, 1, 2]
+                  mode: partial2
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order:
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkEmptyKeyOperator
+                        native: true
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        valueColumns: 0:bigint, 1:decimal(17,2), 2:decimal(17,2)
+                    Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: bigint), _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2))
+        Reducer 7
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder:
+                reduceColumnSortOrder:
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    dataColumns: VALUE._col0:bigint, VALUE._col1:decimal(17,2), VALUE._col2:decimal(17,2)
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint, VectorUDAFSumDecimal(col 1:decimal(17,2)) -> decimal(17,2), VectorUDAFSumDecimal(col 2:decimal(17,2)) -> decimal(17,2)
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                    projectedOutputColumnNums: [0, 1, 2]
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col0 (type: bigint)
+                  outputColumnNames: _col1, _col2, _col3
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumnNums: [1, 2, 0]
+                  Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col3 (type: bigint)
+                    sort order: +
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkObjectHashOperator
+                        keyColumns: 0:bigint
+                        native: true
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        valueColumns: 1:decimal(17,2), 2:decimal(17,2)
+                    Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+                    TopN Hash Memory Usage: 0.1
+                    value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2))
+        Reducer 8
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: z
+                reduceColumnSortOrder: +
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:decimal(17,2), VALUE._col1:decimal(17,2)
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: decimal(17,2)), VALUE._col1 (type: decimal(17,2))
+                outputColumnNames: _col0, _col1, _col2
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [0, 1, 2]
+                Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 100
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
+                  Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
diff --git ql/src/test/results/clientpositive/llap/subquery_in.q.out ql/src/test/results/clientpositive/llap/subquery_in.q.out
index 9cfc960bf0..07cc4dbabc 100644
--- ql/src/test/results/clientpositive/llap/subquery_in.q.out
+++ ql/src/test/results/clientpositive/llap/subquery_in.q.out
@@ -5156,10 +5156,9 @@ STAGE PLANS:
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE)
         Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE)
-        Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
         Reducer 6 <- Map 5 (SIMPLE_EDGE)
         Reducer 8 <- Map 7 (SIMPLE_EDGE)
-        Reducer 9 <- Map 7 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -5221,12 +5220,6 @@
                       Map-reduce partition columns: _col0 (type: int)
                       Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
                       value expressions: _col1 (type: bigint), _col2 (type: bigint)
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
-                      value expressions: _col1 (type: bigint), _col2 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
         Reducer 2
@@ -5340,15 +5333,6 @@ STAGE PLANS:
                   Map-reduce partition columns: _col0 (type: int)
                   Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col1 (type: bigint), _col2 (type: bigint)
-        Reducer 9
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: sum(VALUE._col0), count(VALUE._col1)
-                keys: KEY._col0 (type: int)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
                 Filter Operator
                   predicate: (_col1 is not null and _col2 is not null) (type: boolean)
                   Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
diff --git ql/src/test/results/clientpositive/llap/subquery_select.q.out ql/src/test/results/clientpositive/llap/subquery_select.q.out
index 0435530467..d3cc980ca1 100644
--- ql/src/test/results/clientpositive/llap/subquery_select.q.out
+++ ql/src/test/results/clientpositive/llap/subquery_select.q.out
@@ -3190,13 +3190,12 @@ STAGE PLANS:
     Tez
#### A masked pattern was here ####
      Edges:
-        Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE)
         Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE)
-        Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
-        Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
-        Reducer 5 <- Reducer 10 (XPROD_EDGE), Reducer 4 (XPROD_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
+        Reducer 5 <- Reducer 4 (XPROD_EDGE), Reducer 9 (XPROD_EDGE)
         Reducer 7 <- Map 6 (SIMPLE_EDGE)
-        Reducer 8 <- Map 6 (SIMPLE_EDGE)
+        Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -3237,15 +3236,9 @@
                       Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE
                       value expressions: _col1 (type: int)
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: string)
-                      Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE
-                      value expressions: _col1 (type: int)
             Execution mode: vectorized, llap
            LLAP IO: no inputs
-        Map 9
+        Map 8
            Map Operator Tree:
                TableScan
                  alias: part
@@ -3265,37 +3258,6 @@
                      value expressions: _col0 (type: string)
            Execution mode: vectorized, llap
            LLAP IO: no inputs
-        Reducer 10
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: min(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
-                Select Operator
-                  expressions: _col0 (type: string), true (type: boolean)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
-                    value expressions: _col1 (type: boolean)
-                Group By Operator
-                  aggregations: count(), count(_col0)
-                  mode: complete
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: (_col0 = 0L) (type: boolean), (_col1 < _col0) (type: boolean)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      sort order:
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                      value expressions: _col0 (type: boolean), _col1 (type: boolean)
        Reducer 2
            Execution mode: llap
            Reduce Operator Tree:
@@ -3392,15 +3354,6 @@
                  Map-reduce partition columns: _col0 (type: string)
                  Statistics: Num rows: 13 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE
                  value expressions: _col1 (type: bigint), _col2 (type: boolean), _col3 (type: boolean)
-        Reducer 8
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: max(VALUE._col0)
-                keys: KEY._col0 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE
                Filter Operator
                  predicate: _col1 is not null (type: boolean)
                  Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE
@@ -3414,6 +3367,37 @@
                      Map-reduce partition columns: _col1 (type: string), _col0 (type: int)
                      Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE
                      value expressions: _col2 (type: boolean)
+        Reducer 9
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: min(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: string), true (type: boolean)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col1 (type: boolean)
+                Group By Operator
+                  aggregations: count(), count(_col0)
+                  mode: complete
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: (_col0 = 0L) (type: boolean), (_col1 < _col0) (type: boolean)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      sort order:
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: boolean), _col1 (type: boolean)

  Stage: Stage-0
    Fetch Operator