diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 1838300247..3d27af5578 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -636,6 +636,7 @@ minillaplocal.query.files=\ materialized_view_rewrite_8.q,\ materialized_view_rewrite_9.q,\ materialized_view_rewrite_10.q,\ + materialized_view_rewrite_in_between.q,\ materialized_view_rewrite_no_join_opt.q,\ materialized_view_rewrite_no_join_opt_2.q,\ materialized_view_rewrite_part_1.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index cde8eada64..bf34557782 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -1895,7 +1895,6 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv PerfLogger perfLogger = SessionState.getPerfLogger(); final int maxCNFNodeCount = conf.getIntVar(HiveConf.ConfVars.HIVE_CBO_CNF_NODES_LIMIT); - final int minNumORClauses = conf.getIntVar(HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN); final HepProgramBuilder program = new HepProgramBuilder(); @@ -1951,11 +1950,6 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv rules.add(HiveReduceExpressionsRule.JOIN_INSTANCE); rules.add(HiveAggregateReduceFunctionsRule.INSTANCE); rules.add(HiveAggregateReduceRule.INSTANCE); - if (conf.getBoolVar(HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) { - rules.add(new HivePointLookupOptimizerRule.FilterCondition(minNumORClauses)); - rules.add(new HivePointLookupOptimizerRule.JoinCondition(minNumORClauses)); - rules.add(new HivePointLookupOptimizerRule.ProjectionExpressions(minNumORClauses)); - } rules.add(HiveProjectJoinTransposeRule.INSTANCE); if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_CONSTRAINTS_JOIN) && profilesCBO.contains(ExtendedCBOProfile.REFERENTIAL_CONSTRAINTS)) { @@ -2290,11 +2284,25 @@ private RelNode applyPostJoinOrderingTransform(RelNode basePlan, RelMetadataProv final HepProgramBuilder program = new HepProgramBuilder(); + final int minNumORClauses = conf.getIntVar(HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN); + // 1. Run other optimizations that do not need stats + List rules = Lists.newArrayList(); + rules.add(ProjectRemoveRule.INSTANCE); + rules.add(HiveUnionMergeRule.INSTANCE); + rules.add(HiveAggregateProjectMergeRule.INSTANCE); + rules.add(HiveProjectMergeRule.INSTANCE_NO_FORCE); + rules.add(HiveJoinCommuteRule.INSTANCE); + if (conf.getBoolVar(HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER) && + !ctx.isLoadingMaterializedView()) { + // We do not close these conditions when we load materialized views since + // this may prevent some rewritings from happening + rules.add(new HivePointLookupOptimizerRule.FilterCondition(minNumORClauses)); + rules.add(new HivePointLookupOptimizerRule.JoinCondition(minNumORClauses)); + rules.add(new HivePointLookupOptimizerRule.ProjectionExpressions(minNumORClauses)); + } generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST, - ProjectRemoveRule.INSTANCE, HiveUnionMergeRule.INSTANCE, - HiveAggregateProjectMergeRule.INSTANCE, HiveProjectMergeRule.INSTANCE_NO_FORCE, - HiveJoinCommuteRule.INSTANCE); + rules.toArray(new RelOptRule[rules.size()])); // 2. Run aggregate-join transpose (cost based) // If it failed because of missing stats, we continue with @@ -2348,7 +2356,7 @@ private RelNode applyPostJoinOrderingTransform(RelNode basePlan, RelMetadataProv // 8. Apply JDBC transformation rules if (conf.getBoolVar(ConfVars.HIVE_ENABLE_JDBC_PUSHDOWN)) { - List rules = Lists.newArrayList(); + rules = Lists.newArrayList(); rules.add(JDBCExpandExpressionsRule.FILTER_INSTANCE); rules.add(JDBCExpandExpressionsRule.JOIN_INSTANCE); rules.add(JDBCExpandExpressionsRule.PROJECT_INSTANCE); diff --git a/ql/src/test/queries/clientpositive/materialized_view_rewrite_in_between.q b/ql/src/test/queries/clientpositive/materialized_view_rewrite_in_between.q new file mode 100644 index 0000000000..f769888f2c --- /dev/null +++ b/ql/src/test/queries/clientpositive/materialized_view_rewrite_in_between.q @@ -0,0 +1,61 @@ +SET hive.cli.errors.ignore=true; +SET hive.support.concurrency=true; +SET hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +SET metastore.strict.managed.tables=true; +SET hive.default.fileformat=textfile; +SET hive.default.fileformat.managed=orc; +SET metastore.create.as.acid=true; +SET hive.groupby.position.alias=true; + +drop database if exists expr2 cascade; +create database expr2; +use expr2; +create table sales(prod_id int, cust_id int, store_id int, sale_date timestamp, qty int, amt double, descr string); +insert into sales values +(11,1,101,'12/24/2013',1000,1234.00,'onedummytwo'); + +create materialized view mv1 stored as orc as (select prod_id, cust_id, store_id, sale_date, qty, amt, descr from sales where cust_id in (1,2,3,4,5)); +-- SAME ORDER +explain cbo +select prod_id, cust_id from sales where cust_id in (1,2,3,4,5); +-- DIFFERENT ORDER +explain cbo +select prod_id, cust_id from sales where cust_id in (5,1,2,3,4); + +drop materialized view mv1; + +drop database if exists in_pred cascade; +create database in_pred; +use in_pred; +create table census_pop (state string, year int, population bigint); +insert into census_pop values("AZ", 2010, 200), ("CA", 2011, 100), ("CA", 2010, 200), ("AZ", 2010, 100), ("NY", 2011, 121), ("AZ", 2011, 1000), ("OR", 2015, 1001), ("WA", 2016, 121), ("NJ", 2010, 500), ("NJ", 2010, 5000), ("AZ", 2014, 1004), ("TX", 2010, 1000), ("AZ", 2010, 1000), ("PT", 2017, 1200), ("NM", 2018, 120), ("CA", 2010, 200); + +create materialized view mv2 stored as orc as select state, year, sum(population) from census_pop where year IN (2010, 2018) group by state, year; +-- SAME +explain cbo +select state, year, sum(population) from census_pop where year IN (2010, 2018) group by state, year; +-- PARTIAL IN EQUALS +explain cbo +select state, year, sum(population) from census_pop where year = 2010 group by state, year; +-- PARTIAL +explain cbo +select state, year, sum(population) from census_pop where year in (2010) group by state, year; + +drop materialized view mv2; + +drop database if exists expr9 cascade; +create database expr9; +use expr9; +create table sales(prod_id int, cust_id int, store_id int, sale_date timestamp, qty int, amt double, descr string); +insert into sales values +(11,1,101,'12/24/2013',1000,1234.00,'onedummytwo'); + +create materialized view mv3 stored as orc as (select prod_id, cust_id, store_id, sale_date, qty, amt, descr from sales where cust_id >= 1 and prod_id < 31); +-- SAME +explain cbo +select * from sales where cust_id >= 1 and prod_id < 31; +-- BETWEEN AND RANGE +explain cbo +select * from sales where cust_id between 1 and 20 and prod_id < 31; + +drop materialized view mv3; diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_in_between.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_in_between.q.out new file mode 100644 index 0000000000..de303da574 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_in_between.q.out @@ -0,0 +1,285 @@ +PREHOOK: query: drop database if exists expr2 cascade +PREHOOK: type: DROPDATABASE +POSTHOOK: query: drop database if exists expr2 cascade +POSTHOOK: type: DROPDATABASE +PREHOOK: query: create database expr2 +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:expr2 +POSTHOOK: query: create database expr2 +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:expr2 +PREHOOK: query: use expr2 +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:expr2 +POSTHOOK: query: use expr2 +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:expr2 +PREHOOK: query: create table sales(prod_id int, cust_id int, store_id int, sale_date timestamp, qty int, amt double, descr string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:expr2 +PREHOOK: Output: expr2@sales +POSTHOOK: query: create table sales(prod_id int, cust_id int, store_id int, sale_date timestamp, qty int, amt double, descr string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:expr2 +POSTHOOK: Output: expr2@sales +PREHOOK: query: insert into sales values +(11,1,101,'12/24/2013',1000,1234.00,'onedummytwo') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: expr2@sales +POSTHOOK: query: insert into sales values +(11,1,101,'12/24/2013',1000,1234.00,'onedummytwo') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: expr2@sales +POSTHOOK: Lineage: sales.amt SCRIPT [] +POSTHOOK: Lineage: sales.cust_id SCRIPT [] +POSTHOOK: Lineage: sales.descr SCRIPT [] +POSTHOOK: Lineage: sales.prod_id SCRIPT [] +POSTHOOK: Lineage: sales.qty SCRIPT [] +POSTHOOK: Lineage: sales.sale_date SCRIPT [] +POSTHOOK: Lineage: sales.store_id SCRIPT [] +PREHOOK: query: create materialized view mv1 stored as orc as (select prod_id, cust_id, store_id, sale_date, qty, amt, descr from sales where cust_id in (1,2,3,4,5)) +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: expr2@sales +PREHOOK: Output: database:expr2 +PREHOOK: Output: expr2@mv1 +POSTHOOK: query: create materialized view mv1 stored as orc as (select prod_id, cust_id, store_id, sale_date, qty, amt, descr from sales where cust_id in (1,2,3,4,5)) +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: expr2@sales +POSTHOOK: Output: database:expr2 +POSTHOOK: Output: expr2@mv1 +PREHOOK: query: explain cbo +select prod_id, cust_id from sales where cust_id in (1,2,3,4,5) +PREHOOK: type: QUERY +PREHOOK: Input: expr2@mv1 +PREHOOK: Input: expr2@sales +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +select prod_id, cust_id from sales where cust_id in (1,2,3,4,5) +POSTHOOK: type: QUERY +POSTHOOK: Input: expr2@mv1 +POSTHOOK: Input: expr2@sales +#### A masked pattern was here #### +CBO PLAN: +HiveProject(prod_id=[$0], cust_id=[$1]) + HiveTableScan(table=[[expr2, mv1]], table:alias=[expr2.mv1]) + +PREHOOK: query: explain cbo +select prod_id, cust_id from sales where cust_id in (5,1,2,3,4) +PREHOOK: type: QUERY +PREHOOK: Input: expr2@mv1 +PREHOOK: Input: expr2@sales +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +select prod_id, cust_id from sales where cust_id in (5,1,2,3,4) +POSTHOOK: type: QUERY +POSTHOOK: Input: expr2@mv1 +POSTHOOK: Input: expr2@sales +#### A masked pattern was here #### +CBO PLAN: +HiveProject(prod_id=[$0], cust_id=[$1]) + HiveTableScan(table=[[expr2, mv1]], table:alias=[expr2.mv1]) + +PREHOOK: query: drop materialized view mv1 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: expr2@mv1 +PREHOOK: Output: expr2@mv1 +POSTHOOK: query: drop materialized view mv1 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: expr2@mv1 +POSTHOOK: Output: expr2@mv1 +PREHOOK: query: drop database if exists in_pred cascade +PREHOOK: type: DROPDATABASE +POSTHOOK: query: drop database if exists in_pred cascade +POSTHOOK: type: DROPDATABASE +PREHOOK: query: create database in_pred +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:in_pred +POSTHOOK: query: create database in_pred +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:in_pred +PREHOOK: query: use in_pred +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:in_pred +POSTHOOK: query: use in_pred +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:in_pred +PREHOOK: query: create table census_pop (state string, year int, population bigint) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:in_pred +PREHOOK: Output: in_pred@census_pop +POSTHOOK: query: create table census_pop (state string, year int, population bigint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:in_pred +POSTHOOK: Output: in_pred@census_pop +PREHOOK: query: insert into census_pop values("AZ", 2010, 200), ("CA", 2011, 100), ("CA", 2010, 200), ("AZ", 2010, 100), ("NY", 2011, 121), ("AZ", 2011, 1000), ("OR", 2015, 1001), ("WA", 2016, 121), ("NJ", 2010, 500), ("NJ", 2010, 5000), ("AZ", 2014, 1004), ("TX", 2010, 1000), ("AZ", 2010, 1000), ("PT", 2017, 1200), ("NM", 2018, 120), ("CA", 2010, 200) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: in_pred@census_pop +POSTHOOK: query: insert into census_pop values("AZ", 2010, 200), ("CA", 2011, 100), ("CA", 2010, 200), ("AZ", 2010, 100), ("NY", 2011, 121), ("AZ", 2011, 1000), ("OR", 2015, 1001), ("WA", 2016, 121), ("NJ", 2010, 500), ("NJ", 2010, 5000), ("AZ", 2014, 1004), ("TX", 2010, 1000), ("AZ", 2010, 1000), ("PT", 2017, 1200), ("NM", 2018, 120), ("CA", 2010, 200) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: in_pred@census_pop +POSTHOOK: Lineage: census_pop.population SCRIPT [] +POSTHOOK: Lineage: census_pop.state SCRIPT [] +POSTHOOK: Lineage: census_pop.year SCRIPT [] +PREHOOK: query: create materialized view mv2 stored as orc as select state, year, sum(population) from census_pop where year IN (2010, 2018) group by state, year +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: in_pred@census_pop +PREHOOK: Output: database:in_pred +PREHOOK: Output: in_pred@mv2 +POSTHOOK: query: create materialized view mv2 stored as orc as select state, year, sum(population) from census_pop where year IN (2010, 2018) group by state, year +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: in_pred@census_pop +POSTHOOK: Output: database:in_pred +POSTHOOK: Output: in_pred@mv2 +PREHOOK: query: explain cbo +select state, year, sum(population) from census_pop where year IN (2010, 2018) group by state, year +PREHOOK: type: QUERY +PREHOOK: Input: in_pred@census_pop +PREHOOK: Input: in_pred@mv2 +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +select state, year, sum(population) from census_pop where year IN (2010, 2018) group by state, year +POSTHOOK: type: QUERY +POSTHOOK: Input: in_pred@census_pop +POSTHOOK: Input: in_pred@mv2 +#### A masked pattern was here #### +CBO PLAN: +HiveTableScan(table=[[in_pred, mv2]], table:alias=[in_pred.mv2]) + +PREHOOK: query: explain cbo +select state, year, sum(population) from census_pop where year = 2010 group by state, year +PREHOOK: type: QUERY +PREHOOK: Input: in_pred@census_pop +PREHOOK: Input: in_pred@mv2 +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +select state, year, sum(population) from census_pop where year = 2010 group by state, year +POSTHOOK: type: QUERY +POSTHOOK: Input: in_pred@census_pop +POSTHOOK: Input: in_pred@mv2 +#### A masked pattern was here #### +CBO PLAN: +HiveProject(state=[$0], $f1=[CAST(2010):INTEGER], $f10=[$1]) + HiveAggregate(group=[{0}], agg#0=[sum($2)]) + HiveFilter(condition=[=(2010, $1)]) + HiveTableScan(table=[[in_pred, mv2]], table:alias=[in_pred.mv2]) + +PREHOOK: query: explain cbo +select state, year, sum(population) from census_pop where year in (2010) group by state, year +PREHOOK: type: QUERY +PREHOOK: Input: in_pred@census_pop +PREHOOK: Input: in_pred@mv2 +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +select state, year, sum(population) from census_pop where year in (2010) group by state, year +POSTHOOK: type: QUERY +POSTHOOK: Input: in_pred@census_pop +POSTHOOK: Input: in_pred@mv2 +#### A masked pattern was here #### +CBO PLAN: +HiveProject(state=[$0], $f1=[CAST(2010):INTEGER], $f10=[$1]) + HiveAggregate(group=[{0}], agg#0=[sum($2)]) + HiveFilter(condition=[=(2010, $1)]) + HiveTableScan(table=[[in_pred, mv2]], table:alias=[in_pred.mv2]) + +PREHOOK: query: drop materialized view mv2 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: in_pred@mv2 +PREHOOK: Output: in_pred@mv2 +POSTHOOK: query: drop materialized view mv2 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: in_pred@mv2 +POSTHOOK: Output: in_pred@mv2 +PREHOOK: query: drop database if exists expr9 cascade +PREHOOK: type: DROPDATABASE +POSTHOOK: query: drop database if exists expr9 cascade +POSTHOOK: type: DROPDATABASE +PREHOOK: query: create database expr9 +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:expr9 +POSTHOOK: query: create database expr9 +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:expr9 +PREHOOK: query: use expr9 +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:expr9 +POSTHOOK: query: use expr9 +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:expr9 +PREHOOK: query: create table sales(prod_id int, cust_id int, store_id int, sale_date timestamp, qty int, amt double, descr string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:expr9 +PREHOOK: Output: expr9@sales +POSTHOOK: query: create table sales(prod_id int, cust_id int, store_id int, sale_date timestamp, qty int, amt double, descr string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:expr9 +POSTHOOK: Output: expr9@sales +PREHOOK: query: insert into sales values +(11,1,101,'12/24/2013',1000,1234.00,'onedummytwo') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: expr9@sales +POSTHOOK: query: insert into sales values +(11,1,101,'12/24/2013',1000,1234.00,'onedummytwo') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: expr9@sales +POSTHOOK: Lineage: sales.amt SCRIPT [] +POSTHOOK: Lineage: sales.cust_id SCRIPT [] +POSTHOOK: Lineage: sales.descr SCRIPT [] +POSTHOOK: Lineage: sales.prod_id SCRIPT [] +POSTHOOK: Lineage: sales.qty SCRIPT [] +POSTHOOK: Lineage: sales.sale_date SCRIPT [] +POSTHOOK: Lineage: sales.store_id SCRIPT [] +PREHOOK: query: create materialized view mv3 stored as orc as (select prod_id, cust_id, store_id, sale_date, qty, amt, descr from sales where cust_id >= 1 and prod_id < 31) +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: expr9@sales +PREHOOK: Output: database:expr9 +PREHOOK: Output: expr9@mv3 +POSTHOOK: query: create materialized view mv3 stored as orc as (select prod_id, cust_id, store_id, sale_date, qty, amt, descr from sales where cust_id >= 1 and prod_id < 31) +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: expr9@sales +POSTHOOK: Output: database:expr9 +POSTHOOK: Output: expr9@mv3 +PREHOOK: query: explain cbo +select * from sales where cust_id >= 1 and prod_id < 31 +PREHOOK: type: QUERY +PREHOOK: Input: expr9@mv3 +PREHOOK: Input: expr9@sales +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +select * from sales where cust_id >= 1 and prod_id < 31 +POSTHOOK: type: QUERY +POSTHOOK: Input: expr9@mv3 +POSTHOOK: Input: expr9@sales +#### A masked pattern was here #### +CBO PLAN: +HiveTableScan(table=[[expr9, mv3]], table:alias=[expr9.mv3]) + +PREHOOK: query: explain cbo +select * from sales where cust_id between 1 and 20 and prod_id < 31 +PREHOOK: type: QUERY +PREHOOK: Input: expr9@mv3 +PREHOOK: Input: expr9@sales +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +select * from sales where cust_id between 1 and 20 and prod_id < 31 +POSTHOOK: type: QUERY +POSTHOOK: Input: expr9@mv3 +POSTHOOK: Input: expr9@sales +#### A masked pattern was here #### +CBO PLAN: +HiveFilter(condition=[>=(20, $1)]) + HiveTableScan(table=[[expr9, mv3]], table:alias=[expr9.mv3]) + +PREHOOK: query: drop materialized view mv3 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: expr9@mv3 +PREHOOK: Output: expr9@mv3 +POSTHOOK: query: drop materialized view mv3 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: expr9@mv3 +POSTHOOK: Output: expr9@mv3