diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 6a70a4a6bd..a52dd73653 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -570,6 +570,7 @@ minillaplocal.query.files=\ materialized_view_create_rewrite_5.q,\ materialized_view_describe.q,\ materialized_view_drop.q,\ + materialized_view_rebuild.q,\ materialized_view_rewrite_1.q,\ materialized_view_rewrite_2.q,\ materialized_view_rewrite_3.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 107d032eb7..cc35199834 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -68,6 +68,7 @@ import org.apache.calcite.plan.hep.HepPlanner; import org.apache.calcite.plan.hep.HepProgramBuilder; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelVisitor; import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.core.TableScan; import org.apache.calcite.rel.type.RelDataType; @@ -177,6 +178,7 @@ import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +import org.apache.hadoop.hive.ql.lockmgr.LockException; import org.apache.hadoop.hive.ql.log.PerfLogger; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; @@ -1340,7 +1342,7 @@ public Table apply(org.apache.hadoop.hive.metastore.api.Table table) { * @return the list of materialized views available for rewriting * @throws HiveException */ - public List getAllValidMaterializedViews(boolean forceMVContentsUpToDate, ValidTxnWriteIdList txnList) + public List getAllValidMaterializedViews(boolean forceMVContentsUpToDate, String validTxnsList) throws HiveException { // Final result List result = new ArrayList<>(); @@ -1352,7 +1354,7 @@ public Table apply(org.apache.hadoop.hive.metastore.api.Table table) { // Bail out: empty list continue; } - result.addAll(getValidMaterializedViews(dbName, materializedViewNames, forceMVContentsUpToDate, txnList)); + result.addAll(getValidMaterializedViews(dbName, materializedViewNames, forceMVContentsUpToDate, validTxnsList)); } return result; } catch (Exception e) { @@ -1361,12 +1363,12 @@ public Table apply(org.apache.hadoop.hive.metastore.api.Table table) { } public List getValidMaterializedView(String dbName, String materializedViewName, - boolean forceMVContentsUpToDate, ValidTxnWriteIdList txnList) throws HiveException { - return getValidMaterializedViews(dbName, ImmutableList.of(materializedViewName), forceMVContentsUpToDate, txnList); + boolean forceMVContentsUpToDate, String validTxnsList) throws HiveException { + return getValidMaterializedViews(dbName, ImmutableList.of(materializedViewName), forceMVContentsUpToDate, validTxnsList); } private List getValidMaterializedViews(String dbName, List materializedViewNames, - boolean forceMVContentsUpToDate, ValidTxnWriteIdList txnList) throws HiveException { + boolean forceMVContentsUpToDate, String validTxnsList) throws HiveException { final boolean tryIncrementalRewriting = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REWRITING_INCREMENTAL); final long defaultDiff = @@ -1421,7 +1423,7 @@ public Table apply(org.apache.hadoop.hive.metastore.api.Table table) { } if (outdated && (!tryIncrementalRewriting || materializationInvInfo == null - || txnList == null || materializationInvInfo.isSourceTablesUpdateDeleteModified())) { + || validTxnsList == null || materializationInvInfo.isSourceTablesUpdateDeleteModified())) { // We will not try partial rewriting either because the config specification, this // is a rebuild over some non-transactional table, or there were update/delete // operations in the source tables (not supported yet) @@ -1450,7 +1452,7 @@ public Table apply(org.apache.hadoop.hive.metastore.api.Table table) { // We will rewrite it to include the filters on transaction list // so we can produce partial rewritings materialization = augmentMaterializationWithTimeInformation( - materialization, txnList, new ValidTxnWriteIdList( + materialization, validTxnsList, new ValidTxnWriteIdList( materializationInvInfo.getValidTxnList())); } result.add(materialization); @@ -1473,7 +1475,7 @@ public Table apply(org.apache.hadoop.hive.metastore.api.Table table) { // We will rewrite it to include the filters on transaction list // so we can produce partial rewritings materialization = augmentMaterializationWithTimeInformation( - materialization, txnList, new ValidTxnWriteIdList( + materialization, validTxnsList, new ValidTxnWriteIdList( materializationInvInfo.getValidTxnList())); } result.add(materialization); @@ -1497,8 +1499,24 @@ public Table apply(org.apache.hadoop.hive.metastore.api.Table table) { * its invalidation. */ private static RelOptMaterialization augmentMaterializationWithTimeInformation( - RelOptMaterialization materialization, ValidTxnWriteIdList currentTxnList, - ValidTxnWriteIdList materializationTxnList) { + RelOptMaterialization materialization, String validTxnsList, + ValidTxnWriteIdList materializationTxnList) throws LockException { + // Extract tables used by the query which will in turn be used to generate + // the corresponding txn write ids + List tablesUsed = new ArrayList<>(); + new RelVisitor() { + @Override + public void visit(RelNode node, int ordinal, RelNode parent) { + if (node instanceof TableScan) { + TableScan ts = (TableScan) node; + tablesUsed.add(((RelOptHiveTable) ts.getTable()).getHiveTableMD().getFullyQualifiedName()); + } + super.visit(node, ordinal, parent); + } + }.go(materialization.queryRel); + ValidTxnWriteIdList currentTxnList = + SessionState.get().getTxnMgr().getValidWriteIds(tablesUsed, validTxnsList); + // Augment final RexBuilder rexBuilder = materialization.queryRel.getCluster().getRexBuilder(); final HepProgramBuilder augmentMaterializationProgram = new HepProgramBuilder() .addRuleInstance(new HiveAugmentMaterializationRule(rexBuilder, currentTxnList, materializationTxnList)); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 0bc9d230e5..d66227eda9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -2085,35 +2085,18 @@ private RelNode applyMaterializedViewRewriting(RelOptPlanner planner, RelNode ba // Add views to planner List materializations = new ArrayList<>(); try { - // Extract tables used by the query which will in turn be used to generate - // the corresponding txn write ids - List tablesUsed = new ArrayList<>(); - new RelVisitor() { - @Override - public void visit(RelNode node, int ordinal, RelNode parent) { - if (node instanceof TableScan) { - TableScan ts = (TableScan) node; - tablesUsed.add(((RelOptHiveTable) ts.getTable()).getHiveTableMD().getFullyQualifiedName()); - } - super.visit(node, ordinal, parent); - } - }.go(basePlan); final String validTxnsList = conf.get(ValidTxnList.VALID_TXNS_KEY); - ValidTxnWriteIdList txnWriteIds = null; - if (validTxnsList != null && !validTxnsList.isEmpty()) { - txnWriteIds = getTxnMgr().getValidWriteIds(tablesUsed, validTxnsList); - } if (mvRebuildMode != MaterializationRebuildMode.NONE) { // We only retrieve the materialization corresponding to the rebuild. In turn, // we pass 'true' for the forceMVContentsUpToDate parameter, as we cannot allow the // materialization contents to be stale for a rebuild if we want to use it. materializations = Hive.get().getValidMaterializedView(mvRebuildDbName, mvRebuildName, - true, txnWriteIds); + true, validTxnsList); } else { // This is not a rebuild, we retrieve all the materializations. In turn, we do not need // to force the materialization contents to be up-to-date, as this is not a rebuild, and // we apply the user parameters (HIVE_MATERIALIZED_VIEW_REWRITING_TIME_WINDOW) instead. - materializations = Hive.get().getAllValidMaterializedViews(false, txnWriteIds); + materializations = Hive.get().getAllValidMaterializedViews(false, validTxnsList); } // We need to use the current cluster for the scan operator on views, // otherwise the planner will throw an Exception (different planners) diff --git a/ql/src/test/queries/clientpositive/materialized_view_rebuild.q b/ql/src/test/queries/clientpositive/materialized_view_rebuild.q new file mode 100644 index 0000000000..9f6eaaf58d --- /dev/null +++ b/ql/src/test/queries/clientpositive/materialized_view_rebuild.q @@ -0,0 +1,31 @@ +-- SORT_QUERY_RESULTS + +set hive.vectorized.execution.enabled=false; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.strict.checks.cartesian.product=false; +set hive.stats.fetch.column.stats=true; +set hive.materializedview.rewriting=true; + +drop materialized view if exists mv_rebuild; +drop table if exists basetable_rebuild; + +create table basetable_rebuild (a int, b varchar(256), c decimal(10,2)) +stored as orc TBLPROPERTIES ('transactional'='true'); + +insert into basetable_rebuild values (1, 'alfred', 10.30),(2, 'bob', 3.14),(2, 'bonnie', 172342.2),(3, 'calvin', 978.76),(3, 'charlie', 9.8); + +create materialized view mv_rebuild as select a, b, sum(a) from basetable_rebuild group by a,b; + +select * from mv_rebuild; + +insert into basetable_rebuild values (4, 'amia', 7.5); + +select * from mv_rebuild; + +alter materialized view mv_rebuild rebuild; + +select * from mv_rebuild; + +drop materialized view mv_rebuild; +drop table basetable_rebuild; diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rebuild.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rebuild.q.out new file mode 100644 index 0000000000..4d37d82b6e --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rebuild.q.out @@ -0,0 +1,117 @@ +PREHOOK: query: drop materialized view if exists mv_rebuild +PREHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: query: drop materialized view if exists mv_rebuild +POSTHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: query: drop table if exists basetable_rebuild +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists basetable_rebuild +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table basetable_rebuild (a int, b varchar(256), c decimal(10,2)) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@basetable_rebuild +POSTHOOK: query: create table basetable_rebuild (a int, b varchar(256), c decimal(10,2)) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@basetable_rebuild +PREHOOK: query: insert into basetable_rebuild values (1, 'alfred', 10.30),(2, 'bob', 3.14),(2, 'bonnie', 172342.2),(3, 'calvin', 978.76),(3, 'charlie', 9.8) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@basetable_rebuild +POSTHOOK: query: insert into basetable_rebuild values (1, 'alfred', 10.30),(2, 'bob', 3.14),(2, 'bonnie', 172342.2),(3, 'calvin', 978.76),(3, 'charlie', 9.8) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@basetable_rebuild +POSTHOOK: Lineage: basetable_rebuild.a SCRIPT [] +POSTHOOK: Lineage: basetable_rebuild.b SCRIPT [] +POSTHOOK: Lineage: basetable_rebuild.c SCRIPT [] +PREHOOK: query: create materialized view mv_rebuild as select a, b, sum(a) from basetable_rebuild group by a,b +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@basetable_rebuild +PREHOOK: Output: database:default +PREHOOK: Output: default@mv_rebuild +POSTHOOK: query: create materialized view mv_rebuild as select a, b, sum(a) from basetable_rebuild group by a,b +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@basetable_rebuild +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv_rebuild +PREHOOK: query: select * from mv_rebuild +PREHOOK: type: QUERY +PREHOOK: Input: default@mv_rebuild +#### A masked pattern was here #### +POSTHOOK: query: select * from mv_rebuild +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv_rebuild +#### A masked pattern was here #### +1 alfred 1 +2 bob 2 +2 bonnie 2 +3 calvin 3 +3 charlie 3 +PREHOOK: query: insert into basetable_rebuild values (4, 'amia', 7.5) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@basetable_rebuild +POSTHOOK: query: insert into basetable_rebuild values (4, 'amia', 7.5) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@basetable_rebuild +POSTHOOK: Lineage: basetable_rebuild.a SCRIPT [] +POSTHOOK: Lineage: basetable_rebuild.b SCRIPT [] +POSTHOOK: Lineage: basetable_rebuild.c SCRIPT [] +PREHOOK: query: select * from mv_rebuild +PREHOOK: type: QUERY +PREHOOK: Input: default@mv_rebuild +#### A masked pattern was here #### +POSTHOOK: query: select * from mv_rebuild +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv_rebuild +#### A masked pattern was here #### +1 alfred 1 +2 bob 2 +2 bonnie 2 +3 calvin 3 +3 charlie 3 +PREHOOK: query: alter materialized view mv_rebuild rebuild +PREHOOK: type: QUERY +PREHOOK: Input: default@basetable_rebuild +PREHOOK: Output: default@mv_rebuild +POSTHOOK: query: alter materialized view mv_rebuild rebuild +POSTHOOK: type: QUERY +POSTHOOK: Input: default@basetable_rebuild +POSTHOOK: Output: default@mv_rebuild +POSTHOOK: Lineage: mv_rebuild._c2 EXPRESSION [(basetable_rebuild)basetable_rebuild.FieldSchema(name:a, type:int, comment:null), ] +POSTHOOK: Lineage: mv_rebuild.a SIMPLE [(basetable_rebuild)basetable_rebuild.FieldSchema(name:a, type:int, comment:null), ] +POSTHOOK: Lineage: mv_rebuild.b SIMPLE [(basetable_rebuild)basetable_rebuild.FieldSchema(name:b, type:varchar(256), comment:null), ] +PREHOOK: query: select * from mv_rebuild +PREHOOK: type: QUERY +PREHOOK: Input: default@mv_rebuild +#### A masked pattern was here #### +POSTHOOK: query: select * from mv_rebuild +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv_rebuild +#### A masked pattern was here #### +1 alfred 1 +2 bob 2 +2 bonnie 2 +3 calvin 3 +3 charlie 3 +4 amia 4 +PREHOOK: query: drop materialized view mv_rebuild +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv_rebuild +PREHOOK: Output: default@mv_rebuild +POSTHOOK: query: drop materialized view mv_rebuild +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv_rebuild +POSTHOOK: Output: default@mv_rebuild +PREHOOK: query: drop table basetable_rebuild +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@basetable_rebuild +PREHOOK: Output: default@basetable_rebuild +POSTHOOK: query: drop table basetable_rebuild +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@basetable_rebuild +POSTHOOK: Output: default@basetable_rebuild