diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index cdc71e08ad..d3559a5e46 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -169,8 +169,10 @@ import org.apache.hadoop.hive.ql.metadata.formatting.TextMetaDataTable; import org.apache.hadoop.hive.ql.parse.AlterTablePartMergeFilesDesc; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; +import org.apache.hadoop.hive.ql.parse.CalcitePlanner; import org.apache.hadoop.hive.ql.parse.DDLSemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState; +import org.apache.hadoop.hive.ql.parse.ParseUtils; import org.apache.hadoop.hive.ql.parse.PreInsertTableDesc; import org.apache.hadoop.hive.ql.parse.ReplicationSpec; import org.apache.hadoop.hive.ql.parse.SemanticException; @@ -1257,6 +1259,22 @@ private int alterMaterializedView(Hive db, AlterMaterializedViewDesc alterMVDesc // This is a noop, return successfully return 0; } + if (alterMVDesc.isRewriteEnable()) { + try { + final CalcitePlanner planner = ParseUtils.getAnalyzer(conf); + planner.genLogicalPlan(ParseUtils.parse(mv.getViewExpandedText())); + if (!planner.isCboSucceeded()) { + throw new HiveException("Cannot enable automatic rewriting for materialized view. " + + "Materialized view definition could not be parsed by CBO."); + } + if (!planner.isValidAutomaticRewritingMaterialization()) { + throw new HiveException("Cannot enable rewriting for materialized view. 
" + + planner.getInvalidAutomaticRewritingMaterializationReason()); + } + } catch (Exception e) { + throw new HiveException("Unable to parse materialized view definition", e); + } + } mv.setRewriteEnabled(alterMVDesc.isRewriteEnable()); break; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptAutomaticRewritingMaterializationValidator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptAutomaticRewritingMaterializationValidator.java new file mode 100644 index 0000000000..ee3e24565c --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptAutomaticRewritingMaterializationValidator.java @@ -0,0 +1,208 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.optimizer.calcite; + +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.core.TableFunctionScan; +import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.logical.LogicalAggregate; +import org.apache.calcite.rel.logical.LogicalCorrelate; +import org.apache.calcite.rel.logical.LogicalExchange; +import org.apache.calcite.rel.logical.LogicalFilter; +import org.apache.calcite.rel.logical.LogicalIntersect; +import org.apache.calcite.rel.logical.LogicalJoin; +import org.apache.calcite.rel.logical.LogicalMatch; +import org.apache.calcite.rel.logical.LogicalMinus; +import org.apache.calcite.rel.logical.LogicalProject; +import org.apache.calcite.rel.logical.LogicalSort; +import org.apache.calcite.rel.logical.LogicalUnion; +import org.apache.calcite.rel.logical.LogicalValues; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.util.Util; + +import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExcept; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIntersect; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; + +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Checks whether the plan is supported by the automatic 
rewriting algorithm. + * For instance, a materialized view definition should be SPJA query and it + * cannot contain outer joins. + */ +public class HiveRelOptAutomaticRewritingMaterializationValidator extends HiveRelShuttleImpl { + static final Logger LOG = LoggerFactory.getLogger(HiveRelOptAutomaticRewritingMaterializationValidator.class); + + protected String invalidReason; + + public void validate(RelNode relNode) { + try { + relNode.accept(this); + } catch (Util.FoundOne e) { + // Can ignore - the check failed. + } + } + + @Override + public RelNode visit(TableScan scan) { + return super.visit(scan); + } + + @Override + public RelNode visit(HiveProject project) { + return super.visit(project); + } + + @Override + public RelNode visit(HiveFilter filter) { + return super.visit(filter); + } + + @Override + public RelNode visit(HiveJoin join) { + if (join.getJoinType() != JoinRelType.INNER) { + fail(join.getJoinType() + " join type is not supported by rewriting algorithm."); + } + return super.visit(join); + } + + @Override + public RelNode visit(HiveAggregate aggregate) { + return super.visit(aggregate); + } + + @Override + public RelNode visit(RelNode node) { + // Fall-back for an unexpected RelNode type + return fail(node); + } + + @Override + public RelNode visit(TableFunctionScan scan) { + // Not supported - fail + return fail(scan); + } + + @Override + public RelNode visit(LogicalValues values) { + // Not expected to be encountered for Hive - fail + return fail(values); + } + + @Override + public RelNode visit(LogicalFilter filter) { + // Not expected to be encountered for Hive - fail + return fail(filter); + } + + @Override + public RelNode visit(LogicalProject project) { + // Not expected to be encountered for Hive - fail + return fail(project); + } + + @Override + public RelNode visit(LogicalJoin join) { + // Not expected to be encountered for Hive - fail + return fail(join); + } + + @Override + public RelNode visit(LogicalCorrelate correlate) { + // Not 
expected to be encountered for Hive - fail + return fail(correlate); + } + + @Override + public RelNode visit(LogicalUnion union) { + // Not expected to be encountered for Hive - fail + return fail(union); + } + + @Override + public RelNode visit(LogicalIntersect intersect) { + // Not expected to be encountered for Hive - fail + return fail(intersect); + } + + @Override + public RelNode visit(LogicalMinus minus) { + // Not expected to be encountered for Hive - fail + return fail(minus); + } + + @Override + public RelNode visit(LogicalAggregate aggregate) { + // Not expected to be encountered for Hive - fail + return fail(aggregate); + } + + @Override + public RelNode visit(LogicalMatch match) { + // Not expected to be encountered for Hive - fail + return fail(match); + } + + @Override + public RelNode visit(LogicalSort sort) { + // Not expected to be encountered for Hive - fail + return fail(sort); + } + + @Override + public RelNode visit(LogicalExchange exchange) { + // Not expected to be encountered for Hive - fail + return fail(exchange); + } + + private void fail(String reason) { + setInvalidReason(reason); + throw Util.FoundOne.NULL; + } + + private RelNode fail(RelNode node) { + setInvalidReason("Unsupported RelNode type " + node.getRelTypeName() + + " encountered in the query plan"); + throw Util.FoundOne.NULL; + } + + public String getInvalidReason() { + return invalidReason; + } + + public void setInvalidReason(String invalidReason) { + this.invalidReason = invalidReason; + } + + public boolean isValid() { + return invalidReason == null; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOpMaterializationValidator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptCachedQueryValidator.java similarity index 90% rename from ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOpMaterializationValidator.java rename to 
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptCachedQueryValidator.java index df216e7555..784351b168 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOpMaterializationValidator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptCachedQueryValidator.java @@ -18,10 +18,7 @@ package org.apache.hadoop.hive.ql.optimizer.calcite; -import java.util.List; - import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.core.AggregateCall; import org.apache.calcite.rel.core.TableFunctionScan; import org.apache.calcite.rel.core.TableScan; import org.apache.calcite.rel.logical.LogicalAggregate; @@ -39,11 +36,8 @@ import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexNode; import org.apache.calcite.util.Util; - import org.apache.hadoop.hive.metastore.TableType; - import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExcept; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; @@ -54,7 +48,6 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -64,12 +57,12 @@ * - References to temporary or external tables * - References to non-determinisitc functions. 
*/ -public class HiveRelOpMaterializationValidator extends HiveRelShuttleImpl { - static final Logger LOG = LoggerFactory.getLogger(HiveRelOpMaterializationValidator.class); +public class HiveRelOptCachedQueryValidator extends HiveRelShuttleImpl { + static final Logger LOG = LoggerFactory.getLogger(HiveRelOptCachedQueryValidator.class); - protected String invalidMaterializationReason; + protected String invalidReason; - public void validateQueryMaterialization(RelNode relNode) { + public void validate(RelNode relNode) { try { relNode.accept(this); } catch (Util.FoundOne e) { @@ -254,12 +247,12 @@ private RelNode visit(HiveIntersect intersect) { } private void fail(String reason) { - setInvalidMaterializationReason(reason); + setInvalidReason(reason); throw Util.FoundOne.NULL; } private RelNode fail(RelNode node) { - setInvalidMaterializationReason("Unsupported RelNode type " + node.getRelTypeName() + + setInvalidReason("Unsupported RelNode type " + node.getRelTypeName() + " encountered in the query plan"); throw Util.FoundOne.NULL; } @@ -271,15 +264,15 @@ private void checkExpr(RexNode expr) { } } - public String getInvalidMaterializationReason() { - return invalidMaterializationReason; + public String getInvalidReason() { + return invalidReason; } - public void setInvalidMaterializationReason(String invalidMaterializationReason) { - this.invalidMaterializationReason = invalidMaterializationReason; + public void setInvalidReason(String invalidReason) { + this.invalidReason = invalidReason; } - public boolean isValidMaterialization() { - return invalidMaterializationReason == null; + public boolean isValid() { + return invalidReason == null; } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 22f3266c87..0e870a5c74 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ 
-148,7 +148,8 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider; import org.apache.hadoop.hive.ql.optimizer.calcite.HivePlannerContext; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; -import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOpMaterializationValidator; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptAutomaticRewritingMaterializationValidator; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptCachedQueryValidator; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexExecutorImpl; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl; import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; @@ -1731,16 +1732,6 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu } perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Plan generation"); - // Validate query materialization (materialized views, query results caching. - // This check needs to occur before constant folding, which may remove some - // function calls from the query plan. - HiveRelOpMaterializationValidator matValidator = new HiveRelOpMaterializationValidator(); - matValidator.validateQueryMaterialization(calciteGenPlan); - if (!matValidator.isValidMaterialization()) { - String reason = matValidator.getInvalidMaterializationReason(); - setInvalidQueryMaterializationReason(reason); - } - // Create executor RexExecutor executorProvider = new HiveRexExecutorImpl(optCluster); calciteGenPlan.getCluster().getPlanner().setExecutor(executorProvider); @@ -1766,6 +1757,31 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu calciteGenPlan = HiveRelDecorrelator.decorrelateQuery(calciteGenPlan); LOG.debug("Plan after decorrelation:\n" + RelOptUtil.toString(calciteGenPlan)); + // Validate query materialization for query results caching. 
This check needs + // to occur before constant folding, which may remove some function calls + // from the query plan. + // In addition, if it is a materialized view creation and we are enabling it + // for rewriting, it should pass all checks done for query results caching + // and on top of that we should check that it only contains operators that + // are supported by the rewriting algorithm. + HiveRelOptCachedQueryValidator cachedQueryValidator = new HiveRelOptCachedQueryValidator(); + cachedQueryValidator.validate(calciteGenPlan); + if (!cachedQueryValidator.isValid()) { + String reason = cachedQueryValidator.getInvalidReason(); + setInvalidQueryMaterializationReason(reason); + // Automatic query rewriting should meet all cached query validator conditions, + // hence if it does not, we can add the reason here and skip scanning it again + setInvalidAutomaticRewritingMaterializationReason(reason); + } else { + // It passed first test, check whether it is valid for automatic rewriting + HiveRelOptAutomaticRewritingMaterializationValidator mvValidator = + new HiveRelOptAutomaticRewritingMaterializationValidator(); + mvValidator.validate(calciteGenPlan); + if (!mvValidator.isValid()) { + setInvalidAutomaticRewritingMaterializationReason(mvValidator.getInvalidReason()); + } + } + // 2. 
Apply pre-join order optimizations calcitePreCboPlan = applyPreJoinOrderingTransforms(calciteGenPlan, mdProvider.getMetadataProvider(), executorProvider); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java index be1c59f932..5d229b65b8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java @@ -555,7 +555,7 @@ public static RelNode parseQuery(HiveConf conf, String viewQuery) return analyzer.getResultSchema(); } - private static CalcitePlanner getAnalyzer(HiveConf conf) throws SemanticException, IOException { + public static CalcitePlanner getAnalyzer(HiveConf conf) throws SemanticException, IOException { final QueryState qs = new QueryState.Builder().withHiveConf(conf).build(); CalcitePlanner analyzer = new CalcitePlanner(qs); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 6a6e6c3639..533026797f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -398,6 +398,7 @@ HiveParser.TOK_DISTRIBUTEBY, HiveParser.TOK_SORTBY); private String invalidQueryMaterializationReason; + private String invalidAutomaticRewritingMaterializationReason; private static final CommonToken SELECTDI_TOKEN = new ImmutableCommonToken(HiveParser.TOK_SELECTDI, "TOK_SELECTDI"); @@ -12373,6 +12374,16 @@ void analyzeInternal(ASTNode ast, PlannerContextFactory pcf) throws SemanticExce validateCreateView(); if (createVwDesc.isMaterialized()) { + if (createVwDesc.isRewriteEnabled()) { + if (!isCboSucceeded()) { + throw new SemanticException("Cannot enable automatic rewriting for materialized view. 
" + + "Materialized view definition could not be parsed by CBO."); + } + if (!isValidAutomaticRewritingMaterialization()) { + throw new SemanticException("Cannot enable rewriting for materialized view. " + + getInvalidAutomaticRewritingMaterializationReason()); + } + } createVwDesc.setTablesUsed(getTablesUsed(pCtx)); } else { // Since we're only creating a view (not executing it), we don't need to @@ -15129,6 +15140,24 @@ public ColsAndTypes(String cols, String colTypes) { public String colTypes; } + public boolean isCboSucceeded() { + return this.ctx.isCboSucceeded(); + } + + public String getInvalidAutomaticRewritingMaterializationReason() { + return invalidAutomaticRewritingMaterializationReason; + } + + public void setInvalidAutomaticRewritingMaterializationReason( + String invalidAutomaticRewritingMaterializationReason) { + this.invalidAutomaticRewritingMaterializationReason = + invalidAutomaticRewritingMaterializationReason; + } + + public boolean isValidAutomaticRewritingMaterialization() { + return (invalidAutomaticRewritingMaterializationReason == null); + } + public String getInvalidQueryMaterializationReason() { return invalidQueryMaterializationReason; }