diff --git a/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java b/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java index 8e6b01bfb4..8eb5c0181e 100644 --- a/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java +++ b/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java @@ -74,6 +74,7 @@ import org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHelper; import org.apache.hadoop.hive.ql.exec.tez.TezJobExecHelper; import org.apache.hadoop.hive.ql.metadata.HiveMaterializedViewsRegistry; +import org.apache.hadoop.hive.ql.parse.CalcitePlanner; import org.apache.hadoop.hive.ql.parse.HiveParser; import org.apache.hadoop.hive.ql.processors.CommandProcessor; import org.apache.hadoop.hive.ql.processors.CommandProcessorFactory; @@ -773,6 +774,8 @@ public int run(String[] args) throws Exception { ss.updateThreadName(); + // Initialize metadata provider class + CalcitePlanner.initializeMetadataProviderClass(); // Create views registry HiveMaterializedViewsRegistry.get().init(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java index 41c2f9e6a6..0a2714255e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java @@ -17,14 +17,43 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite; +import java.util.List; +import org.apache.calcite.adapter.druid.DruidQuery; +import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcAggregate; +import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcFilter; +import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcJoin; +import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcProject; +import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcSort; +import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcUnion; +import org.apache.calcite.plan.hep.HepRelVertex; +import 
org.apache.calcite.plan.volcano.AbstractConverter; +import org.apache.calcite.plan.volcano.RelSubset; +import org.apache.calcite.rel.AbstractRelNode; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.convert.ConverterImpl; import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; -import org.apache.calcite.rel.metadata.DefaultRelMetadataProvider; +import org.apache.calcite.rel.metadata.JaninoRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMetadataProvider; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCostModel; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveDefaultCostModel; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveOnTezCostModel; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveRelMdCost; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExcept; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIntersect; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableFunctionScan; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion; +import 
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.jdbc.HiveJdbcConverter; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.jdbc.JdbcHiveTableScan; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdColumnUniqueness; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdCollation; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdDistinctRowCount; @@ -41,45 +70,121 @@ public class HiveDefaultRelMetadataProvider { - private final HiveConf hiveConf; + /** + * The default metadata provider can be instantiated statically since + * it does not need any parameter specified by user (hive conf). + */ + private static final JaninoRelMetadataProvider DEFAULT = + JaninoRelMetadataProvider.of( + ChainedRelMetadataProvider.of( + ImmutableList.of( + HiveRelMdDistinctRowCount.SOURCE, + new HiveRelMdCost(HiveDefaultCostModel.getCostModel()).getMetadataProvider(), + HiveRelMdSelectivity.SOURCE, + HiveRelMdRowCount.SOURCE, + HiveRelMdUniqueKeys.SOURCE, + HiveRelMdColumnUniqueness.SOURCE, + HiveRelMdSize.SOURCE, + HiveRelMdMemory.SOURCE, + HiveRelMdDistribution.SOURCE, + HiveRelMdCollation.SOURCE, + HiveRelMdPredicates.SOURCE, + JaninoRelMetadataProvider.DEFAULT))); + + /** + * This is the list of operators that are specifically used in Hive and + * should be loaded by the metadata providers. 
+ */ + private static final List<Class<? extends RelNode>> HIVE_REL_NODE_CLASSES = + ImmutableList.of( + RelNode.class, + AbstractRelNode.class, + RelSubset.class, + HepRelVertex.class, + ConverterImpl.class, + AbstractConverter.class, + + HiveTableScan.class, + HiveAggregate.class, + HiveExcept.class, + HiveFilter.class, + HiveIntersect.class, + HiveJoin.class, + HiveMultiJoin.class, + HiveProject.class, + HiveRelNode.class, + HiveSemiJoin.class, + HiveSortExchange.class, + HiveSortLimit.class, + HiveTableFunctionScan.class, + HiveUnion.class, + + DruidQuery.class, + + HiveJdbcConverter.class, + JdbcHiveTableScan.class, + JdbcAggregate.class, + JdbcFilter.class, + JdbcJoin.class, + JdbcProject.class, + JdbcSort.class, + JdbcUnion.class); + + private final RelMetadataProvider metadataProvider; public HiveDefaultRelMetadataProvider(HiveConf hiveConf) { - this.hiveConf = hiveConf; + this.metadataProvider = init(hiveConf); } - public RelMetadataProvider getMetadataProvider() { - + private RelMetadataProvider init(HiveConf hiveConf) { // Create cost metadata provider - final HiveCostModel cm; - if (HiveConf.getVar(this.hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez") - && HiveConf.getBoolVar(this.hiveConf, HiveConf.ConfVars.HIVE_CBO_EXTENDED_COST_MODEL)) { - cm = HiveOnTezCostModel.getCostModel(hiveConf); - } else { - cm = HiveDefaultCostModel.getCostModel(); + if (HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez") + && HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_CBO_EXTENDED_COST_MODEL)) { + // Get max split size for HiveRelMdParallelism + final Double maxSplitSize = (double) HiveConf.getLongVar( + hiveConf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE); + + // Create and return metadata provider + JaninoRelMetadataProvider metadataProvider = JaninoRelMetadataProvider.of( + ChainedRelMetadataProvider.of( + ImmutableList.of( + HiveRelMdDistinctRowCount.SOURCE, + new HiveRelMdCost(HiveOnTezCostModel.getCostModel(hiveConf)).getMetadataProvider(), + 
HiveRelMdSelectivity.SOURCE, + HiveRelMdRowCount.SOURCE, + HiveRelMdUniqueKeys.SOURCE, + HiveRelMdColumnUniqueness.SOURCE, + HiveRelMdSize.SOURCE, + HiveRelMdMemory.SOURCE, + new HiveRelMdParallelism(maxSplitSize).getMetadataProvider(), + HiveRelMdDistribution.SOURCE, + HiveRelMdCollation.SOURCE, + HiveRelMdPredicates.SOURCE, + JaninoRelMetadataProvider.DEFAULT))); + + metadataProvider.register(HIVE_REL_NODE_CLASSES); + + return metadataProvider; } - // Get max split size for HiveRelMdParallelism - final Double maxSplitSize = (double) HiveConf.getLongVar( - this.hiveConf, - HiveConf.ConfVars.MAPREDMAXSPLITSIZE); - - // Return MD provider - return ChainedRelMetadataProvider.of(ImmutableList - .of( - HiveRelMdDistinctRowCount.SOURCE, - new HiveRelMdCost(cm).getMetadataProvider(), - HiveRelMdSelectivity.SOURCE, - HiveRelMdRowCount.SOURCE, - HiveRelMdUniqueKeys.SOURCE, - HiveRelMdColumnUniqueness.SOURCE, - HiveRelMdSize.SOURCE, - HiveRelMdMemory.SOURCE, - new HiveRelMdParallelism(maxSplitSize).getMetadataProvider(), - HiveRelMdDistribution.SOURCE, - HiveRelMdCollation.SOURCE, - HiveRelMdPredicates.SOURCE, - DefaultRelMetadataProvider.INSTANCE)); + return DEFAULT; } + public RelMetadataProvider getMetadataProvider() { + return metadataProvider; + } + + /** + * This method can be called at startup time to pre-register all the + * additional Hive classes (compared to Calcite core classes) that may + * be visited during the planning phase. 
+ */ + public static void initializeMetadataProviderClass() { + // This will register the classes in the default Janino implementation + JaninoRelMetadataProvider.DEFAULT.register( + HiveDefaultRelMetadataProvider.HIVE_REL_NODE_CLASSES); + // This will register the classes in the default Hive implementation + DEFAULT.register(HiveDefaultRelMetadataProvider.HIVE_REL_NODE_CLASSES); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index f5a1c74671..de88783584 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -537,7 +537,7 @@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticExcept if (!explainConfig.isCboJoinCost()) { // Include cost as provided by Calcite newPlan.getCluster().invalidateMetadataQuery(); - RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(DefaultRelMetadataProvider.INSTANCE)); + RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.DEFAULT); } if (explainConfig.isFormatted()) { this.ctx.setCalcitePlan(HiveRelOptUtil.toJsonString(newPlan)); @@ -1768,17 +1768,21 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu // Create and set MD provider HiveDefaultRelMetadataProvider mdProvider = new HiveDefaultRelMetadataProvider(conf); - RelMetadataQuery.THREAD_PROVIDERS.set( - JaninoRelMetadataProvider.of(mdProvider.getMetadataProvider())); + RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(mdProvider.getMetadataProvider())); //Remove subquery - LOG.debug("Plan before removing subquery:\n" + RelOptUtil.toString(calciteGenPlan)); + if (LOG.isDebugEnabled()) { + LOG.debug("Plan before removing subquery:\n" + RelOptUtil.toString(calciteGenPlan)); + } calciteGenPlan = hepPlan(calciteGenPlan, false, mdProvider.getMetadataProvider(), null, new 
HiveSubQueryRemoveRule(conf)); - LOG.debug("Plan just after removing subquery:\n" + RelOptUtil.toString(calciteGenPlan)); - + if (LOG.isDebugEnabled()) { + LOG.debug("Plan just after removing subquery:\n" + RelOptUtil.toString(calciteGenPlan)); + } calciteGenPlan = HiveRelDecorrelator.decorrelateQuery(calciteGenPlan); - LOG.debug("Plan after decorrelation:\n" + RelOptUtil.toString(calciteGenPlan)); + if (LOG.isDebugEnabled()) { + LOG.debug("Plan after decorrelation:\n" + RelOptUtil.toString(calciteGenPlan)); + } // Validate query materialization for query results caching. This check needs // to occur before constant folding, which may remove some function calls @@ -2243,7 +2247,7 @@ private RelNode copyNodeScan(RelNode scan) { // Use Calcite cost model for view rewriting optCluster.invalidateMetadataQuery(); - RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(DefaultRelMetadataProvider.INSTANCE)); + RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.DEFAULT); // Add materializations to planner for (RelOptMaterialization materialization : materializations) { @@ -5148,6 +5152,15 @@ private QBParseInfo getQBParseInfo(QB qb) throws CalciteSemanticException { } } + /** + * This method can be called at startup time to pre-register all the + * additional Hive classes (compared to Calcite core classes) that may + * be visited during the planning phase. 
+ */ + public static void initializeMetadataProviderClass() { + HiveDefaultRelMetadataProvider.initializeMetadataProviderClass(); + } + private enum TableType { DRUID, NATIVE, diff --git a/service/src/java/org/apache/hive/service/server/HiveServer2.java b/service/src/java/org/apache/hive/service/server/HiveServer2.java index f9fb854106..452b8d823b 100644 --- a/service/src/java/org/apache/hive/service/server/HiveServer2.java +++ b/service/src/java/org/apache/hive/service/server/HiveServer2.java @@ -76,6 +76,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveMaterializedViewsRegistry; import org.apache.hadoop.hive.ql.metadata.HiveUtils; import org.apache.hadoop.hive.ql.metadata.events.NotificationEventPoll; +import org.apache.hadoop.hive.ql.parse.CalcitePlanner; import org.apache.hadoop.hive.ql.plan.mapper.StatsSources; import org.apache.hadoop.hive.ql.security.authorization.HiveMetastoreAuthorizationProvider; import org.apache.hadoop.hive.ql.security.authorization.PolicyProviderContainer; @@ -241,6 +242,9 @@ public void run() { LlapRegistryService.getClient(hiveConf); } + // Initialize metadata provider class + CalcitePlanner.initializeMetadataProviderClass(); + try { sessionHive = Hive.get(hiveConf); } catch (HiveException e) {