diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFieldTrimmerRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFieldTrimmerRule.java index ac050df30b..cfb1e7ef38 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFieldTrimmerRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFieldTrimmerRule.java @@ -63,9 +63,8 @@ public void onMatch(RelOptRuleCall call) { final HepPlanner tmpPlanner = new HepPlanner(PROGRAM); tmpPlanner.setRoot(node); node = tmpPlanner.findBestExp(); - final HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null, - relBuilderFactory.create(node.getCluster(), null), fetchStats); - call.transformTo(fieldTrimmer.trim(node)); + call.transformTo( + HiveRelFieldTrimmer.get(fetchStats).trim(node)); triggered = true; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java index d218face89..a54ea8a910 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java @@ -28,7 +28,10 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; import org.apache.calcite.adapter.druid.DruidQuery; +import org.apache.calcite.jdbc.JavaTypeFactoryImpl; import org.apache.calcite.linq4j.Ord; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptPlanner; import org.apache.calcite.plan.RelOptTable; import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelNode; @@ -51,10 +54,13 @@ import org.apache.calcite.rex.RexTableInputRef; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.rex.RexVisitor; +import org.apache.calcite.sql.SqlExplainFormat; +import org.apache.calcite.sql.SqlExplainLevel; 
import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.validate.SqlValidator; import org.apache.calcite.sql2rel.CorrelationReferenceFinder; import org.apache.calcite.sql2rel.RelFieldTrimmer; +import org.apache.calcite.sql2rel.SqlToRelConverter; import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.util.ImmutableBitSet; import org.apache.calcite.util.Pair; @@ -62,15 +68,19 @@ import org.apache.calcite.util.mapping.Mapping; import org.apache.calcite.util.mapping.MappingType; import org.apache.calcite.util.mapping.Mappings; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl; import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableFunctionScan; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; +import org.apache.hadoop.hive.ql.parse.CalcitePlanner; import org.apache.hadoop.hive.ql.parse.ColumnAccessInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -81,28 +91,88 @@ protected static final Logger LOG = LoggerFactory.getLogger(HiveRelFieldTrimmer.class); - private ColumnAccessInfo columnAccessInfo; - private Map viewProjectToTableSchema; + // We initialize the field trimmer statically here and we will reuse it across + // queries. 
The reason is that otherwise we will create a new dispatcher with + // each instantiation, thus effectively removing the caching mechanism that is + // built within the dispatcher. + private static final RelOptCluster REL_OPT_CLUSTER; + static { + final RelOptPlanner planner = CalcitePlanner.createPlanner(new HiveConf()); + final RexBuilder rexBuilder = new RexBuilder( + new JavaTypeFactoryImpl(new HiveTypeSystemImpl())); + REL_OPT_CLUSTER = RelOptCluster.create(planner, rexBuilder); + } + private static final HiveRelFieldTrimmer FIELD_TRIMMER_STATS = + new HiveRelFieldTrimmer( + null, + HiveRelFactories.HIVE_BUILDER.create(REL_OPT_CLUSTER, null), + true); + private static final HiveRelFieldTrimmer FIELD_TRIMMER_NO_STATS = + new HiveRelFieldTrimmer( + null, + HiveRelFactories.HIVE_BUILDER.create(REL_OPT_CLUSTER, null), + false); + private final RelBuilder relBuilder; private final boolean fetchStats; - public HiveRelFieldTrimmer(SqlValidator validator, RelBuilder relBuilder) { - this(validator, relBuilder, false); - } + private static final ThreadLocal<ColumnAccessInfo> COLUMN_ACCESS_INFO = + new ThreadLocal<>(); + private static final ThreadLocal<Map<HiveProject, Table>> VIEW_PROJECT_TO_TABLE_SCHEMA = + new ThreadLocal<>(); - public HiveRelFieldTrimmer(SqlValidator validator, RelBuilder relBuilder, - ColumnAccessInfo columnAccessInfo, Map<HiveProject, Table> viewToTableSchema) { - this(validator, relBuilder, false); - this.columnAccessInfo = columnAccessInfo; - this.viewProjectToTableSchema = viewToTableSchema; - } - public HiveRelFieldTrimmer(SqlValidator validator, RelBuilder relBuilder, boolean fetchStats) { + private HiveRelFieldTrimmer(SqlValidator validator, RelBuilder relBuilder, boolean fetchStats) { super(validator, relBuilder); this.relBuilder = relBuilder; this.fetchStats = fetchStats; } + /** + * Returns a HiveRelFieldTrimmer instance that does not retrieve + * stats.
+ */ + public static HiveRelFieldTrimmer get() { + return get(false); + } + + /** + * Returns a HiveRelFieldTrimmer instance that retrieves + * stats. + */ + public static HiveRelFieldTrimmer get(boolean fetchStats) { + return fetchStats ? FIELD_TRIMMER_STATS : FIELD_TRIMMER_NO_STATS; + } + + /** + * Trims unused fields from a relational expression. + * + * <p>We presume that all fields of the relational expression are wanted by + * its consumer, so only trim fields that are not used within the tree. + * + * @param root Root node of relational expression + * @return Trimmed relational expression + */ + @Override + public RelNode trim(RelNode root) { + return trim(root, null, null); + } + + public RelNode trim(RelNode root, + ColumnAccessInfo columnAccessInfo, Map<HiveProject, Table> viewToTableSchema) { + try { + // Set local thread variables + COLUMN_ACCESS_INFO.set(columnAccessInfo); + VIEW_PROJECT_TO_TABLE_SCHEMA.set(viewToTableSchema); + // Execute pruning + return super.trim(root); + } finally { + // Always remove the local thread variables to avoid leaks + COLUMN_ACCESS_INFO.remove(); + VIEW_PROJECT_TO_TABLE_SCHEMA.remove(); + } + } + /** * Trims the fields of an input relational expression. * @@ -675,12 +745,14 @@ public TrimResult trimFields(Aggregate aggregate, ImmutableBitSet fieldsUsed, Se public TrimResult trimFields(Project project, ImmutableBitSet fieldsUsed, Set extraFields) { // set columnAccessInfo for ViewColumnAuthorization - if (this.columnAccessInfo != null && this.viewProjectToTableSchema != null - && this.viewProjectToTableSchema.containsKey(project)) { + final ColumnAccessInfo columnAccessInfo = COLUMN_ACCESS_INFO.get(); + final Map<HiveProject, Table> viewProjectToTableSchema = VIEW_PROJECT_TO_TABLE_SCHEMA.get(); + if (columnAccessInfo != null && viewProjectToTableSchema != null + && viewProjectToTableSchema.containsKey(project)) { for (Ord ord : Ord.zip(project.getProjects())) { if (fieldsUsed.get(ord.i)) { - Table tab = this.viewProjectToTableSchema.get(project); - this.columnAccessInfo.add(tab.getCompleteName(), tab.getAllCols().get(ord.i).getName()); + Table tab = viewProjectToTableSchema.get(project); + columnAccessInfo.add(tab.getCompleteName(), tab.getAllCols().get(ord.i).getName()); } } } @@ -690,7 +762,8 @@ public TrimResult trimFields(Project project, ImmutableBitSet fieldsUsed, public TrimResult trimFields(HiveTableScan
tableAccessRel, ImmutableBitSet fieldsUsed, Set extraFields) { final TrimResult result = super.trimFields(tableAccessRel, fieldsUsed, extraFields); - if (this.columnAccessInfo != null) { + final ColumnAccessInfo columnAccessInfo = COLUMN_ACCESS_INFO.get(); + if (columnAccessInfo != null) { // Store information about column accessed by the table so it can be used // to send only this information for column masking final RelOptHiveTable tab = (RelOptHiveTable) tableAccessRel.getTable(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 537355f7ed..820e284ee0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -1794,11 +1794,8 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu calciteGenPlan.getCluster().getPlanner().setExecutor(executorProvider); // We need to get the ColumnAccessInfo and viewToTableSchema for views. - HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null, - HiveRelFactories.HIVE_BUILDER.create(optCluster, null), - this.columnAccessInfo, this.viewProjectToTableSchema); - - fieldTrimmer.trim(calciteGenPlan); + HiveRelFieldTrimmer.get() + .trim(calciteGenPlan, this.columnAccessInfo, this.viewProjectToTableSchema); // Create and set MD provider HiveDefaultRelMetadataProvider mdProvider = new HiveDefaultRelMetadataProvider(conf);