diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
index 10bd97b..6b39aa8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
@@ -747,6 +747,10 @@ public static void doAuthorization(BaseSemanticAnalyzer sem, String command)
         continue;
       }
       Table tbl = read.getTable();
+      if (tbl.isView()) {
+        tab2Cols.put(tbl,
+            sem.getColumnAccessInfo().getTableToColumnAccessMap().get(tbl.getTableName()));
+      }
       if (read.getPartition() != null) {
         Partition partition = read.getPartition();
         tbl = partition.getTable();
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
index 78bce23..6d5202b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
@@ -52,6 +52,7 @@
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.parse.RowResolver;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
@@ -788,6 +789,13 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
       ArrayList<ColumnInfo> rs_newsignature = new ArrayList<ColumnInfo>();
       for (String col : cols) {
         int index = originalOutputColumnNames.indexOf(col);
+        if (cppCtx.getParseContext().getColumnAccessInfo() != null
+            && cppCtx.getParseContext().getViewProjectToTableSchema() != null
+            && cppCtx.getParseContext().getViewProjectToTableSchema().containsKey(op)) {
+          Table tab = cppCtx.getParseContext().getViewProjectToTableSchema().get(op);
+          cppCtx.getParseContext().getColumnAccessInfo()
+              .add(tab.getTableName(), tab.getCols().get(index).getName());
+        }
         newOutputColumnNames.add(col);
         newColList.add(originalColList.get(index));
         rs_newsignature.add(rs_oldsignature.get(index));
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
index 18145ae..ce30933 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
@@ -17,12 +17,15 @@
  */
 package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
 
+import java.math.BigDecimal;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.LinkedHashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 
+import org.apache.calcite.linq4j.Ord;
 import org.apache.calcite.plan.RelOptCluster;
 import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.rel.RelCollation;
@@ -50,7 +53,6 @@
 import org.apache.calcite.sql2rel.RelFieldTrimmer;
 import org.apache.calcite.tools.RelBuilder;
 import org.apache.calcite.util.ImmutableBitSet;
-import org.apache.calcite.util.Stacks;
 import org.apache.calcite.util.Util;
 import org.apache.calcite.util.mapping.IntPair;
 import org.apache.calcite.util.mapping.Mapping;
@@ -58,8 +60,11 @@
 import org.apache.calcite.util.mapping.Mappings;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
+import org.apache.hadoop.hive.ql.parse.ColumnAccessInfo;
 
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Lists;
@@ -70,11 +75,23 @@
 
   private RelBuilder relBuilder;
 
+  private ColumnAccessInfo columnAccessInfo;
+
+  private Map<HiveProject, Table> viewProjectToTableSchema;
+
   public HiveRelFieldTrimmer(SqlValidator validator, RelBuilder relBuilder) {
     super(validator, relBuilder);
     this.relBuilder = relBuilder;
   }
 
+  public HiveRelFieldTrimmer(SqlValidator validator, RelBuilder relBuilder,
+      ColumnAccessInfo columnAccessInfo, Map<HiveProject, Table> viewToTableSchema) {
+    super(validator, relBuilder);
+    this.relBuilder = relBuilder;
+    this.columnAccessInfo = columnAccessInfo;
+    this.viewProjectToTableSchema = viewToTableSchema;
+  }
+
   /**
    * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
    * {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin}.
@@ -358,4 +375,91 @@ protected RexNode handle(RexFieldAccess fieldAccess) {
     }
     return new TrimResult(r, mapping);
   }
+
+  /**
+   * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
+   * {@link org.apache.calcite.rel.logical.LogicalProject}.
+   */
+  public TrimResult trimFields(Project project, ImmutableBitSet fieldsUsed,
+      Set<RelDataTypeField> extraFields) {
+    final RelDataType rowType = project.getRowType();
+    final int fieldCount = rowType.getFieldCount();
+    final RelNode input = project.getInput();
+
+    // Which fields are required from the input?
+    final Set<RelDataTypeField> inputExtraFields = new LinkedHashSet<>(extraFields);
+    RelOptUtil.InputFinder inputFinder = new RelOptUtil.InputFinder(inputExtraFields);
+    for (Ord<RexNode> ord : Ord.zip(project.getProjects())) {
+      if (fieldsUsed.get(ord.i)) {
+        if (this.columnAccessInfo != null && this.viewProjectToTableSchema != null
+            && this.viewProjectToTableSchema.containsKey(project)) {
+          Table tab = this.viewProjectToTableSchema.get(project);
+          this.columnAccessInfo.add(tab.getTableName(), tab.getCols().get(ord.i).getName());
+        }
+        ord.e.accept(inputFinder);
+      }
+    }
+    ImmutableBitSet inputFieldsUsed = inputFinder.inputBitSet.build();
+
+    // Create input with trimmed columns.
+    TrimResult trimResult = trimChild(project, input, inputFieldsUsed, inputExtraFields);
+    RelNode newInput = trimResult.left;
+    final Mapping inputMapping = trimResult.right;
+
+    // If the input is unchanged, and we need to project all columns,
+    // there's nothing we can do.
+    if (newInput == input && fieldsUsed.cardinality() == fieldCount) {
+      return result(project, Mappings.createIdentity(fieldCount));
+    }
+
+    // Some parts of the system can't handle rows with zero fields, so
+    // pretend that one field is used.
+    if (fieldsUsed.cardinality() == 0) {
+      return dummyProject(fieldCount, newInput);
+    }
+
+    // Build new project expressions, and populate the mapping.
+    final List<RexNode> newProjects = new ArrayList<>();
+    final RexVisitor<RexNode> shuttle = new RexPermuteInputsShuttle(inputMapping, newInput);
+    final Mapping mapping = Mappings.create(MappingType.INVERSE_SURJECTION, fieldCount,
+        fieldsUsed.cardinality());
+    for (Ord<RexNode> ord : Ord.zip(project.getProjects())) {
+      if (fieldsUsed.get(ord.i)) {
+        mapping.set(ord.i, newProjects.size());
+        RexNode newProjectExpr = ord.e.accept(shuttle);
+        newProjects.add(newProjectExpr);
+      }
+    }
+
+    final RelDataType newRowType = RelOptUtil.permute(project.getCluster().getTypeFactory(),
+        rowType, mapping);
+
+    relBuilder.push(newInput);
+    relBuilder.project(newProjects, newRowType.getFieldNames());
+    return result(relBuilder.build(), mapping);
+  }
+
+  /**
+   * Creates a project with a dummy column, to protect the parts of the system
+   * that cannot handle a relational expression with no columns.
+   *
+   * @param fieldCount
+   *          Number of fields in the original relational expression
+   * @param input
+   *          Trimmed input
+   * @return Dummy project, or null if no dummy is required
+   */
+  private TrimResult dummyProject(int fieldCount, RelNode input) {
+    final RelOptCluster cluster = input.getCluster();
+    final Mapping mapping = Mappings.create(MappingType.INVERSE_SURJECTION, fieldCount, 1);
+    if (input.getRowType().getFieldCount() == 1) {
+      // Input already has one field (and may in fact be a dummy project we
+      // created for the child). We can't do better.
+      return result(input, mapping);
+    }
+    final RexLiteral expr = cluster.getRexBuilder().makeExactLiteral(BigDecimal.ZERO);
+    relBuilder.push(input);
+    relBuilder.project(ImmutableList.<RexNode>of(expr), ImmutableList.of("DUMMY"));
+    return result(relBuilder.build(), mapping);
+  }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 809affb..64204e8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -695,7 +695,12 @@ private static void replaceASTChild(ASTNode child, ASTNode newChild) {
   ASTNode getOptimizedAST() throws SemanticException {
     ASTNode optiqOptimizedAST = null;
     RelNode optimizedOptiqPlan = null;
-    CalcitePlannerAction calcitePlannerAction = new CalcitePlannerAction(prunedPartitions);
+
+    CalcitePlannerAction calcitePlannerAction = null;
+    if (this.columnAccessInfo == null) {
+      this.columnAccessInfo = new ColumnAccessInfo();
+    }
+    calcitePlannerAction = new CalcitePlannerAction(prunedPartitions, this.columnAccessInfo);
     try {
       optimizedOptiqPlan = Frameworks.withPlanner(calcitePlannerAction, Frameworks
@@ -717,7 +722,11 @@ ASTNode getOptimizedAST() throws SemanticException {
    */
   Operator getOptimizedHiveOPDag() throws SemanticException {
     RelNode optimizedOptiqPlan = null;
-    CalcitePlannerAction calcitePlannerAction = new CalcitePlannerAction(prunedPartitions);
+    CalcitePlannerAction calcitePlannerAction = null;
+    if (this.columnAccessInfo == null) {
+      this.columnAccessInfo = new ColumnAccessInfo();
+    }
+    calcitePlannerAction = new CalcitePlannerAction(prunedPartitions, this.columnAccessInfo);
     try {
       optimizedOptiqPlan = Frameworks.withPlanner(calcitePlannerAction, Frameworks
@@ -879,14 +888,17 @@ private RowResolver genRowResolver(Operator op, QB qb) {
     private RelOptCluster cluster;
     private RelOptSchema relOptSchema;
     private final Map<String, PrunedPartitionList> partitionCache;
+    private final ColumnAccessInfo columnAccessInfo;
+    private Map<HiveProject, Table> viewProjectToTableSchema;
     // TODO: Do we need to keep track of RR, ColNameToPosMap for every op or
     // just last one.
     LinkedHashMap<RelNode, RowResolver> relToHiveRR = new LinkedHashMap<RelNode, RowResolver>();
     LinkedHashMap<RelNode, ImmutableMap<String, Integer>> relToHiveColNameCalcitePosMap = new LinkedHashMap<RelNode, ImmutableMap<String, Integer>>();
 
-    CalcitePlannerAction(Map<String, PrunedPartitionList> partitionCache) {
+    CalcitePlannerAction(Map<String, PrunedPartitionList> partitionCache, ColumnAccessInfo columnAccessInfo) {
       this.partitionCache = partitionCache;
+      this.columnAccessInfo = columnAccessInfo;
     }
 
     @Override
@@ -928,6 +940,12 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
       }
       perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
         "Calcite: Plan generation");
 
+      // We need to get the ColumnAccessInfo and viewToTableSchema for views.
+      HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null,
+          HiveRelFactories.HIVE_BUILDER.create(cluster, null), this.columnAccessInfo,
+          this.viewProjectToTableSchema);
+      fieldTrimmer.trim(calciteGenPlan);
+
       // Create MD provider
       HiveDefaultRelMetadataProvider mdProvider = new HiveDefaultRelMetadataProvider(conf);
@@ -1048,7 +1066,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
           HiveJoinToMultiJoinRule.INSTANCE, HiveProjectMergeRule.INSTANCE);
       // The previous rules can pull up projections through join operators,
       // thus we run the field trimmer again to push them back down
-      HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null,
+      fieldTrimmer = new HiveRelFieldTrimmer(null,
           HiveRelFactories.HIVE_BUILDER.create(cluster, null));
       calciteOptimizedPlan = fieldTrimmer.trim(calciteOptimizedPlan);
       calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null,
@@ -3019,7 +3037,19 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB) throws SemanticExcept
       // 1.1. Recurse over the subqueries to fill the subquery part of the plan
       for (String subqAlias : qb.getSubqAliases()) {
         QBExpr qbexpr = qb.getSubqForAlias(subqAlias);
-        aliasToRel.put(subqAlias, genLogicalPlan(qbexpr));
+        RelNode relNode = genLogicalPlan(qbexpr);
+        aliasToRel.put(subqAlias, relNode);
+        if (qb.getViewToTabSchema().containsKey(subqAlias)) {
+          if (relNode instanceof HiveProject) {
+            if (this.viewProjectToTableSchema == null) {
+              this.viewProjectToTableSchema = new LinkedHashMap<>();
+            }
+            viewProjectToTableSchema.put((HiveProject) relNode, qb.getViewToTabSchema().get(subqAlias));
+          } else {
+            throw new SemanticException("View " + subqAlias + " is corresponding to "
+                + relNode.toString() + ", rather than a HiveProject.");
+          }
+        }
       }
 
       // 1.2 Recurse over all the source tables
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
index 642c227..3abae95 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
@@ -36,10 +36,12 @@
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.hooks.LineageInfo;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
+import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
 import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.AnalyzeRewriteContext;
@@ -107,7 +109,9 @@
   private CreateTableDesc createTableDesc;
 
   private boolean reduceSinkAddedBySortedDynPartition;
-
+  private Map<SelectOperator, Table> viewProjectToTableSchema;
+  private ColumnAccessInfo columnAccessInfo;
+
   public ParseContext() {
   }
@@ -165,7 +169,7 @@ public ParseContext(
       Map<String, ReadEntity> viewAliasToInput,
       List<ReduceSinkOperator> reduceSinkOperatorsAddedByEnforceBucketingSorting,
       AnalyzeRewriteContext analyzeRewrite, CreateTableDesc createTableDesc,
-      QueryProperties queryProperties) {
+      QueryProperties queryProperties, Map<SelectOperator, Table> viewProjectToTableSchema, ColumnAccessInfo columnAccessInfo) {
     this.conf = conf;
     this.opToPartPruner = opToPartPruner;
     this.opToPartList = opToPartList;
@@ -192,6 +196,8 @@ public ParseContext(
     this.analyzeRewrite = analyzeRewrite;
     this.createTableDesc = createTableDesc;
     this.queryProperties = queryProperties;
+    this.viewProjectToTableSchema = viewProjectToTableSchema;
+    this.columnAccessInfo = columnAccessInfo;
   }
 
   /**
@@ -539,4 +545,16 @@ public void setReduceSinkAddedBySortedDynPartition(
   public boolean isReduceSinkAddedBySortedDynPartition() {
     return reduceSinkAddedBySortedDynPartition;
   }
+
+  public Map<SelectOperator, Table> getViewProjectToTableSchema() {
+    return viewProjectToTableSchema;
+  }
+
+  public ColumnAccessInfo getColumnAccessInfo() {
+    return columnAccessInfo;
+  }
+
+  public void setColumnAccessInfo(ColumnAccessInfo columnAccessInfo) {
+    this.columnAccessInfo = columnAccessInfo;
+  }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
index f04b493..1532758 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
@@ -47,6 +47,7 @@
   private int numSelDi = 0;
   private HashMap<String, String> aliasToTabs;
   private HashMap<String, QBExpr> aliasToSubq;
+  private HashMap<String, Table> viewToTabSchema;
   private HashMap<String, Map<String, String>> aliasToProps;
   private List<String> aliases;
   private QBParseInfo qbp;
@@ -110,6 +111,7 @@ public QB(String outer_id, String alias, boolean isSubQ) {
     // Must be deterministic order maps - see HIVE-8707
     aliasToTabs = new LinkedHashMap<String, String>();
     aliasToSubq = new LinkedHashMap<String, QBExpr>();
+    viewToTabSchema = new LinkedHashMap<String, Table>();
     aliasToProps = new LinkedHashMap<String, Map<String, String>>();
     aliases = new ArrayList<String>();
     if (alias != null) {
@@ -231,15 +233,18 @@ public String getTabNameForAlias(String alias) {
     return aliasToProps.get(alias.toLowerCase());
   }
 
-  public void rewriteViewToSubq(String alias, String viewName, QBExpr qbexpr) {
+  public void rewriteViewToSubq(String alias, String viewName, QBExpr qbexpr, Table tab) {
     alias = alias.toLowerCase();
     String tableName = aliasToTabs.remove(alias);
     assert (viewName.equals(tableName));
     aliasToSubq.put(alias, qbexpr);
+    if (tab != null) {
+      viewToTabSchema.put(alias, tab);
+    }
   }
 
   public void rewriteCTEToSubq(String alias, String cteName, QBExpr qbexpr) {
-    rewriteViewToSubq(alias, cteName, qbexpr);
+    rewriteViewToSubq(alias, cteName, qbexpr, null);
   }
 
   public QBJoinTree getQbJoinTree() {
@@ -406,4 +411,9 @@ void addEncryptedTargetTablePath(Path p) {
     }
     return encryptedTargetTablePaths;
   }
+
+  public HashMap<String, Table> getViewToTabSchema() {
+    return viewToTabSchema;
+  }
+
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 8a06582..e8dca90 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -117,6 +117,7 @@
 import org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
+import org.apache.hadoop.hive.ql.optimizer.ColumnPruner;
 import org.apache.hadoop.hive.ql.optimizer.Optimizer;
 import org.apache.hadoop.hive.ql.optimizer.Transform;
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
@@ -262,6 +263,7 @@ List<AbstractMapJoinOperator<? extends MapJoinDesc>> listMapJoinOpsNoReducer;
   private HashMap<TableScanOperator, sampleDesc> opToSamplePruner;
   private final Map<TableScanOperator, Map<String, ExprNodeDesc>> opToPartToSkewedPruner;
+  private Map<SelectOperator, Table> viewProjectToTableSchema;
   /**
    * a map for the split sampling, from alias to an instance of SplitSample
    * that describes percentage and number.
   */
@@ -427,7 +429,7 @@ public ParseContext getParseContext() {
         listMapJoinOpsNoReducer, prunedPartitions, opToSamplePruner, globalLimitCtx,
         nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner, viewAliasToInput,
         reduceSinkOperatorsAddedByEnforceBucketingSorting,
-        analyzeRewrite, tableDesc, queryProperties);
+        analyzeRewrite, tableDesc, queryProperties, viewProjectToTableSchema, columnAccessInfo);
   }
 
   public CompilationOpContext getOpContext() {
@@ -2299,7 +2301,13 @@ public Object dispatch(Node nd, java.util.Stack<Node> stack,
     }
     QBExpr qbexpr = new QBExpr(alias);
     doPhase1QBExpr(viewTree, qbexpr, qb.getId(), alias);
-    qb.rewriteViewToSubq(alias, tab_name, qbexpr);
+    if (!this.skipAuthorization()
+        && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) {
+      qb.rewriteViewToSubq(alias, tab_name, qbexpr, tab);
+    }
+    else {
+      qb.rewriteViewToSubq(alias, tab_name, qbexpr, null);
+    }
   }
 
   private boolean isPresent(String[] list, String elem) {
@@ -9851,7 +9859,21 @@ public Operator genPlan(QB qb, boolean skipAmbiguityCheck)
     // Recurse over the subqueries to fill the subquery part of the plan
     for (String alias : qb.getSubqAliases()) {
       QBExpr qbexpr = qb.getSubqForAlias(alias);
-      aliasToOpInfo.put(alias, genPlan(qb, qbexpr));
+      Operator operator = genPlan(qb, qbexpr);
+      aliasToOpInfo.put(alias, operator);
+      if (qb.getViewToTabSchema().containsKey(alias)) {
+        // we set viewProjectToTableSchema so that we can leverage ColumnPruner.
+        if (operator instanceof SelectOperator) {
+          if (this.viewProjectToTableSchema == null) {
+            this.viewProjectToTableSchema = new LinkedHashMap<>();
+          }
+          viewProjectToTableSchema.put((SelectOperator) operator, qb.getViewToTabSchema()
+              .get(alias));
+        } else {
+          throw new SemanticException("View " + alias + " is corresponding to "
+              + operator.getType().name() + ", rather than a SelectOperator.");
+        }
+      }
     }
 
     // Recurse over all the source tables
@@ -10372,7 +10394,7 @@ void analyzeInternal(ASTNode ast, PlannerContext plannerCtx) throws SemanticExce
         listMapJoinOpsNoReducer, prunedPartitions, opToSamplePruner, globalLimitCtx,
         nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner, viewAliasToInput,
         reduceSinkOperatorsAddedByEnforceBucketingSorting,
-        analyzeRewrite, tableDesc, queryProperties);
+        analyzeRewrite, tableDesc, queryProperties, viewProjectToTableSchema, columnAccessInfo);
 
     // 5. Take care of view creation
     if (createVwDesc != null) {
@@ -10414,6 +10436,17 @@ void analyzeInternal(ASTNode ast, PlannerContext plannerCtx) throws SemanticExce
       setTableAccessInfo(tableAccessAnalyzer.analyzeTableAccess());
     }
+    if (viewProjectToTableSchema != null && !viewProjectToTableSchema.isEmpty()) {
+      // this will trigger the column pruner to collect view column
+      // authorization info.
+      pCtx.setColumnAccessInfo(new ColumnAccessInfo());
+      Transform t = new ColumnPruner();
+      pCtx = t.transform(pCtx);
+      this.setColumnAccessInfo(pCtx.getColumnAccessInfo());
+      // set it back so that the column pruner in the optimizer will not get
+      // triggered again.
+      pCtx.setColumnAccessInfo(null);
+    }
 
     // 7. Perform Logical optimization
     if (LOG.isDebugEnabled()) {
       LOG.debug("Before logical optimization\n" + Operator.toString(pCtx.getTopOps().values()));
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
index fc555ca..6cfd755 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
@@ -402,7 +402,7 @@ public ParseContext getParseContext(ParseContext pCtx, List