diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
index 10bd97b..3253146 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
@@ -38,6 +38,7 @@
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Sets;
+
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.mapreduce.MRJobConfig;
 import org.slf4j.Logger;
@@ -85,6 +86,7 @@
 import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
 import org.apache.hadoop.hive.ql.parse.ASTNode;
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
+import org.apache.hadoop.hive.ql.parse.CalcitePlanner;
 import org.apache.hadoop.hive.ql.parse.ColumnAccessInfo;
 import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHook;
 import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext;
@@ -747,6 +749,10 @@ public static void doAuthorization(BaseSemanticAnalyzer sem, String command)
         continue;
       }
       Table tbl = read.getTable();
+      if (tbl.isView() && sem instanceof SemanticAnalyzer) {
+        tab2Cols.put(tbl,
+            sem.getColumnAccessInfo().getTableToColumnAccessMap().get(tbl.getTableName()));
+      }
       if (read.getPartition() != null) {
         Partition partition = read.getPartition();
         tbl = partition.getTable();
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java
index c353e3e..7e39d77 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java
@@ -44,6 +44,7 @@
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 import org.apache.hadoop.hive.ql.lib.Rule;
 import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.parse.ColumnAccessInfo;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
@@ -133,6 +134,9 @@ public ParseContext transform(ParseContext pactx) throws SemanticException {
     ArrayList<Node> topNodes = new ArrayList<Node>();
     topNodes.addAll(pGraphContext.getTopOps().values());
     ogw.startWalking(topNodes, null);
+    // Reset the flag so that a later run of the column pruner does not
+    // redo the view column authorization collection.
+    pGraphContext.setNeedViewColumnAuthorization(false);
     return pGraphContext;
   }
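The Driver hunk above makes doAuthorization() consult the analyzer's ColumnAccessInfo for a view rather than authorizing the view's whole schema. A minimal, self-contained sketch of that lookup, with plain collections standing in for Hive's ColumnAccessInfo and the authorization call — all names here are hypothetical, not Hive API:

    import java.util.Arrays;
    import java.util.List;
    import java.util.Map;
    import java.util.TreeMap;

    // Toy model of the post-patch Driver.doAuthorization() behavior for views.
    public class ViewColumnAuthSketch {
      public static void main(String[] args) {
        // What the analyzer recorded while planning: view name -> referenced columns.
        Map<String, List<String>> tableToColumnAccess = new TreeMap<>();
        tableToColumnAccess.put("default.v_sales", Arrays.asList("region", "amount"));

        // The full view schema, which the pre-patch code effectively checked wholesale.
        List<String> fullSchema = Arrays.asList("id", "region", "amount", "ssn");

        // Post-patch behavior for a view: check only the accessed subset.
        List<String> toAuthorize = tableToColumnAccess.get("default.v_sales");
        System.out.println("authorize " + toAuthorize + " rather than " + fullSchema);
      }
    }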
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
index 78bce23..7638ba0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
@@ -52,6 +52,7 @@
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.parse.RowResolver;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
@@ -781,6 +782,17 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
       // by now, 'prunedCols' are columns used by child operators, and 'columns'
       // are columns used by this select operator.
       List<String> originalOutputColumnNames = conf.getOutputColumnNames();
+      // Record the accessed view columns for authorization.
+      if (cppCtx.getParseContext().getColumnAccessInfo() != null
+          && cppCtx.getParseContext().getViewProjectToTableSchema() != null
+          && cppCtx.getParseContext().getViewProjectToTableSchema().containsKey(op)) {
+        for (String col : cols) {
+          int index = originalOutputColumnNames.indexOf(col);
+          Table tab = cppCtx.getParseContext().getViewProjectToTableSchema().get(op);
+          cppCtx.getParseContext().getColumnAccessInfo()
+              .add(tab.getTableName(), tab.getCols().get(index).getName());
+        }
+      }
       if (cols.size() < originalOutputColumnNames.size()) {
         ArrayList<ExprNodeDesc> newColList = new ArrayList<ExprNodeDesc>();
         ArrayList<String> newOutputColumnNames = new ArrayList<String>();
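In the select-processor hunk above, each surviving column name is located in the operator's original output list, and that same ordinal indexes into the view's schema (tab.getCols()). A runnable toy of the positional bookkeeping — the names and the one-to-one positional assumption are illustrative only:

    import java.util.Arrays;
    import java.util.List;
    import java.util.Map;
    import java.util.Set;
    import java.util.TreeMap;
    import java.util.TreeSet;

    // Toy model of the ColumnPrunerProcFactory hunk: map pruned select columns
    // back to view schema columns by position, then record the access.
    public class PrunedColumnMappingSketch {
      public static void main(String[] args) {
        List<String> originalOutputColumnNames = Arrays.asList("_col0", "_col1", "_col2");
        List<String> viewSchemaCols = Arrays.asList("id", "region", "amount"); // positional
        List<String> prunedCols = Arrays.asList("_col1", "_col2"); // what survived pruning

        Map<String, Set<String>> columnAccessInfo = new TreeMap<>();
        for (String col : prunedCols) {
          int index = originalOutputColumnNames.indexOf(col);
          columnAccessInfo.computeIfAbsent("v_sales", k -> new TreeSet<>())
              .add(viewSchemaCols.get(index));
        }
        System.out.println(columnAccessInfo); // {v_sales=[amount, region]}
      }
    }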
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
index 18145ae..997b82c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
@@ -21,8 +21,10 @@
 import java.util.Collections;
 import java.util.LinkedHashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 
+import org.apache.calcite.linq4j.Ord;
 import org.apache.calcite.plan.RelOptCluster;
 import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.rel.RelCollation;
@@ -50,7 +52,6 @@
 import org.apache.calcite.sql2rel.RelFieldTrimmer;
 import org.apache.calcite.tools.RelBuilder;
 import org.apache.calcite.util.ImmutableBitSet;
-import org.apache.calcite.util.Stacks;
 import org.apache.calcite.util.Util;
 import org.apache.calcite.util.mapping.IntPair;
 import org.apache.calcite.util.mapping.Mapping;
@@ -58,8 +59,11 @@
 import org.apache.calcite.util.mapping.Mappings;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
+import org.apache.hadoop.hive.ql.parse.ColumnAccessInfo;
 
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Lists;
@@ -70,11 +74,23 @@
   private RelBuilder relBuilder;
 
+  private ColumnAccessInfo columnAccessInfo;
+
+  private Map<HiveProject, Table> viewProjectToTableSchema;
+
   public HiveRelFieldTrimmer(SqlValidator validator, RelBuilder relBuilder) {
     super(validator, relBuilder);
     this.relBuilder = relBuilder;
   }
 
+  public HiveRelFieldTrimmer(SqlValidator validator, RelBuilder relBuilder,
+      ColumnAccessInfo columnAccessInfo, Map<HiveProject, Table> viewToTableSchema) {
+    super(validator, relBuilder);
+    this.relBuilder = relBuilder;
+    this.columnAccessInfo = columnAccessInfo;
+    this.viewProjectToTableSchema = viewToTableSchema;
+  }
+
   /**
    * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
    * {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin}.
@@ -358,4 +374,24 @@ protected RexNode handle(RexFieldAccess fieldAccess) {
     }
     return new TrimResult(r, mapping);
   }
+
+  /**
+   * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
+   * {@link org.apache.calcite.rel.logical.LogicalProject}.
+   */
+  public TrimResult trimFields(Project project, ImmutableBitSet fieldsUsed,
+      Set<RelDataTypeField> extraFields) {
+    // Record the used view columns in columnAccessInfo for view column authorization.
+    for (Ord<RexNode> ord : Ord.zip(project.getProjects())) {
+      if (fieldsUsed.get(ord.i)) {
+        if (this.columnAccessInfo != null && this.viewProjectToTableSchema != null
+            && this.viewProjectToTableSchema.containsKey(project)) {
+          Table tab = this.viewProjectToTableSchema.get(project);
+          this.columnAccessInfo.add(tab.getTableName(), tab.getCols().get(ord.i).getName());
+        }
+      }
+    }
+    return super.trimFields(project, fieldsUsed, extraFields);
+  }
 }
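The new trimFields() override records a view column whenever the projection's ordinal has its bit set in fieldsUsed, then defers to the parent trimmer. A standalone approximation, with java.util.BitSet standing in for Calcite's ImmutableBitSet and an invented view name and schema:

    import java.util.Arrays;
    import java.util.BitSet;
    import java.util.List;
    import java.util.Set;
    import java.util.TreeSet;

    // Toy model of the trimFields(Project, ...) override above.
    public class FieldTrimmerSketch {
      public static void main(String[] args) {
        List<String> viewCols = Arrays.asList("id", "region", "amount", "ssn");
        BitSet fieldsUsed = new BitSet();
        fieldsUsed.set(1); // region
        fieldsUsed.set(2); // amount

        Set<String> accessed = new TreeSet<>();
        for (int i = 0; i < viewCols.size(); i++) {
          if (fieldsUsed.get(i)) {
            accessed.add(viewCols.get(i)); // columnAccessInfo.add(view, col)
          }
        }
        System.out.println("v_sales -> " + accessed); // [amount, region]
      }
    }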
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 809affb..64204e8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -695,7 +695,12 @@ private static void replaceASTChild(ASTNode child, ASTNode newChild) {
 
   ASTNode getOptimizedAST() throws SemanticException {
     ASTNode optiqOptimizedAST = null;
     RelNode optimizedOptiqPlan = null;
-    CalcitePlannerAction calcitePlannerAction = new CalcitePlannerAction(prunedPartitions);
+
+    CalcitePlannerAction calcitePlannerAction = null;
+    if (this.columnAccessInfo == null) {
+      this.columnAccessInfo = new ColumnAccessInfo();
+    }
+    calcitePlannerAction = new CalcitePlannerAction(prunedPartitions, this.columnAccessInfo);
     try {
       optimizedOptiqPlan = Frameworks.withPlanner(calcitePlannerAction, Frameworks
@@ -717,7 +722,11 @@ ASTNode getOptimizedAST() throws SemanticException {
    */
  Operator getOptimizedHiveOPDag() throws SemanticException {
     RelNode optimizedOptiqPlan = null;
-    CalcitePlannerAction calcitePlannerAction = new CalcitePlannerAction(prunedPartitions);
+    CalcitePlannerAction calcitePlannerAction = null;
+    if (this.columnAccessInfo == null) {
+      this.columnAccessInfo = new ColumnAccessInfo();
+    }
+    calcitePlannerAction = new CalcitePlannerAction(prunedPartitions, this.columnAccessInfo);
     try {
       optimizedOptiqPlan = Frameworks.withPlanner(calcitePlannerAction, Frameworks
@@ -879,14 +888,17 @@ private RowResolver genRowResolver(Operator op, QB qb) {
     private RelOptCluster cluster;
     private RelOptSchema relOptSchema;
     private final Map<String, PrunedPartitionList> partitionCache;
+    private final ColumnAccessInfo columnAccessInfo;
+    private Map<HiveProject, Table> viewProjectToTableSchema;
 
     // TODO: Do we need to keep track of RR, ColNameToPosMap for every op or
     // just last one.
     LinkedHashMap<RelNode, RowResolver> relToHiveRR = new LinkedHashMap<RelNode, RowResolver>();
     LinkedHashMap<RelNode, ImmutableMap<String, Integer>> relToHiveColNameCalcitePosMap = new LinkedHashMap<RelNode, ImmutableMap<String, Integer>>();
 
-    CalcitePlannerAction(Map<String, PrunedPartitionList> partitionCache) {
+    CalcitePlannerAction(Map<String, PrunedPartitionList> partitionCache, ColumnAccessInfo columnAccessInfo) {
       this.partitionCache = partitionCache;
+      this.columnAccessInfo = columnAccessInfo;
     }
 
     @Override
@@ -928,6 +940,12 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
       }
       perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Plan generation");
 
+      // Trim the generated plan once so that the ColumnAccessInfo for views is collected.
+      HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null,
+          HiveRelFactories.HIVE_BUILDER.create(cluster, null), this.columnAccessInfo,
+          this.viewProjectToTableSchema);
+      fieldTrimmer.trim(calciteGenPlan);
+
       // Create MD provider
       HiveDefaultRelMetadataProvider mdProvider = new HiveDefaultRelMetadataProvider(conf);
@@ -1048,7 +1066,7 @@
           HiveJoinToMultiJoinRule.INSTANCE, HiveProjectMergeRule.INSTANCE);
       // The previous rules can pull up projections through join operators,
       // thus we run the field trimmer again to push them back down
-      HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null,
+      fieldTrimmer = new HiveRelFieldTrimmer(null,
           HiveRelFactories.HIVE_BUILDER.create(cluster, null));
       calciteOptimizedPlan = fieldTrimmer.trim(calciteOptimizedPlan);
       calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null,
@@ -3019,7 +3037,19 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB) throws SemanticExcept
       // 1.1. Recurse over the subqueries to fill the subquery part of the plan
       for (String subqAlias : qb.getSubqAliases()) {
         QBExpr qbexpr = qb.getSubqForAlias(subqAlias);
-        aliasToRel.put(subqAlias, genLogicalPlan(qbexpr));
+        RelNode relNode = genLogicalPlan(qbexpr);
+        aliasToRel.put(subqAlias, relNode);
+        if (qb.getViewToTabSchema().containsKey(subqAlias)) {
+          if (relNode instanceof HiveProject) {
+            if (this.viewProjectToTableSchema == null) {
+              this.viewProjectToTableSchema = new LinkedHashMap<>();
+            }
+            viewProjectToTableSchema.put((HiveProject) relNode, qb.getViewToTabSchema().get(subqAlias));
+          } else {
+            throw new SemanticException("View " + subqAlias + " corresponds to "
+                + relNode.toString() + ", rather than a HiveProject.");
+          }
+        }
       }
 
       // 1.2 Recurse over all the source tables
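Note the wiring above: the first HiveRelFieldTrimmer pass runs over calciteGenPlan purely for its side effect of populating ColumnAccessInfo (its trimmed result is discarded), while the later pass after the join rules is constructed without the authorization state, so columns are not recorded twice. A toy rendering of that two-pass split, with all names hypothetical:

    import java.util.Set;
    import java.util.TreeSet;
    import java.util.function.Consumer;

    // Toy model of the CalcitePlanner trimmer wiring: pass 1 collects, pass 2 trims.
    public class TrimmerPassSketch {
      static String trim(String plan, Consumer<String> columnRecorder) {
        if (columnRecorder != null) {
          columnRecorder.accept("v_sales.region"); // side effect: record the access
        }
        return plan + " (trimmed)";
      }

      public static void main(String[] args) {
        Set<String> columnAccessInfo = new TreeSet<>();
        // Pass 1: collect view column access; the trimmed plan is thrown away.
        trim("genPlan", columnAccessInfo::add);
        // Pass 2 (after the join rules): re-trim for real, with no recorder attached.
        String optimized = trim("optimizedPlan", null);
        System.out.println(columnAccessInfo + " / " + optimized);
      }
    }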
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
index 642c227..4f784d1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
@@ -36,10 +36,12 @@
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.hooks.LineageInfo;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
+import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
 import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.AnalyzeRewriteContext;
@@ -107,6 +109,9 @@
   private CreateTableDesc createTableDesc;
   private boolean reduceSinkAddedBySortedDynPartition;
 
+  private Map<SelectOperator, Table> viewProjectToViewSchema;
+  private ColumnAccessInfo columnAccessInfo;
+  private boolean needViewColumnAuthorization;
 
   public ParseContext() {
   }
@@ -165,7 +170,7 @@ public ParseContext(
       Map<String, ReadEntity> viewAliasToInput,
       List<ReduceSinkOperator> reduceSinkOperatorsAddedByEnforceBucketingSorting,
       AnalyzeRewriteContext analyzeRewrite, CreateTableDesc createTableDesc,
-      QueryProperties queryProperties) {
+      QueryProperties queryProperties, Map<SelectOperator, Table> viewProjectToTableSchema) {
     this.conf = conf;
     this.opToPartPruner = opToPartPruner;
     this.opToPartList = opToPartList;
@@ -192,6 +197,14 @@ public ParseContext(
     this.analyzeRewrite = analyzeRewrite;
     this.createTableDesc = createTableDesc;
     this.queryProperties = queryProperties;
+    this.viewProjectToViewSchema = viewProjectToTableSchema;
+    this.needViewColumnAuthorization = viewProjectToTableSchema != null
+        && !viewProjectToTableSchema.isEmpty();
+    if (this.needViewColumnAuthorization) {
+      // This triggers the column pruner to collect the view column
+      // authorization info.
+      this.columnAccessInfo = new ColumnAccessInfo();
+    }
   }
 
   /**
@@ -539,4 +552,24 @@ public void setReduceSinkAddedBySortedDynPartition(
   public boolean isReduceSinkAddedBySortedDynPartition() {
     return reduceSinkAddedBySortedDynPartition;
   }
+
+  public Map<SelectOperator, Table> getViewProjectToTableSchema() {
+    return viewProjectToViewSchema;
+  }
+
+  public ColumnAccessInfo getColumnAccessInfo() {
+    return columnAccessInfo;
+  }
+
+  public void setColumnAccessInfo(ColumnAccessInfo columnAccessInfo) {
+    this.columnAccessInfo = columnAccessInfo;
+  }
+
+  public boolean isNeedViewColumnAuthorization() {
+    return needViewColumnAuthorization;
+  }
+
+  public void setNeedViewColumnAuthorization(boolean needViewColumnAuthorization) {
+    this.needViewColumnAuthorization = needViewColumnAuthorization;
+  }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
index f04b493..91352b2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
@@ -47,6 +47,7 @@
   private int numSelDi = 0;
   private HashMap<String, String> aliasToTabs;
   private HashMap<String, QBExpr> aliasToSubq;
+  private HashMap<String, Table> viewAliasToViewSchema;
   private HashMap<String, Map<String, String>> aliasToProps;
   private List<String> aliases;
   private QBParseInfo qbp;
@@ -110,6 +111,7 @@ public QB(String outer_id, String alias, boolean isSubQ) {
     // Must be deterministic order maps - see HIVE-8707
     aliasToTabs = new LinkedHashMap<String, String>();
     aliasToSubq = new LinkedHashMap<String, QBExpr>();
+    viewAliasToViewSchema = new LinkedHashMap<String, Table>();
     aliasToProps = new LinkedHashMap<String, Map<String, String>>();
     aliases = new ArrayList<String>();
     if (alias != null) {
@@ -231,15 +233,18 @@ public String getTabNameForAlias(String alias) {
     return aliasToProps.get(alias.toLowerCase());
   }
 
-  public void rewriteViewToSubq(String alias, String viewName, QBExpr qbexpr) {
+  public void rewriteViewToSubq(String alias, String viewName, QBExpr qbexpr, Table tab) {
     alias = alias.toLowerCase();
     String tableName = aliasToTabs.remove(alias);
     assert (viewName.equals(tableName));
     aliasToSubq.put(alias, qbexpr);
+    if (tab != null) {
+      viewAliasToViewSchema.put(alias, tab);
+    }
   }
 
   public void rewriteCTEToSubq(String alias, String cteName, QBExpr qbexpr) {
-    rewriteViewToSubq(alias, cteName, qbexpr);
+    rewriteViewToSubq(alias, cteName, qbexpr, null);
   }
 
   public QBJoinTree getQbJoinTree() {
@@ -406,4 +411,9 @@ void addEncryptedTargetTablePath(Path p) {
     }
     return encryptedTargetTablePaths;
   }
+
+  public HashMap<String, Table> getViewToTabSchema() {
+    return viewAliasToViewSchema;
+  }
+
 }
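ParseContext arms needViewColumnAuthorization only when at least one view project was registered, and the ColumnPruner hunk earlier disarms it after the first pass, so re-running the pruner cannot double-record columns. A small sketch of that arm/disarm handshake, using toy classes rather than Hive's:

    import java.util.Map;
    import java.util.TreeMap;

    // Toy model of the needViewColumnAuthorization flag lifecycle.
    public class ViewAuthFlagSketch {
      private final Map<String, String> viewProjects = new TreeMap<>();
      private boolean needViewColumnAuthorization;

      ViewAuthFlagSketch(Map<String, String> viewProjects) {
        this.viewProjects.putAll(viewProjects);
        // Armed only when there is at least one view project to authorize.
        this.needViewColumnAuthorization = !this.viewProjects.isEmpty();
      }

      void runColumnPruner() {
        if (needViewColumnAuthorization) {
          System.out.println("collecting view column access for " + viewProjects.keySet());
        }
        // Disarm so a second pruner pass is a no-op for authorization.
        needViewColumnAuthorization = false;
      }

      public static void main(String[] args) {
        ViewAuthFlagSketch ctx = new ViewAuthFlagSketch(Map.of("SEL_1", "v_sales"));
        ctx.runColumnPruner(); // collects
        ctx.runColumnPruner(); // silent
      }
    }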
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 8a06582..4d62af1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -117,6 +117,7 @@
 import org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
+import org.apache.hadoop.hive.ql.optimizer.ColumnPruner;
 import org.apache.hadoop.hive.ql.optimizer.Optimizer;
 import org.apache.hadoop.hive.ql.optimizer.Transform;
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
@@ -262,6 +263,7 @@
   List<AbstractMapJoinOperator<? extends MapJoinDesc>> listMapJoinOpsNoReducer;
   private HashMap<TableScanOperator, FilterDesc.sampleDesc> opToSamplePruner;
   private final Map<TableScanOperator, Map<String, ExprNodeDesc>> opToPartToSkewedPruner;
+  private Map<SelectOperator, Table> viewProjectToTableSchema;
   /**
    * a map for the split sampling, from alias to an instance of SplitSample
    * that describes percentage and number.
@@ -427,7 +429,7 @@ public ParseContext getParseContext() {
         listMapJoinOpsNoReducer, prunedPartitions, opToSamplePruner, globalLimitCtx,
         nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner, viewAliasToInput,
         reduceSinkOperatorsAddedByEnforceBucketingSorting,
-        analyzeRewrite, tableDesc, queryProperties);
+        analyzeRewrite, tableDesc, queryProperties, viewProjectToTableSchema);
   }
 
   public CompilationOpContext getOpContext() {
@@ -2299,7 +2301,13 @@ public Object dispatch(Node nd, java.util.Stack<Node> stack,
       }
       QBExpr qbexpr = new QBExpr(alias);
       doPhase1QBExpr(viewTree, qbexpr, qb.getId(), alias);
-      qb.rewriteViewToSubq(alias, tab_name, qbexpr);
+      if (!this.skipAuthorization()
+          && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) {
+        qb.rewriteViewToSubq(alias, tab_name, qbexpr, tab);
+      }
+      else {
+        qb.rewriteViewToSubq(alias, tab_name, qbexpr, null);
+      }
     }
 
   private boolean isPresent(String[] list, String elem) {
@@ -9851,7 +9859,21 @@ public Operator genPlan(QB qb, boolean skipAmbiguityCheck)
     // Recurse over the subqueries to fill the subquery part of the plan
     for (String alias : qb.getSubqAliases()) {
       QBExpr qbexpr = qb.getSubqForAlias(alias);
-      aliasToOpInfo.put(alias, genPlan(qb, qbexpr));
+      Operator operator = genPlan(qb, qbexpr);
+      aliasToOpInfo.put(alias, operator);
+      if (qb.getViewToTabSchema().containsKey(alias)) {
+        // Set viewProjectToTableSchema so that we can leverage the ColumnPruner.
+        if (operator instanceof SelectOperator) {
+          if (this.viewProjectToTableSchema == null) {
+            this.viewProjectToTableSchema = new LinkedHashMap<>();
+          }
+          viewProjectToTableSchema.put((SelectOperator) operator, qb.getViewToTabSchema()
+              .get(alias));
+        } else {
+          throw new SemanticException("View " + alias + " corresponds to "
+              + operator.getType().name() + ", rather than a SelectOperator.");
+        }
+      }
     }
 
     // Recurse over all the source tables
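In the doPhase1 hunk above, rewriteViewToSubq() receives the view's Table only when authorization is enabled and not skipped, so the downstream plumbing stays dormant otherwise; genPlan() then insists the view expands to a SelectOperator before recording it. A compact sketch of the gating half — the HiveConf flag is real, the rest is toy code:

    // Toy model of the authorization gating in doPhase1.
    public class ViewSchemaGatingSketch {
      // Stand-in for HiveConf.getBoolVar(conf, ConfVars.HIVE_AUTHORIZATION_ENABLED).
      static boolean authorizationEnabled = true;
      static boolean skipAuthorization = false;

      static void rewriteViewToSubq(String alias, String viewSchema) {
        if (viewSchema != null) {
          System.out.println("tracking schema of view " + alias + ": " + viewSchema);
        } else {
          System.out.println("view " + alias + " expanded without schema tracking");
        }
      }

      public static void main(String[] args) {
        String tab = "(id, region, amount, ssn)"; // the view's Table schema
        // Mirror of the doPhase1 change: pass the schema only when it can be used.
        rewriteViewToSubq("v_sales",
            (!skipAuthorization && authorizationEnabled) ? tab : null);
      }
    }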
@@ -10372,7 +10394,7 @@ void analyzeInternal(ASTNode ast, PlannerContext plannerCtx) throws SemanticExce
         listMapJoinOpsNoReducer, prunedPartitions, opToSamplePruner, globalLimitCtx,
         nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner, viewAliasToInput,
         reduceSinkOperatorsAddedByEnforceBucketingSorting,
-        analyzeRewrite, tableDesc, queryProperties);
+        analyzeRewrite, tableDesc, queryProperties, viewProjectToTableSchema);
 
     // 5. Take care of view creation
     if (createVwDesc != null) {
@@ -10422,6 +10444,10 @@ void analyzeInternal(ASTNode ast, PlannerContext plannerCtx) throws SemanticExce
       optm.setPctx(pCtx);
       optm.initialize(conf);
       pCtx = optm.optimize();
+      if (pCtx.getColumnAccessInfo() != null) {
+        // Set the ColumnAccessInfo for view column authorization.
+        setColumnAccessInfo(pCtx.getColumnAccessInfo());
+      }
       FetchTask origFetchTask = pCtx.getFetchTask();
       if (LOG.isDebugEnabled()) {
         LOG.debug("After logical optimization\n" + Operator.toString(pCtx.getTopOps().values()));
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
index fc555ca..7415078 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
@@ -402,7 +402,7 @@ public ParseContext getParseContext(ParseContext pCtx, List
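End to end: the analyzer registers each view's project and schema, a trimmer or pruner pass records which projected ordinals survive, ParseContext hands the resulting ColumnAccessInfo back to the SemanticAnalyzer, and Driver.doAuthorization() checks only those columns. A condensed, self-contained rendering of the whole flow, with toy types throughout rather than Hive's API:

    import java.util.BitSet;
    import java.util.List;
    import java.util.Map;
    import java.util.Set;
    import java.util.TreeMap;
    import java.util.TreeSet;

    // Toy end-to-end model of the view column authorization pipeline.
    public class EndToEndViewAuthSketch {
      public static void main(String[] args) {
        // 1. Analyzer: view alias -> positional schema of the view.
        Map<String, List<String>> viewSchemas =
            Map.of("v_sales", List.of("id", "region", "amount", "ssn"));

        // 2. Pruner/trimmer: ordinals of the view's projections actually used.
        Map<String, BitSet> usedOrdinals = new TreeMap<>();
        BitSet used = new BitSet();
        used.set(1); // region
        used.set(2); // amount
        usedOrdinals.put("v_sales", used);

        // 3. ParseContext -> SemanticAnalyzer: build the ColumnAccessInfo map.
        Map<String, Set<String>> columnAccess = new TreeMap<>();
        usedOrdinals.forEach((view, bits) -> bits.stream().forEach(i ->
            columnAccess.computeIfAbsent(view, k -> new TreeSet<>())
                .add(viewSchemas.get(view).get(i))));

        // 4. Driver.doAuthorization(): authorize only the accessed view columns.
        System.out.println("authorize: " + columnAccess); // {v_sales=[amount, region]}
      }
    }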