diff --git a/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java b/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java index 0d7b92d649..ba4ffcca6c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java @@ -142,7 +142,7 @@ public QueryPlan(String queryString, BaseSemanticAnalyzer sem, Long startTime, S outputs = sem.getAllOutputs(); linfo = sem.getLineageInfo(); tableAccessInfo = sem.getTableAccessInfo(); - columnAccessInfo = sem.getColumnAccessInfo(); + columnAccessInfo = sem.getDirectColumnAccessInfo(); idToTableNameMap = new HashMap(sem.getIdToTableNameMap()); this.queryId = queryId == null ? makeQueryId() : queryId; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java index c570356d8b..1e87b1955b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java @@ -78,7 +78,8 @@ protected static final Logger LOG = LoggerFactory.getLogger(HiveRelFieldTrimmer.class); - private ColumnAccessInfo columnAccessInfo; + private ColumnAccessInfo directColumnAccessInfo; + private ColumnAccessInfo allColumnAccessInfo; private Map viewProjectToTableSchema; private final RelBuilder relBuilder; private final boolean fetchStats; @@ -88,9 +89,11 @@ public HiveRelFieldTrimmer(SqlValidator validator, RelBuilder relBuilder) { } public HiveRelFieldTrimmer(SqlValidator validator, RelBuilder relBuilder, - ColumnAccessInfo columnAccessInfo, Map viewToTableSchema) { + ColumnAccessInfo directColumnAccessInfo, ColumnAccessInfo allColumnAccessInfo, + Map viewToTableSchema) { this(validator, relBuilder, false); - this.columnAccessInfo = columnAccessInfo; + this.directColumnAccessInfo = directColumnAccessInfo; + this.allColumnAccessInfo = allColumnAccessInfo; this.viewProjectToTableSchema = viewToTableSchema; } @@ -674,10 +677,14 @@ public TrimResult trimFields(Project project, ImmutableBitSet fieldsUsed, // set columnAccessInfo for ViewColumnAuthorization for (Ord ord : Ord.zip(project.getProjects())) { if (fieldsUsed.get(ord.i)) { - if (this.columnAccessInfo != null && this.viewProjectToTableSchema != null - && this.viewProjectToTableSchema.containsKey(project)) { + if (this.viewProjectToTableSchema != null && this.viewProjectToTableSchema.containsKey(project)) { Table tab = this.viewProjectToTableSchema.get(project); - this.columnAccessInfo.add(tab.getCompleteName(), tab.getAllCols().get(ord.i).getName()); + if (this.directColumnAccessInfo != null) { + this.directColumnAccessInfo.add(tab.getCompleteName(), tab.getAllCols().get(ord.i).getName()); + } + if (this.allColumnAccessInfo != null) { + this.allColumnAccessInfo.add(tab.getCompleteName(), tab.getAllCols().get(ord.i).getName()); + } } } } @@ -688,6 +695,15 @@ public TrimResult trimFields(Project project, ImmutableBitSet fieldsUsed, public TrimResult trimFields(TableScan tableAccessRel, ImmutableBitSet fieldsUsed, Set extraFields) { final TrimResult result = super.trimFields(tableAccessRel, fieldsUsed, extraFields); + if (this.allColumnAccessInfo != null) { + // Store information about column accessed by the table so it can be used + // to send only this information for column masking + final RelOptHiveTable tab = (RelOptHiveTable) tableAccessRel.getTable(); + fieldsUsed.asList().stream() + .filter(idx -> idx < tab.getNoOfNonVirtualCols()) + .forEach(idx -> allColumnAccessInfo.add( + tab.getHiveTableMD().getCompleteName(), tab.getHiveTableMD().getAllCols().get(idx).getName())); + } if (fetchStats) { fetchColStats(result.getKey(), tableAccessRel, fieldsUsed, extraFields); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java index 9457b77589..9f1874c5fb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java @@ -157,7 +157,14 @@ */ protected LineageInfo linfo; protected TableAccessInfo tableAccessInfo; - protected ColumnAccessInfo columnAccessInfo; + /** + * All columns accessed directly. + */ + protected ColumnAccessInfo directColumnAccessInfo; + /** + * All columns accessed (including through views). + */ + protected ColumnAccessInfo allColumnAccessInfo; protected CacheUsage cacheUsage; @@ -1742,18 +1749,18 @@ public void setTableAccessInfo(TableAccessInfo tableAccessInfo) { * * @return ColumnAccessInfo associated with the query. */ - public ColumnAccessInfo getColumnAccessInfo() { - return columnAccessInfo; + public ColumnAccessInfo getDirectColumnAccessInfo() { + return directColumnAccessInfo; } /** * Sets the column access information. * - * @param columnAccessInfo The ColumnAccessInfo structure that is set immediately after + * @param directColumnAccessInfo The ColumnAccessInfo structure that is set immediately after * the optimization phase. */ - public void setColumnAccessInfo(ColumnAccessInfo columnAccessInfo) { - this.columnAccessInfo = columnAccessInfo; + public void setDirectColumnAccessInfo(ColumnAccessInfo directColumnAccessInfo) { + this.directColumnAccessInfo = directColumnAccessInfo; } public ColumnAccessInfo getUpdateColumnAccessInfo() { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 4762335a0f..a014c9e082 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -307,6 +307,7 @@ private SemanticException semanticException; private boolean runCBO = true; private boolean disableSemJoinReordering = true; + private EnumSet profilesCBO; private static final CommonToken FROM_TOKEN = @@ -1474,13 +1475,17 @@ RelNode logicalPlan() throws SemanticException { RelNode optimizedOptiqPlan = null; CalcitePlannerAction calcitePlannerAction = null; - if (this.columnAccessInfo == null) { - this.columnAccessInfo = new ColumnAccessInfo(); + if (this.directColumnAccessInfo == null) { + this.directColumnAccessInfo = new ColumnAccessInfo(); + } + if (this.allColumnAccessInfo == null) { + this.allColumnAccessInfo = new ColumnAccessInfo(); } calcitePlannerAction = new CalcitePlannerAction( prunedPartitions, ctx.getStatsSource(), - this.columnAccessInfo); + this.directColumnAccessInfo, + this.allColumnAccessInfo); try { optimizedOptiqPlan = Frameworks.withPlanner(calcitePlannerAction, Frameworks @@ -1715,7 +1720,8 @@ private RowResolver genRowResolver(Operator op, QB qb) { private RelOptSchema relOptSchema; private final Map partitionCache; private final Map colStatsCache; - private final ColumnAccessInfo columnAccessInfo; + private final ColumnAccessInfo directColumnAccessInfo; + private final ColumnAccessInfo allColumnAccessInfo; private Map viewProjectToTableSchema; //correlated vars across subqueries within same query needs to have different ID @@ -1735,11 +1741,13 @@ private RowResolver genRowResolver(Operator op, QB qb) { CalcitePlannerAction( Map partitionCache, StatsSource statsSource, - ColumnAccessInfo columnAccessInfo) { + ColumnAccessInfo directColumnAccessInfo, + ColumnAccessInfo allColumnAccessInfo) { this.partitionCache = partitionCache; this.statsSource = statsSource; this.colStatsCache = ctx.getOpContext().getColStatsCache(); - this.columnAccessInfo = columnAccessInfo; + this.directColumnAccessInfo = directColumnAccessInfo; + this.allColumnAccessInfo = allColumnAccessInfo; } @Override @@ -1780,8 +1788,8 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu // We need to get the ColumnAccessInfo and viewToTableSchema for views. HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null, - HiveRelFactories.HIVE_BUILDER.create(optCluster, null), this.columnAccessInfo, - this.viewProjectToTableSchema); + HiveRelFactories.HIVE_BUILDER.create(optCluster, null), this.directColumnAccessInfo, + this.allColumnAccessInfo, this.viewProjectToTableSchema); fieldTrimmer.trim(calciteGenPlan); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java index 31068cb8c3..e399ea2553 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java @@ -239,8 +239,8 @@ protected void setUpAccessControlInfoForUpdate(Table mTable, Map cte basicInfos.put(new HivePrivilegeObject(table.getDbName(), table.getTableName(), colNames), null); } } else { - List colNames = new ArrayList<>(); - List colTypes = new ArrayList<>(); - extractColumnInfos(table, colNames, colTypes); + List colNames; + List colTypes; + if (isCBOExecuted() && this.allColumnAccessInfo != null && + this.allColumnAccessInfo.getTableToColumnAccessMap().containsKey(table.getCompleteName())) { + colNames = this.allColumnAccessInfo.getTableToColumnAccessMap().get(table.getCompleteName()); + Map colNameToType = table.getAllCols().stream() + .collect(Collectors.toMap(FieldSchema::getName, FieldSchema::getType)); + colTypes = colNames.stream().map(colNameToType::get).collect(Collectors.toList()); + } else { + colNames = new ArrayList<>(); + colTypes = new ArrayList<>(); + extractColumnInfos(table, colNames, colTypes); + } basicInfos.put(new HivePrivilegeObject(table.getDbName(), table.getTableName(), colNames), new MaskAndFilterInfo(colTypes, additionalTabInfo.toString(), alias, astNode, table.isView(), table.isNonNative())); @@ -12602,7 +12612,7 @@ void analyzeInternal(ASTNode ast, Supplier pcf) throws SemanticE pCtx = optm.optimize(); if (pCtx.getColumnAccessInfo() != null) { // set ColumnAccessInfo for view column authorization - setColumnAccessInfo(pCtx.getColumnAccessInfo()); + setDirectColumnAccessInfo(pCtx.getColumnAccessInfo()); } if (LOG.isDebugEnabled()) { LOG.debug("After logical optimization\n" + Operator.toString(pCtx.getTopOps().values())); @@ -12616,7 +12626,7 @@ void analyzeInternal(ASTNode ast, Supplier pcf) throws SemanticE || HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) { ColumnAccessAnalyzer columnAccessAnalyzer = new ColumnAccessAnalyzer(pCtx); // view column access info is carried by this.getColumnAccessInfo(). - setColumnAccessInfo(columnAccessAnalyzer.analyzeColumnAccess(this.getColumnAccessInfo())); + setDirectColumnAccessInfo(columnAccessAnalyzer.analyzeColumnAccess(this.getDirectColumnAccessInfo())); } // 9. Optimize Physical op tree & Translate to target execution engine (MR, @@ -12650,7 +12660,7 @@ void analyzeInternal(ASTNode ast, Supplier pcf) throws SemanticE // 11. put accessed columns to readEntity if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) { - putAccessedColumnsToReadEntity(inputs, columnAccessInfo); + putAccessedColumnsToReadEntity(inputs, directColumnAccessInfo); } if (isCacheEnabled && lookupInfo != null) { @@ -15088,7 +15098,7 @@ private void useCachedResult(QueryResultsCache.CacheEntry cacheEntry, boolean ne queryState.setCommandType(cacheEntry.getQueryInfo().getHiveOperation()); resultSchema = cacheEntry.getQueryInfo().getResultSchema(); setTableAccessInfo(cacheEntry.getQueryInfo().getTableAccessInfo()); - setColumnAccessInfo(cacheEntry.getQueryInfo().getColumnAccessInfo()); + setDirectColumnAccessInfo(cacheEntry.getQueryInfo().getColumnAccessInfo()); inputs.addAll(cacheEntry.getQueryInfo().getInputs()); // Set recursive traversal in case the cached query was UNION generated by Tez. @@ -15102,7 +15112,7 @@ private void useCachedResult(QueryResultsCache.CacheEntry cacheEntry, boolean ne private QueryResultsCache.QueryInfo createCacheQueryInfoForQuery(QueryResultsCache.LookupInfo lookupInfo) { long queryTime = SessionState.get().getQueryCurrentTimestamp().toEpochMilli(); return new QueryResultsCache.QueryInfo(queryTime, lookupInfo, queryState.getHiveOperation(), - resultSchema, getTableAccessInfo(), getColumnAccessInfo(), inputs); + resultSchema, getTableAccessInfo(), getDirectColumnAccessInfo(), inputs); } /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java index 179021e2b9..a8ea2932a4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java @@ -216,8 +216,8 @@ private void reparseAndSuperAnalyze(ASTNode tree) throws SemanticException { // Add the setRCols to the input list for (String colName : setRCols) { - if (columnAccessInfo != null) { //assuming this means we are not doing Auth - columnAccessInfo.add(Table.getCompleteName(mTable.getDbName(), mTable.getTableName()), + if (directColumnAccessInfo != null) { //assuming this means we are not doing Auth + directColumnAccessInfo.add(Table.getCompleteName(mTable.getDbName(), mTable.getTableName()), colName); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizerV1.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizerV1.java index bbff049411..d418c75c4e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizerV1.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizerV1.java @@ -140,7 +140,7 @@ private static void authorizeInputs(HiveOperation op, BaseSemanticAnalyzer sem, } Table tbl = read.getTable(); if (tbl.isView() && sem instanceof SemanticAnalyzer) { - tab2Cols.put(tbl, sem.getColumnAccessInfo().getTableToColumnAccessMap().get(tbl.getCompleteName())); + tab2Cols.put(tbl, sem.getDirectColumnAccessInfo().getTableToColumnAccessMap().get(tbl.getCompleteName())); } if (read.getPartition() != null) { Partition partition = read.getPartition(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizerV2.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizerV2.java index e9a278dc60..cab223f6e1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizerV2.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizerV2.java @@ -57,8 +57,8 @@ static void doAuthorization(HiveOperation op, BaseSemanticAnalyzer sem, SessionS HiveOperationType hiveOpType = HiveOperationType.valueOf(op.name()); // colAccessInfo is set only in case of SemanticAnalyzer - Map> selectTab2Cols = sem.getColumnAccessInfo() != null - ? sem.getColumnAccessInfo().getTableToColumnAccessMap() : null; + Map> selectTab2Cols = sem.getDirectColumnAccessInfo() != null + ? sem.getDirectColumnAccessInfo().getTableToColumnAccessMap() : null; Map> updateTab2Cols = sem.getUpdateColumnAccessInfo() != null ? sem.getUpdateColumnAccessInfo().getTableToColumnAccessMap() : null;