diff --git ql/src/java/org/apache/hadoop/hive/ql/Driver.java ql/src/java/org/apache/hadoop/hive/ql/Driver.java index 8c764e2be5..2eb65918c9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/Driver.java +++ ql/src/java/org/apache/hadoop/hive/ql/Driver.java @@ -33,7 +33,6 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.Map.Entry; import java.util.Queue; import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; @@ -59,11 +58,9 @@ import org.apache.hadoop.hive.metastore.ColumnType; import org.apache.hadoop.hive.metastore.HiveMetaStoreUtils; import org.apache.hadoop.hive.metastore.Warehouse; -import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.LockComponent; import org.apache.hadoop.hive.metastore.api.LockType; -import org.apache.hadoop.hive.metastore.api.PrincipalType; import org.apache.hadoop.hive.metastore.api.Schema; import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.ql.cache.results.CacheUsage; @@ -75,9 +72,6 @@ import org.apache.hadoop.hive.ql.exec.DagUtils; import org.apache.hadoop.hive.ql.exec.ExplainTask; import org.apache.hadoop.hive.ql.exec.FetchTask; -import org.apache.hadoop.hive.ql.exec.FunctionInfo; -import org.apache.hadoop.hive.ql.exec.FunctionInfo.FunctionType; -import org.apache.hadoop.hive.ql.exec.FunctionUtils; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Task; @@ -89,7 +83,6 @@ import org.apache.hadoop.hive.ql.exec.spark.session.SparkSession; import org.apache.hadoop.hive.ql.history.HiveHistory.Keys; import org.apache.hadoop.hive.ql.hooks.Entity; -import org.apache.hadoop.hive.ql.hooks.Entity.Type; import org.apache.hadoop.hive.ql.hooks.HookContext; import org.apache.hadoop.hive.ql.hooks.HookUtils; import org.apache.hadoop.hive.ql.hooks.PrivateHookContext; @@ -106,24 +99,17 @@ import org.apache.hadoop.hive.ql.metadata.AuthorizationException; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.metadata.formatting.JsonMetaDataFormatter; import org.apache.hadoop.hive.ql.metadata.formatting.MetaDataFormatUtils; import org.apache.hadoop.hive.ql.metadata.formatting.MetaDataFormatter; -import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; -import org.apache.hadoop.hive.ql.parse.ColumnAccessInfo; import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState; import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext; import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContextImpl; -import org.apache.hadoop.hive.ql.parse.ImportSemanticAnalyzer; -import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.ParseException; import org.apache.hadoop.hive.ql.parse.ParseUtils; -import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; -import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.SemanticAnalyzerFactory; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; import org.apache.hadoop.hive.ql.plan.HiveOperation; @@ -131,13 +117,7 @@ import 
org.apache.hadoop.hive.ql.plan.mapper.PlanMapper; import org.apache.hadoop.hive.ql.plan.mapper.StatsSource; import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; -import org.apache.hadoop.hive.ql.security.authorization.AuthorizationUtils; -import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzContext; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.HivePrivObjectActionType; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.HivePrivilegeObjectType; +import org.apache.hadoop.hive.ql.security.authorization.command.CommandAuthorizer; import org.apache.hadoop.hive.ql.session.LineageState; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; @@ -157,8 +137,6 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Strings; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Sets; - public class Driver implements IDriver { @@ -715,7 +693,7 @@ public void run() { // As both admin or operation owner can perform the operation. // Which is not directly supported in authorizer if (queryState.getHiveOperation() != HiveOperation.KILL_QUERY) { - doAuthorization(queryState.getHiveOperation(), sem, command); + CommandAuthorizer.doAuthorization(queryState.getHiveOperation(), sem, command); } } catch (AuthorizationException authExp) { console.printError("Authorization failed:" + authExp.getMessage() @@ -1068,392 +1046,6 @@ private String getExplainOutput(BaseSemanticAnalyzer sem, QueryPlan plan, return ret; } - /** - * Do authorization using post semantic analysis information in the semantic analyzer - * The original command is also passed so that authorization interface can provide - * more useful information in logs. - * @param sem SemanticAnalyzer used to parse input query - * @param command input query - * @throws HiveException - * @throws AuthorizationException - */ - public static void doAuthorization(HiveOperation op, BaseSemanticAnalyzer sem, String command) - throws HiveException, AuthorizationException { - SessionState ss = SessionState.get(); - Hive db = sem.getDb(); - - Set additionalInputs = new HashSet(); - for (Entity e : sem.getInputs()) { - if (e.getType() == Entity.Type.PARTITION) { - additionalInputs.add(new ReadEntity(e.getTable())); - } - } - // skipping the auth check for the "CREATE DATABASE" operation if database already exists - // we know that if the database already exists then "CREATE DATABASE" operation will fail. - if(op.equals(HiveOperation.CREATEDATABASE)){ - for (WriteEntity e : sem.getOutputs()) { - if(e.getType() == Entity.Type.DATABASE && db.databaseExists(e.getName().split(":")[1])){ - return; - } - } - } - - Set additionalOutputs = new HashSet(); - for (WriteEntity e : sem.getOutputs()) { - if (e.getType() == Entity.Type.PARTITION) { - additionalOutputs.add(new WriteEntity(e.getTable(), e.getWriteType())); - } - } - - // The following union operation returns a union, which traverses over the - // first set once and then then over each element of second set, in order, - // that is not contained in first. 
This means it doesn't replace anything - // in first set, and would preserve the WriteType in WriteEntity in first - // set in case of outputs list. - Set inputs = Sets.union(sem.getInputs(), additionalInputs); - Set outputs = Sets.union(sem.getOutputs(), additionalOutputs); - - if (ss.isAuthorizationModeV2()) { - // get mapping of tables to columns used - ColumnAccessInfo colAccessInfo = sem.getColumnAccessInfo(); - // colAccessInfo is set only in case of SemanticAnalyzer - Map> selectTab2Cols = colAccessInfo != null - ? colAccessInfo.getTableToColumnAccessMap() : null; - Map> updateTab2Cols = sem.getUpdateColumnAccessInfo() != null - ? sem.getUpdateColumnAccessInfo().getTableToColumnAccessMap() : null; - - // convert to List as above Set was created using Sets.union (for reasons - // explained there) - // but that Set is immutable - List inputList = new ArrayList(inputs); - List outputList = new ArrayList(outputs); - - // add permanent UDFs being used - inputList.addAll(getPermanentFunctionEntities(ss)); - - doAuthorizationV2(ss, op, inputList, outputList, command, selectTab2Cols, updateTab2Cols); - return; - } - if (op == null) { - throw new HiveException("Operation should not be null"); - } - HiveAuthorizationProvider authorizer = ss.getAuthorizer(); - if (op.equals(HiveOperation.CREATEDATABASE)) { - authorizer.authorize( - op.getInputRequiredPrivileges(), op.getOutputRequiredPrivileges()); - } else if (op.equals(HiveOperation.CREATETABLE_AS_SELECT) - || op.equals(HiveOperation.CREATETABLE)) { - authorizer.authorize( - db.getDatabase(SessionState.get().getCurrentDatabase()), null, - HiveOperation.CREATETABLE_AS_SELECT.getOutputRequiredPrivileges()); - } else { - if (op.equals(HiveOperation.IMPORT)) { - ImportSemanticAnalyzer isa = (ImportSemanticAnalyzer) sem; - if (!isa.existsTable()) { - authorizer.authorize( - db.getDatabase(SessionState.get().getCurrentDatabase()), null, - HiveOperation.CREATETABLE_AS_SELECT.getOutputRequiredPrivileges()); - } - } - } - if (outputs != null && outputs.size() > 0) { - for (WriteEntity write : outputs) { - if (write.isDummy() || write.isPathType()) { - continue; - } - if (write.getType() == Entity.Type.DATABASE) { - if (!op.equals(HiveOperation.IMPORT)){ - // We skip DB check for import here because we already handle it above - // as a CTAS check. 
- authorizer.authorize(write.getDatabase(), - null, op.getOutputRequiredPrivileges()); - } - continue; - } - - if (write.getType() == WriteEntity.Type.PARTITION) { - Partition part = db.getPartition(write.getTable(), write - .getPartition().getSpec(), false); - if (part != null) { - authorizer.authorize(write.getPartition(), null, - op.getOutputRequiredPrivileges()); - continue; - } - } - - if (write.getTable() != null) { - authorizer.authorize(write.getTable(), null, - op.getOutputRequiredPrivileges()); - } - } - } - - if (inputs != null && inputs.size() > 0) { - Map> tab2Cols = new HashMap>(); - Map> part2Cols = new HashMap>(); - - //determine if partition level privileges should be checked for input tables - Map tableUsePartLevelAuth = new HashMap(); - for (ReadEntity read : inputs) { - if (read.isDummy() || read.isPathType() || read.getType() == Entity.Type.DATABASE) { - continue; - } - Table tbl = read.getTable(); - if ((read.getPartition() != null) || (tbl != null && tbl.isPartitioned())) { - String tblName = tbl.getTableName(); - if (tableUsePartLevelAuth.get(tblName) == null) { - boolean usePartLevelPriv = (tbl.getParameters().get( - "PARTITION_LEVEL_PRIVILEGE") != null && ("TRUE" - .equalsIgnoreCase(tbl.getParameters().get( - "PARTITION_LEVEL_PRIVILEGE")))); - if (usePartLevelPriv) { - tableUsePartLevelAuth.put(tblName, Boolean.TRUE); - } else { - tableUsePartLevelAuth.put(tblName, Boolean.FALSE); - } - } - } - } - - // column authorization is checked through table scan operators. - getTablePartitionUsedColumns(op, sem, tab2Cols, part2Cols, tableUsePartLevelAuth); - - // cache the results for table authorization - Set tableAuthChecked = new HashSet(); - for (ReadEntity read : inputs) { - // if read is not direct, we do not need to check its autho. 
- if (read.isDummy() || read.isPathType() || !read.isDirect()) { - continue; - } - if (read.getType() == Entity.Type.DATABASE) { - authorizer.authorize(read.getDatabase(), op.getInputRequiredPrivileges(), null); - continue; - } - Table tbl = read.getTable(); - if (tbl.isView() && sem instanceof SemanticAnalyzer) { - tab2Cols.put(tbl, - sem.getColumnAccessInfo().getTableToColumnAccessMap().get(tbl.getCompleteName())); - } - if (read.getPartition() != null) { - Partition partition = read.getPartition(); - tbl = partition.getTable(); - // use partition level authorization - if (Boolean.TRUE.equals(tableUsePartLevelAuth.get(tbl.getTableName()))) { - List cols = part2Cols.get(partition); - if (cols != null && cols.size() > 0) { - authorizer.authorize(partition.getTable(), - partition, cols, op.getInputRequiredPrivileges(), - null); - } else { - authorizer.authorize(partition, - op.getInputRequiredPrivileges(), null); - } - continue; - } - } - - // if we reach here, it means it needs to do a table authorization - // check, and the table authorization may already happened because of other - // partitions - if (tbl != null && !tableAuthChecked.contains(tbl.getTableName()) && - !(Boolean.TRUE.equals(tableUsePartLevelAuth.get(tbl.getTableName())))) { - List cols = tab2Cols.get(tbl); - if (cols != null && cols.size() > 0) { - authorizer.authorize(tbl, null, cols, - op.getInputRequiredPrivileges(), null); - } else { - authorizer.authorize(tbl, op.getInputRequiredPrivileges(), - null); - } - tableAuthChecked.add(tbl.getTableName()); - } - } - - } - } - - private static List getPermanentFunctionEntities(SessionState ss) throws HiveException { - List functionEntities = new ArrayList<>(); - for (Entry permFunction : ss.getCurrentFunctionsInUse().entrySet()) { - if (permFunction.getValue().getFunctionType() != FunctionType.PERSISTENT) { - // Only permanent functions need to be authorized. - // Built-in function access is allowed to all users. - // If user can create a temp function, they should be able to use it - // without additional authorization. - continue; - } - functionEntities.add(createReadEntity(permFunction.getKey(), permFunction.getValue())); - } - return functionEntities; - } - - private static ReadEntity createReadEntity(String functionName, FunctionInfo functionInfo) - throws HiveException { - String[] qualFunctionName = FunctionUtils.getQualifiedFunctionNameParts(functionName); - // this is only for the purpose of authorization, only the name matters. 
- Database db = new Database(qualFunctionName[0], "", "", null); - return new ReadEntity(db, qualFunctionName[1], functionInfo.getClassName(), Type.FUNCTION); - } - - private static void getTablePartitionUsedColumns(HiveOperation op, BaseSemanticAnalyzer sem, - Map> tab2Cols, Map> part2Cols, - Map tableUsePartLevelAuth) throws HiveException { - // for a select or create-as-select query, populate the partition to column - // (par2Cols) or - // table to columns mapping (tab2Cols) - if (op.equals(HiveOperation.CREATETABLE_AS_SELECT) || op.equals(HiveOperation.QUERY)) { - SemanticAnalyzer querySem = (SemanticAnalyzer) sem; - ParseContext parseCtx = querySem.getParseContext(); - - for (Map.Entry topOpMap : querySem.getParseContext().getTopOps() - .entrySet()) { - TableScanOperator tableScanOp = topOpMap.getValue(); - if (!tableScanOp.isInsideView()) { - Table tbl = tableScanOp.getConf().getTableMetadata(); - List neededColumnIds = tableScanOp.getNeededColumnIDs(); - List columns = tbl.getCols(); - List cols = new ArrayList(); - for (int i = 0; i < neededColumnIds.size(); i++) { - cols.add(columns.get(neededColumnIds.get(i)).getName()); - } - // map may not contain all sources, since input list may have been - // optimized out - // or non-existent tho such sources may still be referenced by the - // TableScanOperator - // if it's null then the partition probably doesn't exist so let's use - // table permission - if (tbl.isPartitioned() - && Boolean.TRUE.equals(tableUsePartLevelAuth.get(tbl.getTableName()))) { - String alias_id = topOpMap.getKey(); - - PrunedPartitionList partsList = PartitionPruner.prune(tableScanOp, parseCtx, alias_id); - Set parts = partsList.getPartitions(); - for (Partition part : parts) { - List existingCols = part2Cols.get(part); - if (existingCols == null) { - existingCols = new ArrayList(); - } - existingCols.addAll(cols); - part2Cols.put(part, existingCols); - } - } else { - List existingCols = tab2Cols.get(tbl); - if (existingCols == null) { - existingCols = new ArrayList(); - } - existingCols.addAll(cols); - tab2Cols.put(tbl, existingCols); - } - } - } - } - } - - private static void doAuthorizationV2(SessionState ss, HiveOperation op, List inputs, - List outputs, String command, Map> tab2cols, - Map> updateTab2Cols) throws HiveException { - - /* comment for reviewers -> updateTab2Cols needed to be separate from tab2cols because if I - pass tab2cols to getHivePrivObjects for the output case it will trip up insert/selects, - since the insert will get passed the columns from the select. 
- */ - - HiveAuthzContext.Builder authzContextBuilder = new HiveAuthzContext.Builder(); - authzContextBuilder.setUserIpAddress(ss.getUserIpAddress()); - authzContextBuilder.setForwardedAddresses(ss.getForwardedAddresses()); - authzContextBuilder.setCommandString(command); - - HiveOperationType hiveOpType = getHiveOperationType(op); - List inputsHObjs = getHivePrivObjects(inputs, tab2cols); - List outputHObjs = getHivePrivObjects(outputs, updateTab2Cols); - - ss.getAuthorizerV2().checkPrivileges(hiveOpType, inputsHObjs, outputHObjs, authzContextBuilder.build()); - } - - private static List getHivePrivObjects( - List privObjects, Map> tableName2Cols) { - List hivePrivobjs = new ArrayList(); - if(privObjects == null){ - return hivePrivobjs; - } - for(Entity privObject : privObjects){ - HivePrivilegeObjectType privObjType = - AuthorizationUtils.getHivePrivilegeObjectType(privObject.getType()); - if(privObject.isDummy()) { - //do not authorize dummy readEntity or writeEntity - continue; - } - if(privObject instanceof ReadEntity && !((ReadEntity)privObject).isDirect()){ - // In case of views, the underlying views or tables are not direct dependencies - // and are not used for authorization checks. - // This ReadEntity represents one of the underlying tables/views, so skip it. - // See description of the isDirect in ReadEntity - continue; - } - if(privObject instanceof WriteEntity && ((WriteEntity)privObject).isTempURI()){ - //do not authorize temporary uris - continue; - } - if (privObject.getTyp() == Type.TABLE - && (privObject.getT() == null || privObject.getT().isTemporary())) { - // skip temporary tables from authorization - continue; - } - //support for authorization on partitions needs to be added - String dbname = null; - String objName = null; - List partKeys = null; - List columns = null; - String className = null; - String ownerName = null; - PrincipalType ownerType = null; - switch(privObject.getType()){ - case DATABASE: - dbname = privObject.getDatabase().getName(); - ownerName = privObject.getDatabase().getOwnerName(); - ownerType = privObject.getDatabase().getOwnerType(); - break; - case TABLE: - dbname = privObject.getTable().getDbName(); - objName = privObject.getTable().getTableName(); - columns = tableName2Cols == null ? 
null : - tableName2Cols.get(Table.getCompleteName(dbname, objName)); - ownerName = privObject.getTable().getOwner(); - ownerType = privObject.getTable().getOwnerType(); - break; - case DFS_DIR: - case LOCAL_DIR: - objName = privObject.getD().toString(); - break; - case FUNCTION: - if(privObject.getDatabase() != null) { - dbname = privObject.getDatabase().getName(); - } - objName = privObject.getFunctionName(); - className = privObject.getClassName(); - break; - case DUMMYPARTITION: - case PARTITION: - // not currently handled - continue; - case SERVICE_NAME: - objName = privObject.getServiceName(); - break; - default: - throw new AssertionError("Unexpected object type"); - } - HivePrivObjectActionType actionType = AuthorizationUtils.getActionType(privObject); - HivePrivilegeObject hPrivObject = new HivePrivilegeObject(privObjType, dbname, objName, - partKeys, columns, actionType, null, className, ownerName, ownerType); - hivePrivobjs.add(hPrivObject); - } - return hivePrivobjs; - } - - private static HiveOperationType getHiveOperationType(HiveOperation op) { - return HiveOperationType.valueOf(op.name()); - } - @Override public HiveConf getConf() { return conf; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java index 4a71a5b411..a0f3e15b63 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java @@ -46,7 +46,6 @@ import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.LockComponent; -import org.apache.hadoop.hive.ql.Driver; import org.apache.hadoop.hive.ql.DriverContext; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.io.AcidUtils; @@ -66,6 +65,7 @@ import org.apache.hadoop.hive.ql.plan.TezWork; import org.apache.hadoop.hive.ql.plan.api.StageType; import org.apache.hadoop.hive.ql.security.authorization.AuthorizationFactory; +import org.apache.hadoop.hive.ql.security.authorization.command.CommandAuthorizer; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.io.IOUtils; import org.apache.hive.common.util.AnnotationUtils; @@ -525,7 +525,7 @@ public void exception(Exception exception) { SessionState.get().setActiveAuthorizer(authorizer); try { - Driver.doAuthorization(queryState.getHiveOperation(), analyzer, ""); + CommandAuthorizer.doAuthorization(queryState.getHiveOperation(), analyzer, ""); } finally { SessionState.get().setActiveAuthorizer(delegate); } diff --git ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizer.java ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizer.java new file mode 100644 index 0000000000..cc7b49a058 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizer.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.security.authorization.command;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.hadoop.hive.ql.hooks.Entity;
+import org.apache.hadoop.hive.ql.hooks.ReadEntity;
+import org.apache.hadoop.hive.ql.hooks.WriteEntity;
+import org.apache.hadoop.hive.ql.metadata.AuthorizationException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
+import org.apache.hadoop.hive.ql.plan.HiveOperation;
+import org.apache.hadoop.hive.ql.session.SessionState;
+
+import com.google.common.collect.Sets;
+
+/**
+ * Does authorization using post semantic analysis information from the semantic analyzer.
+ */
+public final class CommandAuthorizer {
+  private CommandAuthorizer() {
+    throw new UnsupportedOperationException("CommandAuthorizer should not be instantiated");
+  }
+
+  /** @param command Passed so that authorization interface can provide more useful information in logs. */
+  public static void doAuthorization(HiveOperation op, BaseSemanticAnalyzer sem, String command)
+      throws HiveException, AuthorizationException {
+    if (skip(op, sem)) {
+      return;
+    }
+
+    SessionState ss = SessionState.get();
+
+    Set<ReadEntity> inputs = getInputs(sem);
+    Set<WriteEntity> outputs = getOutputs(sem);
+
+    if (!ss.isAuthorizationModeV2()) {
+      CommandAuthorizerV1.doAuthorization(op, sem, ss, inputs, outputs);
+    } else {
+      CommandAuthorizerV2.doAuthorization(op, sem, ss, inputs, outputs, command);
+    }
+  }
+
+  private static boolean skip(HiveOperation op, BaseSemanticAnalyzer sem) throws HiveException {
+    // skipping the auth check for the "CREATE DATABASE" operation if database already exists
+    // we know that if the database already exists then "CREATE DATABASE" operation will fail.
+    if (op == HiveOperation.CREATEDATABASE) {
+      for (WriteEntity e : sem.getOutputs()) {
+        if (e.getType() == Entity.Type.DATABASE && sem.getDb().databaseExists(e.getName().split(":")[1])) {
+          return true;
+        }
+      }
+    }
+
+    return false;
+  }
+
+  private static Set<ReadEntity> getInputs(BaseSemanticAnalyzer sem) {
+    Set<ReadEntity> additionalInputs = new HashSet<ReadEntity>();
+    for (Entity e : sem.getInputs()) {
+      if (e.getType() == Entity.Type.PARTITION) {
+        additionalInputs.add(new ReadEntity(e.getTable()));
+      }
+    }
+
+    // Sets.union keeps the values from the first set if they are present in both
+    return Sets.union(sem.getInputs(), additionalInputs);
+  }
+
+  private static Set<WriteEntity> getOutputs(BaseSemanticAnalyzer sem) {
+    Set<WriteEntity> additionalOutputs = new HashSet<WriteEntity>();
+    for (WriteEntity e : sem.getOutputs()) {
+      if (e.getType() == Entity.Type.PARTITION) {
+        additionalOutputs.add(new WriteEntity(e.getTable(), e.getWriteType()));
+      }
+    }
+
+    // Sets.union keeps the values from the first set if they are present in both
+    return Sets.union(sem.getOutputs(), additionalOutputs);
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizerV1.java ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizerV1.java
new file mode 100644
index 0000000000..942d078289
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizerV1.java
@@ -0,0 +1,250 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.security.authorization.command;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.hooks.Entity;
+import org.apache.hadoop.hive.ql.hooks.ReadEntity;
+import org.apache.hadoop.hive.ql.hooks.WriteEntity;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
+import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
+import org.apache.hadoop.hive.ql.parse.ImportSemanticAnalyzer;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
+import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
+import org.apache.hadoop.hive.ql.plan.HiveOperation;
+import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider;
+import org.apache.hadoop.hive.ql.session.SessionState;
+
+/**
+ * Command authorization, old type.
+ */
+class CommandAuthorizerV1 {
+  private CommandAuthorizerV1() {
+    throw new UnsupportedOperationException("CommandAuthorizerV1 should not be instantiated");
+  }
+
+  static void doAuthorization(HiveOperation op, BaseSemanticAnalyzer sem, SessionState ss,
+      Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws HiveException {
+    if (op == null) {
+      throw new HiveException("Operation should not be null");
+    }
+
+    Hive db = sem.getDb();
+    HiveAuthorizationProvider authorizer = ss.getAuthorizer();
+
+    authorizeOperation(op, sem, db, authorizer);
+    authorizeOutputs(op, outputs, db, authorizer);
+    authorizeInputs(op, sem, inputs, authorizer);
+  }
+
+  private static void authorizeOperation(HiveOperation op, BaseSemanticAnalyzer sem, Hive db,
+      HiveAuthorizationProvider authorizer) throws HiveException {
+    if (op.equals(HiveOperation.CREATEDATABASE)) {
+      authorizer.authorize(op.getInputRequiredPrivileges(), op.getOutputRequiredPrivileges());
+    } else if (op.equals(HiveOperation.CREATETABLE_AS_SELECT) || op.equals(HiveOperation.CREATETABLE)) {
+      authorizer.authorize(db.getDatabase(SessionState.get().getCurrentDatabase()), null,
+          HiveOperation.CREATETABLE_AS_SELECT.getOutputRequiredPrivileges());
+    } else if (op.equals(HiveOperation.IMPORT)) {
+      ImportSemanticAnalyzer isa = (ImportSemanticAnalyzer) sem;
+      if (!isa.existsTable()) {
+        authorizer.authorize(db.getDatabase(SessionState.get().getCurrentDatabase()), null,
+            HiveOperation.CREATETABLE_AS_SELECT.getOutputRequiredPrivileges());
+      }
+    }
+  }
+
+  private static void authorizeOutputs(HiveOperation op, Set<WriteEntity> outputs, Hive db,
+      HiveAuthorizationProvider authorizer) throws HiveException {
+    if (CollectionUtils.isEmpty(outputs)) {
+      return;
+    }
+
+    for (WriteEntity write : outputs) {
+      if (write.isDummy() || write.isPathType()) {
+        continue;
+      }
+      if (write.getType() == Entity.Type.DATABASE) {
+        if (!op.equals(HiveOperation.IMPORT)) {
+          authorizer.authorize(write.getDatabase(), null, op.getOutputRequiredPrivileges());
+        }
+        // We skip the DB check for IMPORT here because we already handle it above as a CTAS check.
+        continue;
+      }
+
+      if (write.getType() == WriteEntity.Type.PARTITION) {
+        Partition part = db.getPartition(write.getTable(), write.getPartition().getSpec(), false);
+        if (part != null) {
+          authorizer.authorize(write.getPartition(), null, op.getOutputRequiredPrivileges());
+          continue;
+        }
+      }
+
+      if (write.getTable() != null) {
+        authorizer.authorize(write.getTable(), null, op.getOutputRequiredPrivileges());
+      }
+    }
+  }
+
+  private static void authorizeInputs(HiveOperation op, BaseSemanticAnalyzer sem, Set<ReadEntity> inputs,
+      HiveAuthorizationProvider authorizer) throws HiveException {
+    if (CollectionUtils.isEmpty(inputs)) {
+      return;
+    }
+
+    Map<String, Boolean> tableUsePartLevelAuth = getTableUsePartLevelAuth(inputs);
+
+    // column authorization is checked through table scan operators.
+    Map<Table, List<String>> tab2Cols = new HashMap<Table, List<String>>();
+    Map<Partition, List<String>> part2Cols = new HashMap<Partition, List<String>>();
+    getTablePartitionUsedColumns(op, sem, tab2Cols, part2Cols, tableUsePartLevelAuth);
+
+    // cache the results for table authorization
+    Set<String> tableAuthChecked = new HashSet<String>();
+    for (ReadEntity read : inputs) {
+      // if the read is not direct, we do not need to check its authorization.
+      if (read.isDummy() || read.isPathType() || !read.isDirect()) {
+        continue;
+      }
+      if (read.getType() == Entity.Type.DATABASE) {
+        authorizer.authorize(read.getDatabase(), op.getInputRequiredPrivileges(), null);
+        continue;
+      }
+      Table tbl = read.getTable();
+      if (tbl.isView() && sem instanceof SemanticAnalyzer) {
+        tab2Cols.put(tbl, sem.getColumnAccessInfo().getTableToColumnAccessMap().get(tbl.getCompleteName()));
+      }
+      if (read.getPartition() != null) {
+        Partition partition = read.getPartition();
+        tbl = partition.getTable();
+        // use partition level authorization
+        if (Boolean.TRUE.equals(tableUsePartLevelAuth.get(tbl.getTableName()))) {
+          List<String> cols = part2Cols.get(partition);
+          if (cols != null && cols.size() > 0) {
+            authorizer.authorize(partition.getTable(), partition, cols, op.getInputRequiredPrivileges(), null);
+          } else {
+            authorizer.authorize(partition, op.getInputRequiredPrivileges(), null);
+          }
+          continue;
+        }
+      }
+
+      authorizeTable(op, authorizer, tableUsePartLevelAuth, tab2Cols, tableAuthChecked, tbl);
+    }
+  }
+
+  private static Map<String, Boolean> getTableUsePartLevelAuth(Set<ReadEntity> inputs) {
+    // determine if partition level privileges should be checked for input tables
+    Map<String, Boolean> tableUsePartLevelAuth = new HashMap<String, Boolean>();
+    for (ReadEntity read : inputs) {
+      if (read.isDummy() || read.isPathType() || read.getType() == Entity.Type.DATABASE) {
+        continue;
+      }
+      Table tbl = read.getTable();
+      if ((read.getPartition() != null) || (tbl != null && tbl.isPartitioned())) {
+        String tblName = tbl.getTableName();
+        if (tableUsePartLevelAuth.get(tblName) == null) {
+          boolean usePartLevelPriv = (tbl.getParameters().get("PARTITION_LEVEL_PRIVILEGE") != null &&
+              ("TRUE".equalsIgnoreCase(tbl.getParameters().get("PARTITION_LEVEL_PRIVILEGE"))));
+          if (usePartLevelPriv) {
+            tableUsePartLevelAuth.put(tblName, Boolean.TRUE);
+          } else {
+            tableUsePartLevelAuth.put(tblName, Boolean.FALSE);
+          }
+        }
+      }
+    }
+    return tableUsePartLevelAuth;
+  }
+
+  private static void getTablePartitionUsedColumns(HiveOperation op, BaseSemanticAnalyzer sem,
+      Map<Table, List<String>> tab2Cols, Map<Partition, List<String>> part2Cols,
+      Map<String, Boolean> tableUsePartLevelAuth) throws HiveException {
+    // for a select or create-as-select query, populate the partition to column (part2Cols) or
+    // table to columns mapping (tab2Cols)
+    if (op.equals(HiveOperation.CREATETABLE_AS_SELECT) || op.equals(HiveOperation.QUERY)) {
+      SemanticAnalyzer querySem = (SemanticAnalyzer) sem;
+      ParseContext parseCtx = querySem.getParseContext();
+
+      for (Map.Entry<String, TableScanOperator> topOpMap : querySem.getParseContext().getTopOps().entrySet()) {
+        TableScanOperator tableScanOp = topOpMap.getValue();
+        if (!tableScanOp.isInsideView()) {
+          Table tbl = tableScanOp.getConf().getTableMetadata();
+          List<String> cols = new ArrayList<String>();
+          for (int id : tableScanOp.getNeededColumnIDs()) {
+            cols.add(tbl.getCols().get(id).getName());
+          }
+          // the map may not contain all sources, since the input list may have been optimized out
+          // or be non-existent, though such sources may still be referenced by the TableScanOperator;
+          // if it's null then the partition probably doesn't exist so let's use table permission
+          if (tbl.isPartitioned() && Boolean.TRUE.equals(tableUsePartLevelAuth.get(tbl.getTableName()))) {
+            String aliasId = topOpMap.getKey();
+
+            PrunedPartitionList partsList = PartitionPruner.prune(tableScanOp, parseCtx, aliasId);
+            Set<Partition> parts = partsList.getPartitions();
+            for (Partition part : parts) {
+              List<String> existingCols = part2Cols.get(part);
+              if (existingCols == null) {
+                existingCols = new ArrayList<String>();
+              }
+              existingCols.addAll(cols);
+              part2Cols.put(part, existingCols);
+            }
+          } else {
+            List<String> existingCols = tab2Cols.get(tbl);
+            if (existingCols == null) {
+              existingCols = new ArrayList<String>();
+            }
+            existingCols.addAll(cols);
+            tab2Cols.put(tbl, existingCols);
+          }
+        }
+      }
+    }
+  }
+
+  private static void authorizeTable(HiveOperation op, HiveAuthorizationProvider authorizer,
+      Map<String, Boolean> tableUsePartLevelAuth, Map<Table, List<String>> tab2Cols, Set<String> tableAuthChecked,
+      Table tbl) throws HiveException {
+    // if we reach here, it means it needs to do a table authorization check, and the table authorization may
+    // have already happened because of other partitions
+    if (tbl != null && !tableAuthChecked.contains(tbl.getTableName()) &&
+        !(Boolean.TRUE.equals(tableUsePartLevelAuth.get(tbl.getTableName())))) {
+      List<String> cols = tab2Cols.get(tbl);
+      if (cols != null && cols.size() > 0) {
+        authorizer.authorize(tbl, null, cols, op.getInputRequiredPrivileges(), null);
+      } else {
+        authorizer.authorize(tbl, op.getInputRequiredPrivileges(), null);
+      }
+      tableAuthChecked.add(tbl.getTableName());
+    }
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizerV2.java ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizerV2.java
new file mode 100644
index 0000000000..8e28171fb9
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizerV2.java
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.security.authorization.command;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.Map.Entry;
+
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.ql.exec.FunctionInfo;
+import org.apache.hadoop.hive.ql.exec.FunctionUtils;
+import org.apache.hadoop.hive.ql.exec.FunctionInfo.FunctionType;
+import org.apache.hadoop.hive.ql.hooks.Entity;
+import org.apache.hadoop.hive.ql.hooks.ReadEntity;
+import org.apache.hadoop.hive.ql.hooks.WriteEntity;
+import org.apache.hadoop.hive.ql.hooks.Entity.Type;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
+import org.apache.hadoop.hive.ql.plan.HiveOperation;
+import org.apache.hadoop.hive.ql.security.authorization.AuthorizationUtils;
+import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzContext;
+import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType;
+import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject;
+import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.HivePrivObjectActionType;
+import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.HivePrivilegeObjectType;
+import org.apache.hadoop.hive.ql.session.SessionState;
+
+/**
+ * Command authorization, new type.
+ */
+class CommandAuthorizerV2 {
+  private CommandAuthorizerV2() {
+    throw new UnsupportedOperationException("CommandAuthorizerV2 should not be instantiated");
+  }
+
+  static void doAuthorization(HiveOperation op, BaseSemanticAnalyzer sem, SessionState ss,
+      Set<ReadEntity> inputs, Set<WriteEntity> outputs, String command) throws HiveException {
+    HiveOperationType hiveOpType = HiveOperationType.valueOf(op.name());
+
+    // colAccessInfo is set only in case of SemanticAnalyzer
+    Map<String, List<String>> selectTab2Cols = sem.getColumnAccessInfo() != null
+        ? sem.getColumnAccessInfo().getTableToColumnAccessMap() : null;
+    Map<String, List<String>> updateTab2Cols = sem.getUpdateColumnAccessInfo() != null
+        ? sem.getUpdateColumnAccessInfo().getTableToColumnAccessMap() : null;
+
+    List<ReadEntity> inputList = new ArrayList<ReadEntity>(inputs);
+    List<WriteEntity> outputList = new ArrayList<WriteEntity>(outputs);
+    addPermanentFunctionEntities(ss, inputList);
+
+    List<HivePrivilegeObject> inputsHObjs = getHivePrivObjects(inputList, selectTab2Cols);
+    List<HivePrivilegeObject> outputHObjs = getHivePrivObjects(outputList, updateTab2Cols);
+
+    HiveAuthzContext.Builder authzContextBuilder = new HiveAuthzContext.Builder();
+    authzContextBuilder.setUserIpAddress(ss.getUserIpAddress());
+    authzContextBuilder.setForwardedAddresses(ss.getForwardedAddresses());
+    authzContextBuilder.setCommandString(command);
+
+    ss.getAuthorizerV2().checkPrivileges(hiveOpType, inputsHObjs, outputHObjs, authzContextBuilder.build());
+  }
+
+  private static void addPermanentFunctionEntities(SessionState ss, List<ReadEntity> inputList) throws HiveException {
+    for (Entry<String, FunctionInfo> function : ss.getCurrentFunctionsInUse().entrySet()) {
+      if (function.getValue().getFunctionType() != FunctionType.PERSISTENT) {
+        // Built-in function access is allowed to all users. If a user can create a temp function, they may use it.
+        continue;
+      }
+
+      String[] qualifiedFunctionName = FunctionUtils.getQualifiedFunctionNameParts(function.getKey());
+      // this is only for the purpose of authorization; only the name matters.
+      Database db = new Database(qualifiedFunctionName[0], "", "", null);
+      inputList.add(new ReadEntity(db, qualifiedFunctionName[1], function.getValue().getClassName(), Type.FUNCTION));
+    }
+  }
+
+  private static List<HivePrivilegeObject> getHivePrivObjects(List<? extends Entity> privObjects,
+      Map<String, List<String>> tableName2Cols) {
+    List<HivePrivilegeObject> hivePrivobjs = new ArrayList<HivePrivilegeObject>();
+    if (privObjects == null) {
+      return hivePrivobjs;
+    }
+
+    for (Entity privObject : privObjects) {
+      if (privObject.isDummy()) {
+        // do not authorize dummy readEntity or writeEntity
+        continue;
+      }
+      if (privObject instanceof ReadEntity && !((ReadEntity) privObject).isDirect()) {
+        // This ReadEntity represents one of the underlying tables/views of a view, so skip it.
+        continue;
+      }
+      if (privObject instanceof WriteEntity && ((WriteEntity) privObject).isTempURI()) {
+        // do not authorize temporary uris
+        continue;
+      }
+      if (privObject.getTyp() == Type.TABLE && (privObject.getT() == null || privObject.getT().isTemporary())) {
+        // skip temporary tables from authorization
+        continue;
+      }
+
+      addHivePrivObject(privObject, tableName2Cols, hivePrivobjs);
+    }
+    return hivePrivobjs;
+  }
+
+  private static void addHivePrivObject(Entity privObject, Map<String, List<String>> tableName2Cols,
+      List<HivePrivilegeObject> hivePrivObjs) {
+    HivePrivilegeObjectType privObjType = AuthorizationUtils.getHivePrivilegeObjectType(privObject.getType());
+    HivePrivObjectActionType actionType = AuthorizationUtils.getActionType(privObject);
+    HivePrivilegeObject hivePrivObject = null;
+    switch (privObject.getType()) {
+    case DATABASE:
+      Database database = privObject.getDatabase();
+      hivePrivObject = new HivePrivilegeObject(privObjType, database.getName(), null, null, null, actionType, null,
+          null, database.getOwnerName(), database.getOwnerType());
+      break;
+    case TABLE:
+      Table table = privObject.getTable();
+      List<String> columns = tableName2Cols == null ? null :
+          tableName2Cols.get(Table.getCompleteName(table.getDbName(), table.getTableName()));
+      hivePrivObject = new HivePrivilegeObject(privObjType, table.getDbName(), table.getTableName(),
+          null, columns, actionType, null, null, table.getOwner(), table.getOwnerType());
+      break;
+    case DFS_DIR:
+    case LOCAL_DIR:
+      hivePrivObject = new HivePrivilegeObject(privObjType, null, privObject.getD().toString(), null, null,
+          actionType, null, null, null, null);
+      break;
+    case FUNCTION:
+      String dbName = privObject.getDatabase() != null ? privObject.getDatabase().getName() : null;
+      hivePrivObject = new HivePrivilegeObject(privObjType, dbName, privObject.getFunctionName(),
+          null, null, actionType, null, privObject.getClassName(), null, null);
+      break;
+    case DUMMYPARTITION:
+    case PARTITION:
+      // TODO: not currently handled
+      return;
+    case SERVICE_NAME:
+      hivePrivObject = new HivePrivilegeObject(privObjType, null, privObject.getServiceName(), null,
+          null, actionType, null, null, null, null);
+      break;
+    default:
+      throw new AssertionError("Unexpected object type");
+    }
+    hivePrivObjs.add(hivePrivObject);
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/package-info.java ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/package-info.java
new file mode 100644
index 0000000000..144382978a
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/package-info.java
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Command Authorization codes. */
+package org.apache.hadoop.hive.ql.security.authorization.command;
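
Reviewer note (not part of the patch): a minimal caller-side sketch of the refactored entry point, mirroring the Driver.java and ExplainTask.java hunks above. The wrapper method authorizeCommand and its enclosing class are hypothetical; queryState, sem, and command are assumed to be prepared by the caller exactly as Driver.run() already does.

    // Hypothetical usage sketch; only CommandAuthorizer.doAuthorization comes from this patch.
    import org.apache.hadoop.hive.ql.QueryState;
    import org.apache.hadoop.hive.ql.metadata.AuthorizationException;
    import org.apache.hadoop.hive.ql.metadata.HiveException;
    import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
    import org.apache.hadoop.hive.ql.plan.HiveOperation;
    import org.apache.hadoop.hive.ql.security.authorization.command.CommandAuthorizer;

    public final class AuthorizeCommandExample {
      static void authorizeCommand(QueryState queryState, BaseSemanticAnalyzer sem, String command)
          throws HiveException, AuthorizationException {
        // KILL QUERY is authorized separately (admin or operation owner), so it is skipped here,
        // just as in Driver.run().
        if (queryState.getHiveOperation() != HiveOperation.KILL_QUERY) {
          // Single public entry point; it dispatches internally to CommandAuthorizerV1 or
          // CommandAuthorizerV2 depending on SessionState.isAuthorizationModeV2().
          CommandAuthorizer.doAuthorization(queryState.getHiveOperation(), sem, command);
        }
      }
    }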