diff --git a/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedMigration.java b/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedMigration.java index ab4cd6c55b4b3ac8ef28ef9d38297f2452cba00e..1146bd5defcd6922bd1a34248882e63268bcb008 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedMigration.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedMigration.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.util; import java.io.IOException; +import java.security.PrivilegedExceptionAction; import java.util.Arrays; import java.util.HashMap; import java.util.List; @@ -106,6 +107,7 @@ final boolean dryRun; final TableType tableType; final int tablePoolSize; + final String fsOperationUser; RunOptions(String dbRegex, String tableRegex, @@ -118,7 +120,8 @@ boolean shouldMoveExternal, boolean dryRun, TableType tableType, - int tablePoolSize) { + int tablePoolSize, + String fsOperationUser) { super(); this.dbRegex = dbRegex; this.tableRegex = tableRegex; @@ -132,6 +135,7 @@ this.dryRun = dryRun; this.tableType = tableType; this.tablePoolSize = tablePoolSize; + this.fsOperationUser = fsOperationUser; } public void setShouldModifyManagedTableLocation(boolean shouldModifyManagedTableLocation) { @@ -157,6 +161,7 @@ public String toString() { ", dryRun=" + dryRun + ", tableType=" + tableType + ", tablePoolSize=" + tablePoolSize + + ", fsOperationUser=" + fsOperationUser + '}'; } } @@ -334,6 +339,15 @@ static Options createOptions() { .withArgName("table type") .create("tt")); + result.addOption(OptionBuilder + .withLongOpt("fsOperationUser") + .withDescription("If set, migration tool will impersonate this user to carry out write operations on file " + + "system. Useful e.g. if this tool is run as hive, but chown-ing is also a requirement." + + "If this is unset file operations will be run in the name of the user running this process (or kinit'ed " + + "user in Kerberos environments)") + .hasArg() + .create()); + return result; } @@ -378,6 +392,8 @@ static RunOptions createRunOptions(CommandLine cli) throws Exception { } boolean dryRun = cli.hasOption("dryRun"); + String fsOperationUser = cli.getOptionValue("fsOperationUser"); + String tableTypeText = cli.getOptionValue("tableType"); int defaultPoolSize = Runtime.getRuntime().availableProcessors() / 2; @@ -406,7 +422,8 @@ static RunOptions createRunOptions(CommandLine cli) throws Exception { shouldMoveExternal, dryRun, tableTypeText == null ? null : TableType.valueOf(tableTypeText), - tablePoolSize); + tablePoolSize, + fsOperationUser); return runOpts; } @@ -431,6 +448,7 @@ private static int getIntOptionValue(CommandLine commandLine, String optionName, private final String groupName; private final FsPermission dirPerms; private final FsPermission filePerms; + private final UserGroupInformation fsOperationUser; private CloseableThreadLocal hms; private ThreadLocal wh; @@ -462,6 +480,17 @@ private static int getIntOptionValue(CommandLine commandLine, String optionName, } } + try { + if (runOptions.fsOperationUser != null) { + fsOperationUser = UserGroupInformation.createProxyUser(runOptions.fsOperationUser, + UserGroupInformation.getLoginUser()); + } else { + fsOperationUser = UserGroupInformation.getLoginUser(); + } + } catch (IOException e) { + throw new RuntimeException("Error while setting up UGI for FS operations."); + } + this.hms = new CloseableThreadLocal<>(() -> { try { HiveMetaStoreClient hiveMetaStoreClient = new HiveMetaStoreClient(conf); @@ -650,7 +679,7 @@ void processDatabase(String dbName, ForkJoinPool tablePool) { LOG.info("Changing location of database {} to {}", dbName, newDefaultDbLocation); if (!runOptions.dryRun) { - FileSystem fs = newDefaultDbLocation.getFileSystem(conf); + FileSystem fs = getFS(newDefaultDbLocation, conf, fsOperationUser); FileUtils.mkdir(fs, newDefaultDbLocation, conf); // Set appropriate owner/perms of the DB dir only, no need to recurse checkAndSetFileOwnerPermissions(fs, newDefaultDbLocation, @@ -853,7 +882,7 @@ boolean shouldModifyPartitionLocation(Database dbObj, Table tableObj, Partition void createExternalDbDir(Database dbObj) throws IOException, MetaException { Path externalTableDbPath = wh.get().getDefaultExternalDatabasePath(dbObj.getName()); - FileSystem fs = externalTableDbPath.getFileSystem(conf); + FileSystem fs = getFS(externalTableDbPath, conf, fsOperationUser); if (!fs.exists(externalTableDbPath)) { String dbOwner = ownerName; String dbGroup = null; @@ -872,6 +901,9 @@ void createExternalDbDir(Database dbObj) throws IOException, MetaException { } } + if (dbOwner == null) { + dbOwner = "hive"; + } LOG.info("Creating external table directory for database {} at {} with ownership {}/{}", dbObj.getName(), externalTableDbPath, dbOwner, dbGroup); if (!runOptions.dryRun) { @@ -897,7 +929,7 @@ void moveTableData(Database dbObj, Table tableObj, Path newTablePath) throws Hiv // Move table directory. if (!runOptions.dryRun) { - FileSystem fs = newTablePath.getFileSystem(conf); + FileSystem fs = getFS(newTablePath, conf, fsOperationUser); if (fs.exists(oldTablePath)) { boolean movedData = fs.rename(oldTablePath, newTablePath); if (!movedData) { @@ -1659,6 +1691,20 @@ private static void migrateKuduStorageHandlerType(Table table, Map() { + @Override + public FileSystem run() throws Exception { + return path.getFileSystem(conf); + } + }); + } catch (InterruptedException e) { + throw new IOException(e); + } + } + /** * can set it from tests to test when config needs something other than default values. */ diff --git a/ql/src/test/org/apache/hadoop/hive/ql/util/TestHiveStrictManagedMigration.java b/ql/src/test/org/apache/hadoop/hive/ql/util/TestHiveStrictManagedMigration.java index 038a8a263ff6653d34240924fff2507a116519bf..a6862a83827c27d252c1c1634655a8d5018421f8 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/util/TestHiveStrictManagedMigration.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/util/TestHiveStrictManagedMigration.java @@ -140,6 +140,26 @@ public void testExternalMoveFailsForIncorrectOptions() throws Throwable { } } + /** + * Should encounter a DB with an unset owner, and should try to chown the new dir path to 'hive' user. + * This will always fail in this test, as we're never running it as root. + * @throws Exception + */ + @Test(expected = AssertionError.class) + public void testExtDbDirOnFsIsCreatedAsHiveIfDbOwnerNull() throws Exception { + runStatementOnDriver("drop database if exists ownerlessdb"); + runStatementOnDriver("create database ownerlessdb"); + Database db = Hive.get().getDatabase("ownerlessdb"); + db.setOwnerName(null); + Hive.get().alterDatabase("ownerlessdb", db); + + String[] args = {"-m", "external"}; + HiveConf newConf = new HiveConf(hiveConf); + File newExtWarehouseDir = new File(getTestDataDir(), "newExternal"); + newConf.set(HiveConf.ConfVars.HIVE_METASTORE_WAREHOUSE_EXTERNAL.varname, newExtWarehouseDir.getAbsolutePath()); + runMigrationTool(newConf, args); + } + @Override protected String getTestDataDir() { return TEST_DATA_DIR;