diff --git a/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedMigration.java b/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedMigration.java index 19fb750a658725bb0588585cb2c7c786e7f15585..e2e5983a2830eddcdd25307970e81d613abc6149 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedMigration.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedMigration.java @@ -73,6 +73,10 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Lists; +import static java.util.stream.Collectors.toList; +import static org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE; +import static org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE; + public class HiveStrictManagedMigration { private static final Logger LOG = LoggerFactory.getLogger(HiveStrictManagedMigration.class); @@ -91,9 +95,10 @@ final String oldWarehouseRoot; final TableMigrationOption migrationOption; final Properties confProps; - final boolean shouldModifyManagedTableLocation; + boolean shouldModifyManagedTableLocation; final boolean shouldModifyManagedTableOwner; final boolean shouldModifyManagedTablePermissions; + boolean shouldMoveExternal; final boolean dryRun; final TableType tableType; final int tablePoolSize; @@ -106,6 +111,7 @@ boolean shouldModifyManagedTableLocation, boolean shouldModifyManagedTableOwner, boolean shouldModifyManagedTablePermissions, + boolean shouldMoveExternal, boolean dryRun, TableType tableType, int tablePoolSize) { @@ -118,24 +124,18 @@ this.shouldModifyManagedTableLocation = shouldModifyManagedTableLocation; this.shouldModifyManagedTableOwner = shouldModifyManagedTableOwner; this.shouldModifyManagedTablePermissions = shouldModifyManagedTablePermissions; + this.shouldMoveExternal = shouldMoveExternal; this.dryRun = dryRun; this.tableType = tableType; this.tablePoolSize = tablePoolSize; } - public RunOptions setShouldModifyManagedTableLocation(boolean shouldModifyManagedTableLocation) { - return new RunOptions( - this.dbRegex, - this.tableRegex, - this.oldWarehouseRoot, - this.migrationOption, - this.confProps, - shouldModifyManagedTableLocation, - this.shouldModifyManagedTableOwner, - this.shouldModifyManagedTablePermissions, - this.dryRun, - this.tableType, - this.tablePoolSize); + public void setShouldModifyManagedTableLocation(boolean shouldModifyManagedTableLocation) { + this.shouldModifyManagedTableLocation = shouldModifyManagedTableLocation; + } + + public void setShouldMoveExternal(boolean shouldMoveExternal) { + this.shouldMoveExternal = shouldMoveExternal; } @Override @@ -149,6 +149,7 @@ public String toString() { ", shouldModifyManagedTableLocation=" + shouldModifyManagedTableLocation + ", shouldModifyManagedTableOwner=" + shouldModifyManagedTableOwner + ", shouldModifyManagedTablePermissions=" + shouldModifyManagedTablePermissions + + ", shouldMoveExternal=" + shouldMoveExternal + ", dryRun=" + dryRun + ", tableType=" + tableType + ", tablePoolSize=" + tablePoolSize + @@ -172,16 +173,22 @@ public String toString() { private static class WarehouseRootCheckResult { final boolean shouldModifyManagedTableLocation; - final Path curWhRootPath; + final boolean shouldMoveExternal; + final Path targetPath; final HadoopShims.HdfsEncryptionShim encryptionShim; + final HadoopShims.HdfsErasureCodingShim ecShim; WarehouseRootCheckResult( boolean shouldModifyManagedTableLocation, + boolean shouldMoveExternal, Path curWhRootPath, - HadoopShims.HdfsEncryptionShim encryptionShim) { + HadoopShims.HdfsEncryptionShim encryptionShim, + HadoopShims.HdfsErasureCodingShim ecShim) { this.shouldModifyManagedTableLocation = shouldModifyManagedTableLocation; - this.curWhRootPath = curWhRootPath; + this.shouldMoveExternal = shouldMoveExternal; + this.targetPath = curWhRootPath; this.encryptionShim = encryptionShim; + this.ecShim = ecShim; } } @@ -208,8 +215,10 @@ public static void main(String[] args) throws Exception { try { HiveConf conf = hiveConf == null ? new HiveConf() : hiveConf; WarehouseRootCheckResult warehouseRootCheckResult = checkOldWarehouseRoot(runOptions, conf); - runOptions = runOptions.setShouldModifyManagedTableLocation( - warehouseRootCheckResult.shouldModifyManagedTableLocation); + runOptions.setShouldModifyManagedTableLocation( + warehouseRootCheckResult.shouldModifyManagedTableLocation); + runOptions.setShouldMoveExternal( + warehouseRootCheckResult.shouldMoveExternal); boolean createExternalDirsForDbs = checkExternalWarehouseDir(conf); OwnerPermsOptions ownerPermsOptions = checkOwnerPermsOptions(runOptions, conf); @@ -295,6 +304,12 @@ static Options createOptions() { "shouldModifyManagedTableOwner, shouldModifyManagedTablePermissions options") .create()); + result.addOption(OptionBuilder + .withLongOpt("shouldMoveExternal") + .withDescription("Whether tables living in the old warehouse path should have their data moved to the" + + " default external location. Applicable only if migrationOption = external") + .create()); + result.addOption(OptionBuilder .withLongOpt("help") .withDescription("print help message") @@ -347,6 +362,16 @@ static RunOptions createRunOptions(CommandLine cli) throws Exception { shouldModifyManagedTablePermissions = true; } String oldWarehouseRoot = cli.getOptionValue("oldWarehouseRoot"); + boolean shouldMoveExternal = cli.hasOption("shouldMoveExternal"); + if (shouldMoveExternal && !migrationOption.equals(TableMigrationOption.EXTERNAL)) { + throw new IllegalArgumentException("Please select external as migration option, it is required for " + + "shouldMoveExternal option."); + } + if (shouldModifyManagedTableLocation && shouldMoveExternal) { + throw new IllegalArgumentException("Options shouldModifyManagedTableLocation and " + + "shouldMoveExternal cannot be used at the same time. Migration with move option on " + + " managed tables either ends up with them remaining managed or converted to external, but can't be both."); + } boolean dryRun = cli.hasOption("dryRun"); String tableTypeText = cli.getOptionValue("tableType"); @@ -374,6 +399,7 @@ static RunOptions createRunOptions(CommandLine cli) throws Exception { shouldModifyManagedTableLocation, shouldModifyManagedTableOwner, shouldModifyManagedTablePermissions, + shouldMoveExternal, dryRun, tableTypeText == null ? null : TableType.valueOf(tableTypeText), tablePoolSize); @@ -392,10 +418,11 @@ private static int getIntOptionValue(CommandLine commandLine, String optionName, } private final HiveConf conf; - private RunOptions runOptions; + private final RunOptions runOptions; private final boolean createExternalDirsForDbs; - private final Path curWhRootPath; + private final Path targetPath; private final HadoopShims.HdfsEncryptionShim encryptionShim; + private final HadoopShims.HdfsErasureCodingShim ecShim; private final String ownerName; private final String groupName; private final FsPermission dirPerms; @@ -418,8 +445,9 @@ private static int getIntOptionValue(CommandLine commandLine, String optionName, this.groupName = ownerPermsOptions.groupName; this.dirPerms = ownerPermsOptions.dirPerms; this.filePerms = ownerPermsOptions.filePerms; - this.curWhRootPath = warehouseRootCheckResult.curWhRootPath; + this.targetPath = warehouseRootCheckResult.targetPath; this.encryptionShim = warehouseRootCheckResult.encryptionShim; + this.ecShim = warehouseRootCheckResult.ecShim; // Make sure all --hiveconf settings get added to the HiveConf. // This allows utility-specific settings (such as strict.managed.tables.migration.owner) @@ -449,7 +477,7 @@ private static int getIntOptionValue(CommandLine commandLine, String optionName, throw new RuntimeException(e); } }); - if (runOptions.shouldModifyManagedTableLocation) { + if (runOptions.shouldModifyManagedTableLocation || runOptions.shouldMoveExternal) { Configuration oldConf = new Configuration(conf); HiveConf.setVar(oldConf, HiveConf.ConfVars.METASTOREWAREHOUSE, runOptions.oldWarehouseRoot); @@ -500,43 +528,68 @@ void run() throws Exception { static WarehouseRootCheckResult checkOldWarehouseRoot(RunOptions runOptions, HiveConf conf) throws IOException { boolean shouldModifyManagedTableLocation = runOptions.shouldModifyManagedTableLocation; - Path curWhRootPath = null; + boolean shouldMoveExternal = runOptions.shouldMoveExternal; + Path targetPath = null; HadoopShims.HdfsEncryptionShim encryptionShim = null; + HadoopShims.HdfsErasureCodingShim ecShim = null; + + if (shouldMoveExternal && !checkExternalWarehouseDir(conf)) { + LOG.info("External warehouse path not specified/empty. Disabling shouldMoveExternal"); + shouldMoveExternal = false; + } - if (runOptions.shouldModifyManagedTableLocation) { + if (shouldModifyManagedTableLocation || shouldMoveExternal) { if (runOptions.oldWarehouseRoot == null) { - LOG.info("oldWarehouseRoot is not specified. Disabling shouldModifyManagedTableLocation"); + LOG.info("oldWarehouseRoot is not specified. Disabling shouldModifyManagedTableLocation and " + + "shouldMoveExternal"); shouldModifyManagedTableLocation = false; + shouldMoveExternal = false; } else { - String curWarehouseRoot = HiveConf.getVar(conf, HiveConf.ConfVars.METASTOREWAREHOUSE); - if (arePathsEqual(conf, runOptions.oldWarehouseRoot, curWarehouseRoot)) { - LOG.info("oldWarehouseRoot is the same as the current warehouse root {}." - + " Disabling shouldModifyManagedTableLocation", + String currentPathString = shouldModifyManagedTableLocation ? + HiveConf.getVar(conf, HiveConf.ConfVars.METASTOREWAREHOUSE) : + HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_METASTORE_WAREHOUSE_EXTERNAL); + if (arePathsEqual(conf, runOptions.oldWarehouseRoot, currentPathString)) { + LOG.info("oldWarehouseRoot is the same as the target path {}." + + " Disabling shouldModifyManagedTableLocation and shouldMoveExternal", runOptions.oldWarehouseRoot); shouldModifyManagedTableLocation = false; + shouldMoveExternal = false; } else { Path oldWhRootPath = new Path(runOptions.oldWarehouseRoot); - curWhRootPath = new Path(curWarehouseRoot); + targetPath = new Path(currentPathString); FileSystem oldWhRootFs = oldWhRootPath.getFileSystem(conf); - FileSystem curWhRootFs = curWhRootPath.getFileSystem(conf); + FileSystem curWhRootFs = targetPath.getFileSystem(conf); oldWhRootPath = oldWhRootFs.makeQualified(oldWhRootPath); - curWhRootPath = curWhRootFs.makeQualified(curWhRootPath); + targetPath = curWhRootFs.makeQualified(targetPath); if (!FileUtils.equalsFileSystem(oldWhRootFs, curWhRootFs)) { - LOG.info("oldWarehouseRoot {} has a different FS than the current warehouse root {}." - + " Disabling shouldModifyManagedTableLocation", - runOptions.oldWarehouseRoot, curWarehouseRoot); + LOG.info("oldWarehouseRoot {} has a different FS than the target path {}." + + " Disabling shouldModifyManagedTableLocation and shouldMoveExternal", + runOptions.oldWarehouseRoot, currentPathString); shouldModifyManagedTableLocation = false; + shouldMoveExternal = false; } else { if (!isHdfs(oldWhRootFs)) { - LOG.info("Warehouse is using non-HDFS FileSystem {}. Disabling shouldModifyManagedTableLocation", - oldWhRootFs.getUri()); + LOG.info("Warehouse is using non-HDFS FileSystem {}. Disabling shouldModifyManagedTableLocation and" + + "shouldMoveExternal", oldWhRootFs.getUri()); shouldModifyManagedTableLocation = false; + shouldMoveExternal = false; } else { encryptionShim = ShimLoader.getHadoopShims().createHdfsEncryptionShim(oldWhRootFs, conf); - if (!hasEquivalentEncryption(encryptionShim, oldWhRootPath, curWhRootPath)) { - LOG.info("oldWarehouseRoot {} and current warehouse root {} have different encryption zones." + - " Disabling shouldModifyManagedTableLocation", oldWhRootPath, curWhRootPath); + if (!hasEquivalentEncryption(encryptionShim, oldWhRootPath, targetPath)) { + LOG.info("oldWarehouseRoot {} and target path {} have different encryption zones." + + " Disabling shouldModifyManagedTableLocation and shouldMoveExternal", + oldWhRootPath, targetPath); shouldModifyManagedTableLocation = false; + shouldMoveExternal = false; + } else { + ecShim = ShimLoader.getHadoopShims().createHdfsErasureCodingShim(oldWhRootFs, conf); + if (!hasEquivalentErasureCodingPolicy(ecShim, oldWhRootPath, targetPath)) { + LOG.info("oldWarehouseRoot {} and target path {} have different erasure coding policies." + + " Disabling shouldModifyManagedTableLocation and shouldMoveExternal", + oldWhRootPath, targetPath); + shouldModifyManagedTableLocation = false; + shouldMoveExternal = false; + } } } } @@ -544,7 +597,8 @@ static WarehouseRootCheckResult checkOldWarehouseRoot(RunOptions runOptions, Hiv } } - return new WarehouseRootCheckResult(shouldModifyManagedTableLocation, curWhRootPath, encryptionShim); + return new WarehouseRootCheckResult(shouldModifyManagedTableLocation, shouldMoveExternal, + targetPath, encryptionShim, ecShim); } static OwnerPermsOptions checkOwnerPermsOptions(RunOptions runOptions, HiveConf conf) { @@ -581,9 +635,14 @@ void processDatabase(String dbName, ForkJoinPool tablePool) { LOG.info("Processing database {}", dbName); Database dbObj = hms.get().getDatabase(dbName); - boolean modifyDefaultManagedLocation = shouldModifyDatabaseLocation(dbObj); - if (modifyDefaultManagedLocation) { - Path newDefaultDbLocation = wh.get().getDefaultDatabasePath(dbName); + if (createExternalDirsForDbs) { + createExternalDbDir(dbObj); + } + + boolean modifyLocation = shouldModifyDatabaseLocation(dbObj); + + if (modifyLocation) { + Path newDefaultDbLocation = getDefaultDbPathManagedOrExternal(dbName); LOG.info("Changing location of database {} to {}", dbName, newDefaultDbLocation); if (!runOptions.dryRun) { @@ -595,10 +654,6 @@ void processDatabase(String dbName, ForkJoinPool tablePool) { } } - if (createExternalDirsForDbs) { - createExternalDbDir(dbObj); - } - List tableNames; if (runOptions.tableType == null) { tableNames = hms.get().getTables(dbName, runOptions.tableRegex); @@ -609,20 +664,20 @@ void processDatabase(String dbName, ForkJoinPool tablePool) { } boolean errorsInThisDb = !tablePool.submit(() -> tableNames.parallelStream() - .map(tableName -> processTable(dbObj, tableName, modifyDefaultManagedLocation)) + .map(tableName -> processTable(dbObj, tableName, modifyLocation)) .reduce(true, (aBoolean, aBoolean2) -> aBoolean && aBoolean2)).get(); if (errorsInThisDb) { failuresEncountered.set(true); } // Finally update the DB location. This would prevent subsequent runs of the migration from processing this DB. - if (modifyDefaultManagedLocation) { + if (modifyLocation) { if (errorsInThisDb) { LOG.error("Not updating database location for {} since an error was encountered. " + "The migration must be run again for this database.", dbObj.getName()); } else { if (!runOptions.dryRun) { - Path newDefaultDbLocation = wh.get().getDefaultDatabasePath(dbName); + Path newDefaultDbLocation = getDefaultDbPathManagedOrExternal(dbName); // dbObj after this call would have the new DB location. // Keep that in mind if anything below this requires the old DB path. hiveUpdater.get().updateDbLocation(dbObj, newDefaultDbLocation); @@ -638,6 +693,12 @@ void processDatabase(String dbName, ForkJoinPool tablePool) { } } + private Path getDefaultDbPathManagedOrExternal(String dbName) throws MetaException { + return runOptions.shouldMoveExternal ? + wh.get().getDefaultExternalDatabasePath(dbName) : + wh.get().getDefaultDatabasePath(dbName); + } + public static boolean migrateTable(Table tableObj, TableType tableType, TableMigrationOption migrationOption, boolean dryRun, HiveUpdater hiveUpdater, IMetaStoreClient hms, Configuration conf) throws HiveException, IOException, TException { @@ -664,7 +725,7 @@ public static boolean migrateTable(Table tableObj, TableType tableType, TableMig return false; } - boolean processTable(Database dbObj, String tableName, boolean modifyDefaultManagedLocation) { + boolean processTable(Database dbObj, String tableName, boolean modifyLocation) { try { String dbName = dbObj.getName(); LOG.debug("Processing table {}", getQualifiedName(dbName, tableName)); @@ -686,19 +747,24 @@ boolean processTable(Database dbObj, String tableName, boolean modifyDefaultMana return true; } - if (TableType.valueOf(tableObj.getTableType()) == TableType.MANAGED_TABLE) { - Path tablePath = new Path(tableObj.getSd().getLocation()); - if (modifyDefaultManagedLocation && shouldModifyTableLocation(dbObj, tableObj)) { - Path newTablePath = wh.get().getDnsPath( - new Path(wh.get().getDefaultDatabasePath(dbName), - MetaStoreUtils.encodeTableName(tableName.toLowerCase()))); - moveTableData(dbObj, tableObj, newTablePath); - if (!runOptions.dryRun) { - // File ownership/permission checks should be done on the new table path. - tablePath = newTablePath; - } + Path tablePath = new Path(tableObj.getSd().getLocation()); + + boolean shouldMoveTable = modifyLocation && ( + (MANAGED_TABLE.name().equals(tableObj.getTableType()) && runOptions.shouldModifyManagedTableLocation) || + (EXTERNAL_TABLE.name().equals(tableObj.getTableType()) && runOptions.shouldMoveExternal)); + + if (shouldMoveTable && shouldModifyTableLocation(dbObj, tableObj)) { + Path newTablePath = wh.get().getDnsPath( + new Path(getDefaultDbPathManagedOrExternal(dbName), + MetaStoreUtils.encodeTableName(tableName.toLowerCase()))); + moveTableData(dbObj, tableObj, newTablePath); + if (!runOptions.dryRun) { + // File ownership/permission checks should be done on the new table path. + tablePath = newTablePath; } + } + if (MANAGED_TABLE.equals(tableType)) { if (runOptions.shouldModifyManagedTableOwner || runOptions.shouldModifyManagedTablePermissions) { FileSystem fs = tablePath.getFileSystem(conf); if (isHdfs(fs)) { @@ -717,17 +783,22 @@ boolean processTable(Database dbObj, String tableName, boolean modifyDefaultMana boolean shouldModifyDatabaseLocation(Database dbObj) throws IOException, MetaException { String dbName = dbObj.getName(); - if (runOptions.shouldModifyManagedTableLocation) { + if (runOptions.shouldModifyManagedTableLocation || runOptions.shouldMoveExternal) { // Check if the database location is in the default location based on the old warehouse root. // If so then change the database location to the default based on the current warehouse root. String dbLocation = dbObj.getLocationUri(); Path oldDefaultDbLocation = oldWh.get().getDefaultDatabasePath(dbName); if (arePathsEqual(conf, dbLocation, oldDefaultDbLocation.toString())) { - if (hasEquivalentEncryption(encryptionShim, oldDefaultDbLocation, curWhRootPath)) { - return true; + if (hasEquivalentEncryption(encryptionShim, oldDefaultDbLocation, targetPath)) { + if (hasEquivalentErasureCodingPolicy(ecShim, oldDefaultDbLocation, targetPath)) { + return true; + } else { + LOG.info("{} and {} have different EC policies. Will not change database location for {}", + oldDefaultDbLocation, targetPath, dbName); + } } else { LOG.info("{} and {} are on different encryption zones. Will not change database location for {}", - oldDefaultDbLocation, curWhRootPath, dbName); + oldDefaultDbLocation, targetPath, dbName); } } } @@ -742,28 +813,30 @@ boolean shouldModifyTableLocation(Database dbObj, Table tableObj) throws IOExcep String tableLocation = tableObj.getSd().getLocation(); Path oldDefaultTableLocation = oldWh.get().getDefaultTablePath(dbObj, tableObj.getTableName()); if (arePathsEqual(conf, tableLocation, oldDefaultTableLocation.toString())) { - if (hasEquivalentEncryption(encryptionShim, oldDefaultTableLocation, curWhRootPath)) { - return true; + if (hasEquivalentEncryption(encryptionShim, oldDefaultTableLocation, targetPath)) { + if (hasEquivalentErasureCodingPolicy(ecShim, oldDefaultTableLocation, targetPath)) { + return true; + } else { + LOG.info("{} and {} have different EC policies. Will not change table location for {}", + oldDefaultTableLocation, targetPath, getQualifiedName(tableObj)); + } } else { LOG.info("{} and {} are on different encryption zones. Will not change table location for {}", - oldDefaultTableLocation, curWhRootPath, getQualifiedName(tableObj)); + oldDefaultTableLocation, targetPath, getQualifiedName(tableObj)); } } return false; } - boolean shouldModifyPartitionLocation(Database dbObj, Table tableObj, Partition partObj, Map partSpec) - throws IOException, MetaException { - String tableName = tableObj.getTableName(); + boolean shouldModifyPartitionLocation(Database dbObj, Table tableObj, Partition partObj, + Map partSpec) throws IOException, MetaException { String partLocation = partObj.getSd().getLocation(); - Path oldDefaultPartLocation = oldWh.get().getDefaultPartitionPath(dbObj, tableObj, partSpec); + Path oldDefaultPartLocation = runOptions.shouldMoveExternal ? + oldWh.get().getPartitionPath(dbObj, tableObj, partSpec.values().stream().collect(toList())): + oldWh.get().getDefaultPartitionPath(dbObj, tableObj, partSpec); if (arePathsEqual(conf, partLocation, oldDefaultPartLocation.toString())) { - if (hasEquivalentEncryption(encryptionShim, oldDefaultPartLocation, curWhRootPath)) { - return true; - } else { - LOG.info("{} and {} are on different encryption zones. Will not change partition location", - oldDefaultPartLocation, curWhRootPath); - } + // No need to check encryption zone and EC policy. Data was moved already along with the whole table. + return true; } return false; } @@ -946,7 +1019,7 @@ static boolean migrateToExternalTable(Table tableObj, TableType tableType, boole } LOG.info("Converting {} to external table ...", getQualifiedName(tableObj)); if (!dryRun) { - tableObj.setTableType(TableType.EXTERNAL_TABLE.toString()); + tableObj.setTableType(EXTERNAL_TABLE.toString()); hiveUpdater.updateTableProperties(tableObj, convertToExternalTableProps); } return true; @@ -1540,6 +1613,20 @@ static boolean hasEquivalentEncryption(HadoopShims.HdfsEncryptionShim encryption return true; } + static boolean hasEquivalentErasureCodingPolicy(HadoopShims.HdfsErasureCodingShim ecShim, + Path path1, Path path2) throws IOException { + HadoopShims.HdfsFileErasureCodingPolicy policy1 = ecShim.getErasureCodingPolicy(path1); + HadoopShims.HdfsFileErasureCodingPolicy policy2 = ecShim.getErasureCodingPolicy(path2); + if (policy1 != null) { + return policy1.equals(policy2); + } else { + if (policy2 == null) { + return true; + } + return false; + } + } + /** * can set it from tests to test when config needs something other than default values. */ diff --git a/ql/src/test/org/apache/hadoop/hive/ql/util/TestHiveStrictManagedMigration.java b/ql/src/test/org/apache/hadoop/hive/ql/util/TestHiveStrictManagedMigration.java index 057135bbcbe4a1e19f13980cca32044067402175..16d4772bb79a18e16c17cd2a392463a0826d22fe 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/util/TestHiveStrictManagedMigration.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/util/TestHiveStrictManagedMigration.java @@ -17,16 +17,30 @@ */ package org.apache.hadoop.hive.ql.util; +import static java.util.stream.Collectors.toSet; import static org.apache.hadoop.hive.ql.TxnCommandsBaseForTests.Table.ACIDTBL; import static org.apache.hadoop.hive.ql.TxnCommandsBaseForTests.Table.ACIDTBLPART; import static org.apache.hadoop.hive.ql.TxnCommandsBaseForTests.Table.NONACIDNONBUCKET; import static org.apache.hadoop.hive.ql.TxnCommandsBaseForTests.Table.NONACIDORCTBL; import static org.apache.hadoop.hive.ql.TxnCommandsBaseForTests.Table.NONACIDORCTBL2; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.HashSet; +import java.util.List; +import java.util.Set; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.ql.TxnCommandsBaseForTests; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Partition; + import org.junit.Assert; import org.junit.Test; @@ -34,6 +48,7 @@ private static final String TEST_DATA_DIR = new File(System.getProperty("java.io.tmpdir") + File.separator + TestHiveStrictManagedMigration.class.getCanonicalName() + "-" + System.currentTimeMillis() ).getPath().replaceAll("\\\\", "/"); + private static final String EXTERNAL_TABLE_LOCATION = new File(TEST_DATA_DIR, "tmp").getPath(); @Test public void testUpgrade() throws Exception { @@ -57,9 +72,7 @@ public void testUpgrade() throws Exception { File newWarehouseDir = new File(getTestDataDir(), "newWarehouse"); newConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, newWarehouseDir.getAbsolutePath()); newConf.set("strict.managed.tables.migration.owner", System.getProperty("user.name")); - HiveStrictManagedMigration.hiveConf = newConf; - HiveStrictManagedMigration.scheme = "file"; - HiveStrictManagedMigration.main(args); + runMigrationTool(newConf, args); Assert.assertTrue(newWarehouseDir.exists()); Assert.assertTrue(new File(newWarehouseDir, ACIDTBL.toString().toLowerCase()).exists()); @@ -69,10 +82,193 @@ public void testUpgrade() throws Exception { Assert.assertTrue(new File(newWarehouseDir, NONACIDORCTBL2.toString().toLowerCase()).exists()); Assert.assertTrue(new File(new File(newWarehouseDir, "test.db"), "tacid").exists()); Assert.assertTrue(new File(oldWarehouse, "texternal").exists()); + + // Tear down + runStatementOnDriver("drop database test cascade"); + Database defaultDb = Hive.get().getDatabase("default"); + defaultDb.setLocationUri(oldWarehouse); + Hive.get().alterDatabase("default", defaultDb); + System.setProperty("hive.strict.managed.tables", "false"); + } + + /** + * Tests shouldMoveExternal option on all possible scenarios of the following dimensions: + * - managed or external table type? + * - location in (old) warehouse or truly external location? + * - is partitioned? + * - is partition location default (under table directory) or custom external path? + * - default or custom database? + * @throws Exception + */ + @Test + public void testExternalMove() throws Exception { + setupExternalTableTest(); + String oldWarehouse = getWarehouseDir(); + String[] args = {"-m", "external", "--shouldMoveExternal", "--tableRegex", "man.*|ext.*|custm.*|custe.*", + "--oldWarehouseRoot", oldWarehouse}; + HiveConf newConf = new HiveConf(hiveConf); + File newManagedWarehouseDir = new File(getTestDataDir(), "newManaged"); + File newExtWarehouseDir = new File(getTestDataDir(), "newExternal"); + newConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, newManagedWarehouseDir.getAbsolutePath()); + newConf.set(HiveConf.ConfVars.HIVE_METASTORE_WAREHOUSE_EXTERNAL.varname, newExtWarehouseDir.getAbsolutePath()); + runMigrationTool(newConf, args); + Assert.assertTrue(newExtWarehouseDir.exists()); + assertExternalTableLocations(newExtWarehouseDir, new File(EXTERNAL_TABLE_LOCATION)); + assertSDLocationCorrect(); + } + + @Test(expected = IllegalArgumentException.class) + public void testExternalMoveFailsForIncorrectOptions() throws Throwable { + try { + String[] args = {"-m", "automatic", "--shouldMoveExternal"}; + runMigrationTool(new HiveConf(hiveConf), args); + } catch (Exception e) { + // Exceptions are re-packaged by the migration tool... + throw e.getCause(); + } } @Override protected String getTestDataDir() { return TEST_DATA_DIR; } + + + private static void runMigrationTool(HiveConf hiveConf, String[] args) throws Exception { + HiveStrictManagedMigration.hiveConf = hiveConf; + HiveStrictManagedMigration.scheme = "file"; + HiveStrictManagedMigration.main(args); + } + + private void setupExternalTableTest() throws Exception { + runStatementOnDriver("drop table if exists manwhnone"); + runStatementOnDriver("drop table if exists manoutnone"); + runStatementOnDriver("drop table if exists manwhwh"); + runStatementOnDriver("drop table if exists manwhout"); + runStatementOnDriver("drop table if exists manwhmixed"); + runStatementOnDriver("drop table if exists manoutout"); + runStatementOnDriver("drop table if exists extwhnone"); + runStatementOnDriver("drop table if exists extoutnone"); + runStatementOnDriver("drop table if exists extwhwh"); + runStatementOnDriver("drop table if exists extwhout"); + runStatementOnDriver("drop table if exists extwhmixed"); + runStatementOnDriver("drop table if exists extoutout"); + runStatementOnDriver("drop table if exists custdb.custmanwhwh"); + runStatementOnDriver("drop table if exists custdb.custextwhwh"); + runStatementOnDriver("create table manwhnone (a string)"); + runStatementOnDriver("create table manoutnone (a string) location '" + EXTERNAL_TABLE_LOCATION + + "/manoutnone'"); + runStatementOnDriver("create table manwhwh (a string) partitioned by (p string)"); + runStatementOnDriver("alter table manwhwh add partition (p='p1')"); + runStatementOnDriver("alter table manwhwh add partition (p='p2')"); + runStatementOnDriver("create table manwhout (a string) partitioned by (p string)"); + runStatementOnDriver("alter table manwhout add partition (p='p1') location '" + EXTERNAL_TABLE_LOCATION + + "/manwhoutp1'"); + runStatementOnDriver("alter table manwhout add partition (p='p2') location '" + EXTERNAL_TABLE_LOCATION + + "/manwhoutp2'"); + runStatementOnDriver("create table manwhmixed (a string) partitioned by (p string)"); + runStatementOnDriver("alter table manwhmixed add partition (p='p1') location '" + EXTERNAL_TABLE_LOCATION + + "/manwhmixedp1'"); + runStatementOnDriver("alter table manwhmixed add partition (p='p2')"); + runStatementOnDriver("create table manoutout (a string) partitioned by (p string) location '" + + EXTERNAL_TABLE_LOCATION + "/manoutout'"); + runStatementOnDriver("alter table manoutout add partition (p='p1')"); + runStatementOnDriver("alter table manoutout add partition (p='p2')"); + runStatementOnDriver("create external table extwhnone (a string)"); + runStatementOnDriver("create external table extoutnone (a string) location '" + EXTERNAL_TABLE_LOCATION + + "/extoutnone'"); + runStatementOnDriver("create external table extwhwh (a string) partitioned by (p string)"); + runStatementOnDriver("alter table extwhwh add partition (p='p1')"); + runStatementOnDriver("alter table extwhwh add partition (p='p2')"); + runStatementOnDriver("create external table extwhout (a string) partitioned by (p string)"); + runStatementOnDriver("alter table extwhout add partition (p='p1') location '" + EXTERNAL_TABLE_LOCATION + + "/extwhoutp1'"); + runStatementOnDriver("alter table extwhout add partition (p='p2') location '" + EXTERNAL_TABLE_LOCATION + + "/extwhoutp2'"); + runStatementOnDriver("create external table extwhmixed (a string) partitioned by (p string)"); + runStatementOnDriver("alter table extwhmixed add partition (p='p1') location '" + EXTERNAL_TABLE_LOCATION + + "/extwhmixedp1'"); + runStatementOnDriver("alter table extwhmixed add partition (p='p2')"); + runStatementOnDriver("create external table extoutout (a string) partitioned by (p string) location '" + + EXTERNAL_TABLE_LOCATION + "/extoutout'"); + runStatementOnDriver("alter table extoutout add partition (p='p1')"); + runStatementOnDriver("alter table extoutout add partition (p='p2')"); + runStatementOnDriver("drop database if exists custdb"); + runStatementOnDriver("create database custdb"); + runStatementOnDriver("create table custdb.custmanwhwh (a string) partitioned by (p string)"); + runStatementOnDriver("alter table custdb.custmanwhwh add partition (p='p1')"); + runStatementOnDriver("alter table custdb.custmanwhwh add partition (p='p2')"); + runStatementOnDriver("create external table custdb.custextwhwh (a string) partitioned by (p string)"); + runStatementOnDriver("alter table custdb.custextwhwh add partition (p='p1')"); + runStatementOnDriver("alter table custdb.custextwhwh add partition (p='p2')"); + } + + private static void assertExternalTableLocations(File exteralWarehouseDir, File externalNonWhDir) + throws IOException { + Set actualDirs = Files.find(Paths.get(exteralWarehouseDir.toURI()), Integer.MAX_VALUE, (p, a)->true) + .map(p->p.toString().replaceAll(exteralWarehouseDir.getAbsolutePath(), "")) + .filter(s->!s.isEmpty()).collect(toSet()); + Set expectedDirs = new HashSet<>(); + expectedDirs.add("/extwhwh"); + expectedDirs.add("/extwhwh/p=p2"); + expectedDirs.add("/extwhwh/p=p1"); + expectedDirs.add("/extwhmixed"); + expectedDirs.add("/extwhmixed/p=p2"); + expectedDirs.add("/manwhwh"); + expectedDirs.add("/manwhwh/p=p2"); + expectedDirs.add("/manwhwh/p=p1"); + expectedDirs.add("/custdb.db"); + expectedDirs.add("/custdb.db/custmanwhwh"); + expectedDirs.add("/custdb.db/custmanwhwh/p=p2"); + expectedDirs.add("/custdb.db/custmanwhwh/p=p1"); + expectedDirs.add("/custdb.db/custextwhwh"); + expectedDirs.add("/custdb.db/custextwhwh/p=p2"); + expectedDirs.add("/custdb.db/custextwhwh/p=p1"); + expectedDirs.add("/manwhout"); + expectedDirs.add("/manwhnone"); + expectedDirs.add("/manwhmixed"); + expectedDirs.add("/manwhmixed/p=p2"); + expectedDirs.add("/extwhnone"); + expectedDirs.add("/extwhout"); + assertEquals("Unexpected external warehouse directory structure in " + exteralWarehouseDir, expectedDirs, + actualDirs); + + actualDirs = Files.find(Paths.get(externalNonWhDir.toURI()), Integer.MAX_VALUE, (p, a)->true) + .map(p->p.toString().replaceAll(externalNonWhDir.getAbsolutePath(), "")) + .filter(s->!s.isEmpty()).collect(toSet()); + expectedDirs.clear(); + expectedDirs.add("/manoutout"); + expectedDirs.add("/extoutout/p=p2"); + expectedDirs.add("/extoutout/p=p1"); + expectedDirs.add("/extwhoutp2"); + expectedDirs.add("/extwhoutp1"); + expectedDirs.add("/manwhmixedp1"); + expectedDirs.add("/manwhoutp1"); + expectedDirs.add("/manoutout/p=p1"); + expectedDirs.add("/manoutout/p=p2"); + expectedDirs.add("/manwhoutp2"); + expectedDirs.add("/extoutnone"); + expectedDirs.add("/manoutnone"); + expectedDirs.add("/extoutout"); + expectedDirs.add("/extwhmixedp1"); + assertEquals("Unexpected external (non-warehouse) directory structure in " + externalNonWhDir, expectedDirs, + actualDirs); + } + + private static void assertSDLocationCorrect() throws HiveException { + org.apache.hadoop.hive.ql.metadata.Table table = Hive.get().getTable("manwhwh"); + List partitions = Hive.get().getPartitions(table); + assertTrue(partitions.get(0).getLocation().contains("/newExternal/manwhwh/p=p1")); + assertTrue(partitions.get(1).getLocation().contains("/newExternal/manwhwh/p=p2")); + + table = Hive.get().getTable("manwhout"); + partitions = Hive.get().getPartitions(table); + assertTrue(partitions.get(0).getLocation().contains("/tmp/manwhoutp1")); + assertTrue(partitions.get(1).getLocation().contains("/tmp/manwhoutp2")); + + table = Hive.get().getTable("manwhmixed"); + partitions = Hive.get().getPartitions(table); + assertTrue(partitions.get(0).getLocation().contains("/tmp/manwhmixedp1")); + assertTrue(partitions.get(1).getLocation().contains("/newExternal/manwhmixed/p=p2")); + } }