diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 2f695d4acc..d3cb60b790 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -455,7 +455,7 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal REPLCMENCRYPTEDDIR("hive.repl.cm.encryptionzone.rootdir", ".cmroot", "Root dir for ChangeManager if encryption zones are enabled, used for deleted files."), REPLCMFALLBACKNONENCRYPTEDDIR("hive.repl.cm.nonencryptionzone.rootdir", - "/user/${system:user.name}/cmroot/", + "", "Root dir for ChangeManager for non encrypted paths if hive.repl.cmrootdir is encrypted."), REPLCMINTERVAL("hive.repl.cm.interval","3600s", new TimeValidator(TimeUnit.SECONDS), diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/MetastoreHousekeepingLeaderTestBase.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/MetastoreHousekeepingLeaderTestBase.java index d89d67c423..7ba0d3ee2a 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/MetastoreHousekeepingLeaderTestBase.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/MetastoreHousekeepingLeaderTestBase.java @@ -109,6 +109,7 @@ private void addReplChangeManagerConfigs() throws Exception { MetastoreConf.setBoolVar(conf, ConfVars.REPLCMENABLED, true); String cmroot = "hdfs://" + miniDFS.getNameNode().getHostAndPort() + "/cmroot"; MetastoreConf.setVar(conf, ConfVars.REPLCMDIR, cmroot); + MetastoreConf.setVar(conf, ConfVars.REPLCMFALLBACKNONENCRYPTEDDIR, cmroot); threadNames.put(ReplChangeManager.CM_THREAD_NAME_PREFIX, false); } diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreMultipleEncryptionZones.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreMultipleEncryptionZones.java index 
51bb78733a..41a1ce9e1d 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreMultipleEncryptionZones.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreMultipleEncryptionZones.java @@ -1364,8 +1364,10 @@ public void testCmrootEncrypted() throws Exception { "hdfs://" + miniDFSCluster.getNameNode().getHostAndPort() + HiveConf.ConfVars.METASTOREWAREHOUSE.defaultStrVal); - String cmrootdirEncrypted = "hdfs://" + miniDFSCluster.getNameNode().getHostAndPort() + "/cmroot"; + String cmrootdirEncrypted = "hdfs://" + miniDFSCluster.getNameNode().getHostAndPort() + "/cmrootDirEncrypted"; encryptedHiveConf.set(HiveConf.ConfVars.REPLCMDIR.varname, cmrootdirEncrypted); + FileSystem cmrootdirEncryptedFs = new Path(cmrootdirEncrypted).getFileSystem(hiveConf); + cmrootdirEncryptedFs.mkdirs(new Path(cmrootdirEncrypted)); encryptedHiveConf.set(HiveConf.ConfVars.REPLCMFALLBACKNONENCRYPTEDDIR.varname, cmrootFallBack); //Create cm in encrypted zone @@ -1410,10 +1412,89 @@ public void testCmrootEncrypted() throws Exception { exceptionThrown = true; } assertFalse(exceptionThrown); + cmrootdirEncryptedFs.delete(new Path(cmrootdirEncrypted), true); ReplChangeManager.resetReplChangeManagerInstance(); initReplChangeManager(); } + @Test + public void testCmrootFallbackEncrypted() throws Exception { + HiveConf encryptedHiveConf = new HiveConf(TestReplChangeManager.class); + encryptedHiveConf.setBoolean(HiveConf.ConfVars.REPLCMENABLED.varname, true); + encryptedHiveConf.setInt(CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY, 60); + encryptedHiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, + "hdfs://" + miniDFSCluster.getNameNode().getHostAndPort() + + HiveConf.ConfVars.METASTOREWAREHOUSE.defaultStrVal); + String cmrootdirEncrypted = "hdfs://" + miniDFSCluster.getNameNode().getHostAndPort() + "/cmrootIsEncrypted"; + String cmRootFallbackEncrypted = "hdfs://" + miniDFSCluster.getNameNode().getHostAndPort() + + 
"/cmrootFallbackEncrypted"; + FileSystem cmrootdirEncryptedFs = new Path(cmrootdirEncrypted).getFileSystem(encryptedHiveConf); + try { + cmrootdirEncryptedFs.mkdirs(new Path(cmrootdirEncrypted)); + cmrootdirEncryptedFs.mkdirs(new Path(cmRootFallbackEncrypted)); + encryptedHiveConf.set(HiveConf.ConfVars.REPLCMDIR.varname, cmrootdirEncrypted); + encryptedHiveConf.set(HiveConf.ConfVars.REPLCMFALLBACKNONENCRYPTEDDIR.varname, cmRootFallbackEncrypted); + + //Create cm in encrypted zone + HadoopShims.HdfsEncryptionShim shimCmEncrypted = ShimLoader.getHadoopShims().createHdfsEncryptionShim(fs, conf); + shimCmEncrypted.createEncryptionZone(new Path(cmrootdirEncrypted), "test_key_db"); + shimCmEncrypted.createEncryptionZone(new Path(cmRootFallbackEncrypted), "test_key_db"); + ReplChangeManager.resetReplChangeManagerInstance(); + boolean exceptionThrown = false; + try { + new Warehouse(encryptedHiveConf); + } catch (MetaException e) { + exceptionThrown = true; + assertTrue(e.getMessage().contains("should not be encrypted")); + } + assertTrue(exceptionThrown); + } finally { + cmrootdirEncryptedFs.delete(new Path(cmrootdirEncrypted), true); + cmrootdirEncryptedFs.delete(new Path(cmRootFallbackEncrypted), true); + ReplChangeManager.resetReplChangeManagerInstance(); + initReplChangeManager(); + } + } + + @Test + public void testCmrootFallbackRelative() throws Exception { + HiveConf encryptedHiveConf = new HiveConf(TestReplChangeManager.class); + encryptedHiveConf.setBoolean(HiveConf.ConfVars.REPLCMENABLED.varname, true); + encryptedHiveConf.setInt(CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY, 60); + encryptedHiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, + "hdfs://" + miniDFSCluster.getNameNode().getHostAndPort() + + HiveConf.ConfVars.METASTOREWAREHOUSE.defaultStrVal); + String cmrootdirEncrypted = "hdfs://" + miniDFSCluster.getNameNode().getHostAndPort() + "/cmrootIsEncrypted"; + String cmRootFallbackEncrypted = "cmrootFallbackEncrypted"; + FileSystem 
cmrootdirEncryptedFs = new Path(cmrootdirEncrypted).getFileSystem(encryptedHiveConf); + try { + cmrootdirEncryptedFs.mkdirs(new Path(cmrootdirEncrypted)); + cmrootdirEncryptedFs.mkdirs(new Path(cmRootFallbackEncrypted)); + encryptedHiveConf.set(HiveConf.ConfVars.REPLCMDIR.varname, cmrootdirEncrypted); + encryptedHiveConf.set(HiveConf.ConfVars.REPLCMFALLBACKNONENCRYPTEDDIR.varname, cmRootFallbackEncrypted); + + //Create cm in encrypted zone + HadoopShims.HdfsEncryptionShim shimCmEncrypted = ShimLoader.getHadoopShims().createHdfsEncryptionShim(fs, conf); + shimCmEncrypted.createEncryptionZone(new Path(cmrootdirEncrypted), "test_key_db"); + + ReplChangeManager.resetReplChangeManagerInstance(); + boolean exceptionThrown = false; + try { + new Warehouse(encryptedHiveConf); + } catch (MetaException e) { + exceptionThrown = true; + assertTrue(e.getMessage().contains("should be absolute")); + } + assertTrue(exceptionThrown); + } finally { + cmrootdirEncryptedFs.delete(new Path(cmrootdirEncrypted), true); + cmrootdirEncryptedFs.delete(new Path(cmRootFallbackEncrypted), true); + ReplChangeManager.resetReplChangeManagerInstance(); + initReplChangeManager(); + } + } + + private void createFile(Path path, String content) throws IOException { FSDataOutputStream output = path.getFileSystem(hiveConf).create(path); diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ReplChangeManager.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ReplChangeManager.java index 1041d925d9..c5df62ffca 100644 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ReplChangeManager.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ReplChangeManager.java @@ -59,7 +59,7 @@ private static boolean inited = false; private static boolean enabled = false; - private static Map encryptionZones = new HashMap<>(); + private static Map 
encryptionZoneToCmrootMapping = new HashMap<>(); private static HadoopShims hadoopShims = ShimLoader.getHadoopShims(); private static Configuration conf; private String msUser; @@ -156,23 +156,34 @@ private ReplChangeManager(Configuration conf) throws MetaException { cmRootDir = MetastoreConf.getVar(conf, ConfVars.REPLCMDIR); encryptedCmRootDir = MetastoreConf.getVar(conf, ConfVars.REPLCMENCRYPTEDDIR); fallbackNonEncryptedCmRootDir = MetastoreConf.getVar(conf, ConfVars.REPLCMFALLBACKNONENCRYPTEDDIR); + //validate cmRootEncrypted is a relative path + Path cmRootEncrypted = new Path(encryptedCmRootDir); + if (cmRootEncrypted.isAbsolute()) { + throw new MetaException(ConfVars.REPLCMENCRYPTEDDIR.getHiveName() + " should be a relative path"); + } //Create default cm root Path cmroot = new Path(cmRootDir); createCmRoot(cmroot); FileSystem cmRootFs = cmroot.getFileSystem(conf); HdfsEncryptionShim pathEncryptionShim = hadoopShims .createHdfsEncryptionShim(cmRootFs, conf); - Path cmRootEncrypted = new Path(encryptedCmRootDir); - if (cmRootEncrypted.isAbsolute()) { - throw new MetaException(ConfVars.REPLCMENCRYPTEDDIR.getHiveName() + " should be a relative path"); - } if (pathEncryptionShim.isPathEncrypted(cmroot)) { //If cm root is encrypted we keep using it for the encryption zone String encryptionZonePath = cmRootFs.getUri() + pathEncryptionShim.getEncryptionZoneForPath(cmroot).getPath(); - encryptionZones.put(encryptionZonePath, cmRootDir); + encryptionZoneToCmrootMapping.put(encryptionZonePath, cmRootDir); } else { - encryptionZones.put(NO_ENCRYPTION, cmRootDir); + encryptionZoneToCmrootMapping.put(NO_ENCRYPTION, cmRootDir); + } + if (!StringUtils.isEmpty(fallbackNonEncryptedCmRootDir)) { + Path cmRootFallback = new Path(fallbackNonEncryptedCmRootDir); + if (!cmRootFallback.isAbsolute()) { + throw new MetaException(ConfVars.REPLCMFALLBACKNONENCRYPTEDDIR.getHiveName() + " should be absolute path"); + } + if (cmRootFs.exists(cmRootFallback) && 
pathEncryptionShim.isPathEncrypted(cmRootFallback)) { + throw new MetaException(ConfVars.REPLCMFALLBACKNONENCRYPTEDDIR.getHiveName() + + " should not be encrypted"); + } } UserGroupInformation usergroupInfo = UserGroupInformation.getCurrentUser(); msUser = usergroupInfo.getShortUserName(); @@ -500,7 +511,7 @@ static void scheduleCMClearer(Configuration conf) { .namingPattern(CM_THREAD_NAME_PREFIX + "%d") .daemon(true) .build()); - executor.scheduleAtFixedRate(new CMClearer(encryptionZones, + executor.scheduleAtFixedRate(new CMClearer(encryptionZoneToCmrootMapping, MetastoreConf.getTimeVar(conf, ConfVars.REPLCMRETIAN, TimeUnit.SECONDS), conf), 0, MetastoreConf.getTimeVar(conf, ConfVars.REPLCMINTERVAL, TimeUnit.SECONDS), TimeUnit.SECONDS); } @@ -553,14 +564,14 @@ static Path getCmRoot(Path path) throws IOException { //at the root of the encryption zone cmrootDir = encryptionZonePath + Path.SEPARATOR + encryptedCmRootDir; } - if (encryptionZones.containsKey(encryptionZonePath)) { - cmroot = new Path(encryptionZones.get(encryptionZonePath)); + if (encryptionZoneToCmrootMapping.containsKey(encryptionZonePath)) { + cmroot = new Path(encryptionZoneToCmrootMapping.get(encryptionZonePath)); } else { cmroot = new Path(cmrootDir); synchronized (instance) { - if (!encryptionZones.containsKey(encryptionZonePath)) { + if (!encryptionZoneToCmrootMapping.containsKey(encryptionZonePath)) { createCmRoot(cmroot); - encryptionZones.put(encryptionZonePath, cmrootDir); + encryptionZoneToCmrootMapping.put(encryptionZonePath, cmrootDir); } } } @@ -569,11 +580,22 @@ static Path getCmRoot(Path path) throws IOException { } private static void createCmRoot(Path cmroot) throws IOException { - FileSystem cmFs = cmroot.getFileSystem(conf); - // Create cmroot with permission 700 if not exist - if (!cmFs.exists(cmroot)) { - cmFs.mkdirs(cmroot); - cmFs.setPermission(cmroot, new FsPermission("700")); + Retry retriable = new Retry(IOException.class) { + @Override + public Void execute() throws 
IOException { + FileSystem cmFs = cmroot.getFileSystem(conf); + // Create cmroot with permission 700 if not exist + if (!cmFs.exists(cmroot)) { + cmFs.mkdirs(cmroot); + cmFs.setPermission(cmroot, new FsPermission("700")); + } + return null; + } + }; + try { + retriable.run(); + } catch (Exception e) { + throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e)); } } @@ -582,7 +604,7 @@ static void resetReplChangeManagerInstance() { inited = false; enabled = false; instance = null; - encryptionZones.clear(); + encryptionZoneToCmrootMapping.clear(); } public static final PathFilter CMROOT_PATH_FILTER = new PathFilter() { @@ -590,8 +612,10 @@ static void resetReplChangeManagerInstance() { public boolean accept(Path p) { if (enabled) { String name = p.getName(); - return !name.contains(cmRootDir) && !name.contains(encryptedCmRootDir) - && !name.contains(fallbackNonEncryptedCmRootDir); + return StringUtils.isEmpty(fallbackNonEncryptedCmRootDir) + ? (!name.contains(cmRootDir) && !name.contains(encryptedCmRootDir)) + : (!name.contains(cmRootDir) && !name.contains(encryptedCmRootDir) + && !name.contains(fallbackNonEncryptedCmRootDir)); } return true; } diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java index 2aeb37406a..58b67e888c 100644 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java @@ -920,7 +920,7 @@ public static ConfVars getMetaConf(String name) { REPLCMENCRYPTEDDIR("metastore.repl.cm.encryptionzone.rootdir", "hive.repl.cm.encryptionzone.rootdir", ".cmroot", "Root dir for ChangeManager if encryption zones are enabled, used for deleted files."), 
REPLCMFALLBACKNONENCRYPTEDDIR("metastore.repl.cm.nonencryptionzone.rootdir", - "hive.repl.cm.nonencryptionzone.rootdir", "/user/${system:user.name}/cmroot/", + "hive.repl.cm.nonencryptionzone.rootdir", "", "Root dir for ChangeManager for non encrypted paths if hive.repl.cmrootdir is encrypted."), REPLCMRETIAN("metastore.repl.cm.retain", "hive.repl.cm.retain", 24, TimeUnit.HOURS, "Time to retain removed files in cmrootdir."), diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index ee9f988294..06625de67c 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -2840,27 +2840,7 @@ private boolean checkTableDataShouldBeDeleted(Table tbl, boolean deleteData) { */ private void deleteTableData(Path tablePath, boolean ifPurge, boolean shouldEnableCm) { if (tablePath != null) { - try { - if (shouldEnableCm) { - //Don't delete cmdir if its inside the table path - FileStatus[] statuses = tablePath.getFileSystem(conf).listStatus(tablePath, - ReplChangeManager.CMROOT_PATH_FILTER); - for (final FileStatus status : statuses) { - wh.deleteDir(status.getPath(), true, ifPurge, shouldEnableCm); - } - //Check if table directory is empty, delete it - FileStatus[] statusWithoutFilter = tablePath.getFileSystem(conf).listStatus(tablePath); - if (statusWithoutFilter.length == 0) { - wh.deleteDir(tablePath, true, ifPurge, shouldEnableCm); - } - } else { - //If no cm delete the complete table directory - wh.deleteDir(tablePath, true, ifPurge, shouldEnableCm); - } - } catch (Exception e) { - LOG.error("Failed to delete table directory: " + tablePath + - " " + e.getMessage()); - } + deleteDataExcludeCmroot(tablePath, ifPurge, shouldEnableCm); } } @@ 
-2895,27 +2875,7 @@ private void deleteTableData(Path tablePath, boolean ifPurge, Database db) { private void deletePartitionData(List partPaths, boolean ifPurge, boolean shouldEnableCm) { if (partPaths != null && !partPaths.isEmpty()) { for (Path partPath : partPaths) { - try { - if (shouldEnableCm) { - //Don't delete cmdir if its inside the partition path - FileStatus[] statuses = partPath.getFileSystem(conf).listStatus(partPath, - ReplChangeManager.CMROOT_PATH_FILTER); - for (final FileStatus status : statuses) { - wh.deleteDir(status.getPath(), true, ifPurge, shouldEnableCm); - } - //Check if table directory is empty, delete it - FileStatus[] statusWithoutFilter = partPath.getFileSystem(conf).listStatus(partPath); - if (statusWithoutFilter.length == 0) { - wh.deleteDir(partPath, true, ifPurge, shouldEnableCm); - } - } else { - //If no cm delete the complete table directory - wh.deleteDir(partPath, true, ifPurge, shouldEnableCm); - } - } catch (Exception e) { - LOG.error("Failed to delete partition directory: " + partPath + - " " + e.getMessage()); - } + deleteDataExcludeCmroot(partPath, ifPurge, shouldEnableCm); } } } @@ -2942,6 +2902,39 @@ private void deletePartitionData(List partPaths, boolean ifPurge, Database } } + /** + * Delete data from the given path, excluding the cmroot dir, + * and log an error if the deletion fails. 
+ * + * @param path path of the directory whose data should be deleted + * @param ifPurge completely purge the data (skipping trash) while + * removing it from warehouse + * @param shouldEnableCm If cm should be enabled + */ + private void deleteDataExcludeCmroot(Path path, boolean ifPurge, boolean shouldEnableCm) { + try { + if (shouldEnableCm) { + //Don't delete cmdir if its inside the given path + FileStatus[] statuses = path.getFileSystem(conf).listStatus(path, + ReplChangeManager.CMROOT_PATH_FILTER); + for (final FileStatus status : statuses) { + wh.deleteDir(status.getPath(), true, ifPurge, shouldEnableCm); + } + //Check if the directory is empty, delete it + FileStatus[] statusWithoutFilter = path.getFileSystem(conf).listStatus(path); + if (statusWithoutFilter.length == 0) { + wh.deleteDir(path, true, ifPurge, shouldEnableCm); + } + } else { + //If no cm delete the complete directory + wh.deleteDir(path, true, ifPurge, shouldEnableCm); + } + } catch (Exception e) { + LOG.error("Failed to delete directory: " + path + + " " + e.getMessage()); + } + } + /** * Deletes the partitions specified by catName, dbName, tableName. If checkLocation is true, for * locations of partitions which may not be subdirectories of tablePath checks to make sure the