diff --git a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java index ec2f9f0ac8..cd7a7e6eaa 100644 --- a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java +++ b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java @@ -645,6 +645,11 @@ public static boolean distCp(FileSystem srcFS, List srcPaths, Path dst, copied = shims.runDistCpAs(srcPaths, dst, conf, doAsUser); } if (copied && deleteSource) { + if (doAsUser != null) { + // if distcp is done using doAsUser, delete also should be done using same user. + //TODO : Need to change the delete execution within doAs if doAsUser is given. + throw new IOException("Distcp is called with doAsUser and delete source set as true"); + } for (Path path : srcPaths) { srcFS.delete(path, true); } diff --git a/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java b/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java index 175fb072cd..b45832ede7 100644 --- a/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java +++ b/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java @@ -245,10 +245,20 @@ public void testCopyWithDistCpAs() throws IOException { when(shims.runDistCp(Collections.singletonList(copySrc), copyDst, conf)).thenReturn(false); // doAs when asked - Assert.assertTrue(FileUtils.distCp(fs, Collections.singletonList(copySrc), copyDst, true, doAsUser, conf, shims)); + Assert.assertTrue(FileUtils.distCp(fs, Collections.singletonList(copySrc), copyDst, false, doAsUser, conf, shims)); verify(shims).runDistCpAs(Collections.singletonList(copySrc), copyDst, conf, doAsUser); // don't doAs when not asked Assert.assertFalse(FileUtils.distCp(fs, Collections.singletonList(copySrc), copyDst, true, null, conf, shims)); verify(shims).runDistCp(Collections.singletonList(copySrc), copyDst, conf); + + // When distcp is done with doAs, the delete should also be done as doAs. But in current code its broken. This + // should be fixed. For now check is added to avoid wrong usage. So if doAs is set, delete source should be false. + try { + FileUtils.distCp(fs, Collections.singletonList(copySrc), copyDst, true, doAsUser, conf, shims); + Assert.assertTrue("Should throw IOException as doAs is called with delete source set to true".equals("")); + } catch (IOException e) { + Assert.assertTrue(e.getMessage(). + equalsIgnoreCase("Distcp is called with doAsUser and delete source set as true")); + } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 012a670064..bd60317f26 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -4016,12 +4016,22 @@ private static Path mvFile(HiveConf conf, FileSystem sourceFs, Path sourcePath, destFs.copyFromLocalFile(sourcePath, destFilePath); } else { if (!FileUtils.copy(sourceFs, sourcePath, destFs, destFilePath, - true, // delete source + false, // delete source false, // overwrite destination conf)) { LOG.error("Copy failed for source: " + sourcePath + " to destination: " + destFilePath); throw new IOException("File copy failed."); } + + // Source file delete may fail because of permission issue as executing user might not + // have permission to delete the files in the source path. Ignore this failure. + try { + if (!sourceFs.delete(sourcePath, true)) { + LOG.warn("Delete source failed for source: " + sourcePath + " during copy to destination: " + destFilePath); + } + } catch (Exception e) { + LOG.warn("Delete source failed for source: " + sourcePath + " during copy to destination: " + destFilePath, e); + } } return destFilePath; }