diff --git a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java index 61aca56bac..0544f04375 100644 --- a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java +++ b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java @@ -674,10 +674,11 @@ public static boolean distCp(FileSystem srcFS, List srcPaths, Path dst, LOG.debug("copying srcPaths : {}, to DestPath :{} ,with doAs: {}", StringUtils.join(",", srcPaths), dst.toString(), proxyUser); boolean copied = false; + boolean blobStore = BlobStorageUtils.isBlobStorageFileSystem(conf, dst.getFileSystem(conf)); if (proxyUser == null){ - copied = shims.runDistCp(srcPaths, dst, conf); + copied = shims.runDistCp(srcPaths, dst, conf, blobStore); } else { - copied = shims.runDistCpAs(srcPaths, dst, conf, proxyUser); + copied = shims.runDistCpAs(srcPaths, dst, conf, blobStore, proxyUser); } if (copied && deleteSource) { if (proxyUser != null) { diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index e7724f9084..8960c96939 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -4852,7 +4852,7 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal /* BLOBSTORE section */ - HIVE_BLOBSTORE_SUPPORTED_SCHEMES("hive.blobstore.supported.schemes", "s3,s3a,s3n", + HIVE_BLOBSTORE_SUPPORTED_SCHEMES("hive.blobstore.supported.schemes", "s3,s3a,s3n,abfs", "Comma-separated list of supported blobstore schemes."), HIVE_BLOBSTORE_USE_BLOBSTORE_AS_SCRATCHDIR("hive.blobstore.use.blobstore.as.scratchdir", false, diff --git a/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java b/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java index 9b5748e724..51a2497802 100644 --- a/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java +++ b/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java @@ -225,10 +225,12 @@ public void testCopyWithDistcp() throws IOException { when(mockFs.getContentSummary(any(Path.class))).thenReturn(mockContentSummary); HadoopShims shims = mock(HadoopShims.class); - when(shims.runDistCp(Collections.singletonList(copySrc), copyDst, conf)).thenReturn(true); + when(shims.runDistCp(Collections.singletonList(copySrc), copyDst, conf, + BlobStorageUtils.isBlobStorageFileSystem(conf, copyDst.getFileSystem(conf)))).thenReturn(true); Assert.assertTrue(FileUtils.copy(mockFs, copySrc, mockFs, copyDst, false, false, conf, shims)); - verify(shims).runDistCp(Collections.singletonList(copySrc), copyDst, conf); + verify(shims).runDistCp(Collections.singletonList(copySrc), copyDst, conf, + BlobStorageUtils.isBlobStorageFileSystem(conf, copyDst.getFileSystem(conf))); } @Test @@ -244,15 +246,19 @@ public void testCopyWithDistCpAs() throws IOException { doAsUser, UserGroupInformation.getLoginUser()); HadoopShims shims = mock(HadoopShims.class); - when(shims.runDistCpAs(Collections.singletonList(copySrc), copyDst, conf, proxyUser)).thenReturn(true); - when(shims.runDistCp(Collections.singletonList(copySrc), copyDst, conf)).thenReturn(false); + when(shims.runDistCpAs(Collections.singletonList(copySrc), copyDst, conf, + BlobStorageUtils.isBlobStorageFileSystem(conf, copyDst.getFileSystem(conf)), proxyUser)).thenReturn(true); + when(shims.runDistCp(Collections.singletonList(copySrc), copyDst, conf, + BlobStorageUtils.isBlobStorageFileSystem(conf, copyDst.getFileSystem(conf)))).thenReturn(false); // doAs when asked Assert.assertTrue(FileUtils.distCp(fs, Collections.singletonList(copySrc), copyDst, false, proxyUser, conf, shims)); - verify(shims).runDistCpAs(Collections.singletonList(copySrc), copyDst, conf, proxyUser); + verify(shims).runDistCpAs(Collections.singletonList(copySrc), copyDst, conf, + BlobStorageUtils.isBlobStorageFileSystem(conf, copyDst.getFileSystem(conf)), proxyUser); // don't doAs when not asked Assert.assertFalse(FileUtils.distCp(fs, Collections.singletonList(copySrc), copyDst, true, null, conf, shims)); - verify(shims).runDistCp(Collections.singletonList(copySrc), copyDst, conf); + verify(shims).runDistCp(Collections.singletonList(copySrc), copyDst, conf, + BlobStorageUtils.isBlobStorageFileSystem(conf, copyDst.getFileSystem(conf))); // When distcp is done with doAs, the delete should also be done as doAs. But in current code its broken. This // should be fixed. For now check is added to avoid wrong usage. So if doAs is set, delete source should be false. diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java index f55b3ff1f3..7f83157a43 100644 --- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java +++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java @@ -1141,13 +1141,13 @@ public void setStoragePolicy(Path path, StoragePolicyValue policy) } @Override - public boolean runDistCpAs(List srcPaths, Path dst, Configuration conf, + public boolean runDistCpAs(List srcPaths, Path dst, Configuration conf, boolean blobStore, UserGroupInformation proxyUser) throws IOException { try { return proxyUser.doAs(new PrivilegedExceptionAction() { @Override public Boolean run() throws Exception { - return runDistCp(srcPaths, dst, conf); + return runDistCp(srcPaths, dst, conf, blobStore); } }); } catch (InterruptedException e) { @@ -1156,13 +1156,16 @@ public Boolean run() throws Exception { } @Override - public boolean runDistCp(List srcPaths, Path dst, Configuration conf) throws IOException { - DistCpOptions options = new DistCpOptions.Builder(srcPaths, dst) - .withSyncFolder(true) - .withDeleteMissing(true) - .preserve(FileAttribute.BLOCKSIZE) - .preserve(FileAttribute.XATTR) - .build(); + public boolean runDistCp(List srcPaths, Path dst, Configuration conf, boolean blobStore) throws IOException { + DistCpOptions.Builder builderOptions = new DistCpOptions.Builder(srcPaths, dst) + .withSyncFolder(true) + .withDeleteMissing(true) + .preserve(FileAttribute.BLOCKSIZE); + + if (!blobStore) { + builderOptions = builderOptions.preserve(FileAttribute.XATTR); + } + DistCpOptions options = builderOptions.build(); // Creates the command-line parameters for distcp List params = constructDistCpParams(srcPaths, dst, conf); diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java index 49a2ab3616..1b243e1970 100644 --- a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java +++ b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java @@ -500,11 +500,12 @@ public void checkFileAccess(FileSystem fs, FileStatus status, FsAction action) * @param srcPaths List of Path to the source files or directories to copy * @param dst Path to the destination file or directory * @param conf The hadoop configuration object + * @param blobStore True if the destination is a blob store * @param proxyUser The user to perform the distcp as * @return True if it is successfull; False otherwise. */ - boolean runDistCpAs(List srcPaths, Path dst, Configuration conf, UserGroupInformation proxyUser) - throws IOException; + boolean runDistCpAs(List srcPaths, Path dst, Configuration conf, boolean blobStore, + UserGroupInformation proxyUser) throws IOException; /** * Copies a source dir/file to a destination by orchestrating the copy between hdfs nodes. @@ -514,9 +515,10 @@ boolean runDistCpAs(List srcPaths, Path dst, Configuration conf, UserGroup * @param srcPaths List of Path to the source files or directories to copy * @param dst Path to the destination file or directory * @param conf The hadoop configuration object + * @param blobStore True if the destination is a blob store * @return True if it is successfull; False otherwise. */ - public boolean runDistCp(List srcPaths, Path dst, Configuration conf) throws IOException; + public boolean runDistCp(List srcPaths, Path dst, Configuration conf, boolean blobStore) throws IOException; /** * This interface encapsulates methods used to get encryption information from