diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java index 38e843aeacf..9ce379bb354 100755 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java @@ -690,7 +690,7 @@ public static String makePartName(List partCols, try { Path path = new Path(location); FileSystem fileSys = path.getFileSystem(conf); - return FileUtils.getFileStatusRecurse(path, -1, fileSys); + return FileUtils.getFileStatusRecurse(path, fileSys); } catch (IOException ioe) { MetaStoreUtils.logAndThrowMetaException(ioe); } @@ -708,7 +708,7 @@ public static String makePartName(List partCols, Path tablePath = getDnsPath(new Path(table.getSd().getLocation())); try { FileSystem fileSys = tablePath.getFileSystem(conf); - return FileUtils.getFileStatusRecurse(tablePath, -1, fileSys); + return FileUtils.getFileStatusRecurse(tablePath, fileSys); } catch (IOException ioe) { MetaStoreUtils.logAndThrowMetaException(ioe); } diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/FileUtils.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/FileUtils.java index bf206fffc26..b49fafab4ed 100644 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/FileUtils.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/FileUtils.java @@ -311,41 +311,31 @@ public static String unescapePathName(String path) { /** * Get all file status from a root path and recursively go deep into certain levels. 
* - * @param path + * @param base * the root path - * @param level - * the depth of directory to explore * @param fs * the file system * @return array of FileStatus * @throws IOException */ - public static List getFileStatusRecurse(Path path, int level, FileSystem fs) - throws IOException { - - // if level is <0, the return all files/directories under the specified path - if (level < 0) { - List result = new ArrayList<>(); - try { - FileStatus fileStatus = fs.getFileStatus(path); - FileUtils.listStatusRecursively(fs, fileStatus, result); - } catch (IOException e) { - // globStatus() API returns empty FileStatus[] when the specified path - // does not exist. But getFileStatus() throw IOException. To mimic the - // similar behavior we will return empty array on exception. For external - // tables, the path of the table will not exists during table creation - return new ArrayList<>(0); + public static List getFileStatusRecurse(Path base, FileSystem fs) { + try { + List results = new ArrayList<>(); + if (isS3a(fs)) { + // The S3A file system has an optimized recursive directory listing implementation; however, it doesn't support filtering. + // Therefore we filter the result set afterwards. This might not be optimal in the HDFS case (which does a tree walk), where a filter could have been used. + return listS3FilesRecursive(base, fs, results); + } else { + listStatusRecursively(fs, fs.getFileStatus(base), results); + return results; } - return result; - } - - // construct a path pattern (e.g., /*/*) to find all dynamically generated paths - StringBuilder sb = new StringBuilder(path.toUri().getPath()); - for (int i = 0; i < level; i++) { - sb.append(Path.SEPARATOR).append("*"); + } catch (IOException e) { + // The globStatus() API returns an empty FileStatus[] when the specified path + // does not exist, but getFileStatus() throws an IOException. To mimic + // that behavior we return an empty list on exception. 
For external + // tables, the path of the table will not exist during table creation + return new ArrayList<>(0); } - Path pathPattern = new Path(path, sb.toString()); - return Lists.newArrayList(fs.globStatus(pathPattern, FileUtils.HIDDEN_FILES_PATH_FILTER)); } /** @@ -373,6 +363,26 @@ public static void listStatusRecursively(FileSystem fs, FileStatus fileStatus, } } + private static List listS3FilesRecursive(Path base, FileSystem fs, List results) throws IOException { + RemoteIterator remoteIterator = fs.listFiles(base, true); + while (remoteIterator.hasNext()) { + LocatedFileStatus each = remoteIterator.next(); + Path relativePath = new Path(each.getPath().toString().replace(base.toString(), "")); + if (RemoteIteratorWithFilter.HIDDEN_FILES_FULL_PATH_FILTER.accept(relativePath)) { + results.add(each); + } + } + return results; + } + + public static boolean isS3a(FileSystem fs) { + try { + return "s3a".equalsIgnoreCase(fs.getScheme()); + } catch (UnsupportedOperationException ex) { + return false; + } + } + public static String makePartName(List partCols, List vals) { return makePartName(partCols, vals, null); }