diff --git common/src/java/org/apache/hadoop/hive/common/JavaUtils.java common/src/java/org/apache/hadoop/hive/common/JavaUtils.java index 7894ec101e..3d3f9da6db 100644 --- common/src/java/org/apache/hadoop/hive/common/JavaUtils.java +++ common/src/java/org/apache/hadoop/hive/common/JavaUtils.java @@ -223,20 +223,4 @@ public boolean accept(Path path) { return !isMatch; } } - - public static class AnyIdDirFilter implements PathFilter { - @Override - public boolean accept(Path path) { - String name = path.getName(); - //todo: what if this is a base? - if (!name.startsWith(DELTA_PREFIX + "_")) return false; - String idStr = name.substring(DELTA_PREFIX.length() + 1, DELTA_PREFIX.length() + 1 + DELTA_DIGITS_LEN); - try { - Long.parseLong(idStr);//what for? sanity check? - } catch (NumberFormatException ex) { - return false; - } - return true; - } - } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/CopyTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/CopyTask.java index ce683c8a8d..b0ec5abcce 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/CopyTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/CopyTask.java @@ -32,6 +32,7 @@ import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.ql.DriverContext; +import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.parse.repl.dump.io.FileOperations; import org.apache.hadoop.hive.ql.plan.CopyWork; import org.apache.hadoop.hive.ql.plan.api.StageType; @@ -112,7 +113,7 @@ protected int copyOnePath(Path fromPath, Path toPath) { if (!fs.exists(path)) return null; if (!isSourceMm) return matchFilesOneDir(fs, path, null); // Note: this doesn't handle list bucketing properly; neither does the original code. - FileStatus[] mmDirs = fs.listStatus(path, new JavaUtils.AnyIdDirFilter()); + FileStatus[] mmDirs = fs.listStatus(path, new AcidUtils.AnyIdDirFilter()); if (mmDirs == null || mmDirs.length == 0) return null; List allFiles = new ArrayList(); for (FileStatus mmDir : mmDirs) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index 5fbe045df5..2b30106e5d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -4076,7 +4076,7 @@ private static void tryDelete(FileSystem fs, Path path) { Boolean isBaseDir) throws IOException { int skipLevels = dpLevels + lbLevels; if (filter == null) { - filter = new JavaUtils.IdPathFilter(writeId, stmtId, true, false); + filter = new AcidUtils.IdPathFilter(writeId, stmtId, true, false); } if (skipLevels == 0) { return statusToPath(fs.listStatus(path, filter)); @@ -4084,7 +4084,7 @@ private static void tryDelete(FileSystem fs, Path path) { // TODO: for some reason, globStatus doesn't work for masks like "...blah/*/delta_0000007_0000007*" // the last star throws it off. So, for now, if stmtId is missing use recursion. // For the same reason, we cannot use it if we don't know isBaseDir. Currently, we don't - // /want/ to know isBaseDir because that is error prone; so, it ends up never being used. + // /want/ to know isBaseDir because that is error prone; so, it ends up never being used. if (stmtId < 0 || isBaseDir == null || (HiveConf.getBoolVar(conf, ConfVars.HIVE_MM_AVOID_GLOBSTATUS_ON_S3) && isS3(fs))) { return getMmDirectoryCandidatesRecursive(fs, path, skipLevels, filter); @@ -4183,7 +4183,7 @@ private static boolean isS3(FileSystem fs) { } private static void tryDeleteAllMmFiles(FileSystem fs, Path specPath, Path manifestDir, - int dpLevels, int lbLevels, JavaUtils.IdPathFilter filter, long writeId, int stmtId, + int dpLevels, int lbLevels, AcidUtils.IdPathFilter filter, long writeId, int stmtId, Configuration conf) throws IOException { Path[] files = getMmDirectoryCandidates( fs, specPath, dpLevels, lbLevels, filter, writeId, stmtId, conf, null); @@ -4250,7 +4250,7 @@ public static void handleMmTableFinalPath(Path specPath, String unionSuffix, Con FileSystem fs = specPath.getFileSystem(hconf); Path manifestDir = getManifestDir(specPath, writeId, stmtId, unionSuffix, isInsertOverwrite); if (!success) { - JavaUtils.IdPathFilter filter = new JavaUtils.IdPathFilter(writeId, stmtId, true); + AcidUtils.IdPathFilter filter = new AcidUtils.IdPathFilter(writeId, stmtId, true); tryDeleteAllMmFiles(fs, specPath, manifestDir, dpLevels, lbLevels, filter, writeId, stmtId, hconf); return; @@ -4275,7 +4275,7 @@ public static void handleMmTableFinalPath(Path specPath, String unionSuffix, Con } Utilities.FILE_OP_LOGGER.debug("Looking for files in: {}", specPath); - JavaUtils.IdPathFilter filter = new JavaUtils.IdPathFilter(writeId, stmtId, true, false); + AcidUtils.IdPathFilter filter = new AcidUtils.IdPathFilter(writeId, stmtId, true, false); if (isMmCtas && !fs.exists(specPath)) { Utilities.FILE_OP_LOGGER.info("Creating table directory for CTAS with no output at {}", specPath); FileUtils.mkdir(fs, specPath, hconf); diff --git ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java index 4760b85da0..c790a4adf9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java @@ -1820,4 +1820,73 @@ public static boolean isAcidEnabled(HiveConf hiveConf) { } return false; } + + + public static class AnyIdDirFilter implements PathFilter { + @Override + public boolean accept(Path path) { + String name = path.getName(); + // Only look for regular deltas, not delete ones. + if (name.startsWith(DELTA_PREFIX + "_")) { + int idStartIx = DELTA_PREFIX.length() + 1; + int idEndIx = name.indexOf('_', idStartIx); + if (idEndIx == -1) return false; + String idStr = name.substring(idStartIx, idEndIx); + try { + Long.parseLong(idStr); + } catch (NumberFormatException ex) { + return false; + } + return true; + } + if (name.startsWith(BASE_PREFIX + "_")) { + String idStr = name.substring(BASE_PREFIX.length() + 1); + try { + Long.parseLong(idStr); + } catch (NumberFormatException ex) { + return false; + } + return true; + } + return false; + } + } + + public static class IdPathFilter implements PathFilter { + private String baseDirName, deltaDirName; + private final boolean isMatch, isIgnoreTemp, isDeltaPrefix; + + public IdPathFilter(long writeId, int stmtId, boolean isMatch) { + this(writeId, stmtId, isMatch, false); + } + + public IdPathFilter(long writeId, int stmtId, boolean isMatch, boolean isIgnoreTemp) { + String deltaDirName = null; + deltaDirName = DELTA_PREFIX + "_" + String.format(DELTA_DIGITS, writeId) + "_" + + String.format(DELTA_DIGITS, writeId) + "_"; + isDeltaPrefix = (stmtId < 0); + if (!isDeltaPrefix) { + deltaDirName += String.format(STATEMENT_DIGITS, stmtId); + } + + this.baseDirName = BASE_PREFIX + "_" + String.format(DELTA_DIGITS, writeId); + this.deltaDirName = deltaDirName; + this.isMatch = isMatch; + this.isIgnoreTemp = isIgnoreTemp; + } + + @Override + public boolean accept(Path path) { + String name = path.getName(); + if (name.equals(baseDirName) || (isDeltaPrefix && name.startsWith(deltaDirName)) + || (!isDeltaPrefix && name.equals(deltaDirName))) { + return isMatch; + } + if (isIgnoreTemp && name.length() > 0) { + char c = name.charAt(0); + if (c == '.' || c == '_') return false; // Regardless of isMatch, ignore this. + } + return !isMatch; + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 49c355be01..1182a4bdda 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -1750,7 +1750,7 @@ public Partition loadPartition(Path loadPath, Table tbl, Map par destPath = new Path(destPath, AcidUtils.deltaSubdir(writeId, writeId, stmtId)); // TODO: loadFileType for MM table will no longer be REPLACE_ALL filter = (loadFileType == LoadFileType.REPLACE_ALL) - ? new JavaUtils.IdPathFilter(writeId, stmtId, false, true) : filter; + ? new AcidUtils.IdPathFilter(writeId, stmtId, false, true) : filter; } else if(!isAcidIUDoperation && isFullAcidTable) { destPath = fixFullAcidPathForLoadData(loadFileType, destPath, writeId, stmtId, tbl); @@ -2327,7 +2327,7 @@ public void loadTable(Path loadPath, String tableName, LoadFileType loadFileType destPath = new Path(destPath, AcidUtils.deltaSubdir(writeId, writeId, stmtId)); // TODO: loadFileType for MM table will no longer be REPLACE_ALL filter = loadFileType == LoadFileType.REPLACE_ALL - ? new JavaUtils.IdPathFilter(writeId, stmtId, false, true) : filter; + ? new AcidUtils.IdPathFilter(writeId, stmtId, false, true) : filter; } else if(!isAcidIUDoperation && isFullAcidTable) { destPath = fixFullAcidPathForLoadData(loadFileType, destPath, writeId, stmtId, tbl);