diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
index 847c3bc..776eb1c 100644
--- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
+++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
@@ -27,6 +27,8 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.conf.Configuration;
@@ -76,6 +78,7 @@
   static final String DYNTEMP_DIR_NAME = "_DYN";
   static final String SCRATCH_DIR_NAME = "_SCRATCH";
+  static final Pattern SCRATCH_DIR_PATTERN = Pattern.compile("(.*/_SCRATCH\\d\\.?\\d+).*");
   private static final String APPEND_SUFFIX = "_a_";
   private static final int APPEND_COUNTER_WARN_THRESHOLD = 1000;
   private final int maxAppendAttempts;
 
@@ -217,9 +220,18 @@ public void abortJob(JobContext jobContext, State state) throws IOException {
       // open and remove the directory anyway, but on Windows, OS refuse to remove a
       // directory containing open files. So on Windows, we will leave output directory
       // behind when job fail. User needs to remove the output directory manually
-      LOG.info("Job failed. Try cleaning up temporary directory [{}].", src);
       if (!src.equals(tblPath)){
-        fs.delete(src, true);
+        Matcher scratchDirMatcher = SCRATCH_DIR_PATTERN.matcher(src.toString());
+        if (scratchDirMatcher.matches()) {
+          String scratchDir = scratchDirMatcher.group(1);
+          if (scratchDir.contains(tblPath.toString())) {
+            fs.delete(new Path(scratchDir), true);
+            LOG.info("Job failed. Try cleaning up temporary directory [{}].", scratchDir);
+          }
+        } else {
+          fs.delete(src, true);
+          LOG.info("Job failed. Try cleaning up temporary directory [{}].", src);
+        }
       }
     } finally {
       cancelDelegationTokens(jobContext);
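
Note: below is a minimal standalone sketch of the new pattern's behavior; the sample paths are hypothetical placeholders, not taken from the patch. It shows how group(1) recovers the _SCRATCH root from a nested output path, which is what lets the new abortJob branch delete the whole temporary tree rather than only the leaf directory.

import java.util.regex.Matcher;
import java.util.regex.Pattern;

// Standalone demo of the SCRATCH_DIR_PATTERN regex added in the patch above.
// The sample paths are hypothetical and for illustration only.
public class ScratchDirPatternDemo {
  static final Pattern SCRATCH_DIR_PATTERN = Pattern.compile("(.*/_SCRATCH\\d\\.?\\d+).*");

  public static void main(String[] args) {
    String[] samples = {
        "/warehouse/db/tbl/_SCRATCH0.28437/part=1/part-r-00000", // group(1) = scratch root
        "/warehouse/db/tbl/part=1/part-r-00000"                  // no _SCRATCH component
    };
    for (String src : samples) {
      Matcher m = SCRATCH_DIR_PATTERN.matcher(src);
      if (m.matches()) {
        // Delete from the _SCRATCH root so the entire temporary tree is removed.
        System.out.println("delete " + m.group(1));
      } else {
        // Fall back to the original behavior: delete src itself.
        System.out.println("delete " + src);
      }
    }
  }
}

Since matches() anchors against the whole string, paths without a _SCRATCH component fall through to the original fs.delete(src, true) branch, so the pre-patch behavior is preserved for non-scratch output directories.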