diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java b/ql/src/java/org/apache/hadoop/hive/ql/Context.java index d618ef96f30..87227f9566f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java @@ -23,6 +23,8 @@ import java.io.IOException; import java.net.URI; import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Collection; import java.util.Date; import java.util.HashMap; import java.util.HashSet; @@ -32,6 +34,7 @@ import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; import org.antlr.runtime.TokenRewriteStream; import org.apache.hadoop.conf.Configuration; @@ -642,49 +645,38 @@ private Path getExternalScratchDir(URI extURI) { return getStagingDir(new Path(extURI.getScheme(), extURI.getAuthority(), extURI.getPath()), !isExplainSkipExecution()); } - /** - * Remove any created scratch directories. - */ - public void removeResultCacheDir() { - if(this.fsResultCacheDirs != null) { - try { - Path p = this.fsResultCacheDirs; - FileSystem fs = p.getFileSystem(conf); - LOG.debug("Deleting result cache dir: {}", p); - fs.delete(p, true); - fs.cancelDeleteOnExit(p); - } catch (Exception e) { - LOG.warn("Error Removing result cache dir: " - + StringUtils.stringifyException(e)); - } + private List dirsToBeDeleted(boolean deleteResultDir) { + List toBeDeleted = new ArrayList<>(); + if (resDir != null) { + toBeDeleted.add(resDir); + } + if (deleteResultDir && fsResultCacheDirs != null) { + toBeDeleted.add(fsResultCacheDirs); } + toBeDeleted.addAll(fsScratchDirs.values()); + LOG.debug("Directories to be deleted: {}", toBeDeleted); + return removeRedundant(toBeDeleted); } /** - * Remove any created scratch directories. + * Return only those which are not subdirectories of others */ - public void removeScratchDir() { - String resultCacheDir = null; - if(this.fsResultCacheDirs != null) { - resultCacheDir = this.fsResultCacheDirs.toUri().getPath(); - } - for (Map.Entry entry : fsScratchDirs.entrySet()) { - try { - Path p = entry.getValue(); - if(resultCacheDir == null || !p.toUri().getPath().contains(resultCacheDir)) { - // delete only the paths which aren't result cache dir path - // because that will be taken care by removeResultCacheDir - FileSystem fs = p.getFileSystem(conf); - LOG.debug("Deleting scratch dir: {}", p); - fs.delete(p, true); - fs.cancelDeleteOnExit(p); + public static List removeRedundant(Collection dirs) { + List result = new ArrayList<>(); + List distinct = dirs.stream().distinct().collect(Collectors.toList()); + for (int i = 0; i < distinct.size(); i++) { + boolean subdir = false; + for (int j = 0; j < distinct.size(); j++) { + if (i != j && FileUtils.isPathWithinSubtree(distinct.get(i), distinct.get(j))) { + subdir = true; + break; } - } catch (Exception e) { - LOG.warn("Error Removing Scratch: " - + StringUtils.stringifyException(e)); + } + if (!subdir) { + result.add(distinct.get(i)); } } - fsScratchDirs.clear(); + return result; } /** @@ -842,18 +834,23 @@ public void clear(boolean deleteResultDir) throws IOException { for (Context subContext : rewrittenStatementContexts) { subContext.clear(); } - // Then clear this context - if (resDir != null) { - try { - FileSystem fs = resDir.getFileSystem(conf); - LOG.debug("Deleting result dir: {}", resDir); - fs.delete(resDir, true); - } catch (IOException e) { - LOG.info("Context clear error: " + StringUtils.stringifyException(e)); - } + removeMaterializedCTEs(); + + List toBeDeleted = dirsToBeDeleted(deleteResultDir); + for (Path each : toBeDeleted) { + try { + FileSystem fs = each.getFileSystem(conf); + LOG.debug("Deleting context dir: {}", each); + fs.delete(each, true); + fs.cancelDeleteOnExit(each); + } catch (Exception e) { + LOG.warn("Error Removing dir: " + + StringUtils.stringifyException(e)); } + } + fsScratchDirs.clear(); - if (resFile != null) { + if (resFile != null && !isSubDirOf(resFile.getParent(), toBeDeleted)) { try { FileSystem fs = resFile.getFileSystem(conf); LOG.debug("Deleting result file: {}", resFile); @@ -862,15 +859,15 @@ public void clear(boolean deleteResultDir) throws IOException { LOG.info("Context clear error: " + StringUtils.stringifyException(e)); } } - if(deleteResultDir) { - removeResultCacheDir(); - } - removeMaterializedCTEs(); - removeScratchDir(); + originalTracker = null; setNeedLockMgr(false); } + private static boolean isSubDirOf(Path dir, List toBeDeleted) { + return toBeDeleted.stream().anyMatch(each -> FileUtils.isPathWithinSubtree(dir, each)); + } + public DataInput getStream() { try { if (!initialized) { diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestContext.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestContext.java index c9f67579cae..93552652bb9 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestContext.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestContext.java @@ -26,8 +26,12 @@ import org.junit.Test; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import static org.hamcrest.Matchers.is; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThat; import static org.mockito.Mockito.*; public class TestContext { @@ -70,4 +74,80 @@ public void testGetScratchDirectoriesForPaths() throws IOException { assertEquals(mrTmpPath, spyContext.getTempDirForInterimJobPath(new Path("file:///user"))); conf.setBoolean(HiveConf.ConfVars.HIVE_BLOBSTORE_OPTIMIZATIONS_ENABLED.varname, true); } + + @Test + public void testPathsToBeDeletedSingle() { + List result = Context.removeRedundant(new ArrayList() {{ + add(new Path("/home/user1/tmp/a")); + }}); + assertThat(result, is(new ArrayList() {{ + add(new Path("/home/user1/tmp/a")); + }})); + } + + + @Test + public void testPathsToBeDeletedEmpty() { + List result = Context.removeRedundant(new ArrayList<>()); + assertThat(result, is(new ArrayList<>())); + } + + @Test + public void testPathsToBeDeletedSame() { + List result = Context.removeRedundant(new ArrayList() {{ + add(new Path("/home/user1/tmp")); + add(new Path("/home/user1/tmp")); + }}); + assertThat(result, is(new ArrayList() {{ + add(new Path("/home/user1/tmp")); + }})); + } + + @Test + public void testPathsToBeDeletedMultiple() { + List result = Context.removeRedundant(new ArrayList() {{ + add(new Path("/home/user1/tmp/a")); + add(new Path("/home/user1/tmp/a/b")); + add(new Path("/home/user1/tmp/a/b/c")); + add(new Path("/home/user1/tmp/b/c")); + add(new Path("/home/user1/tmp/b")); + add(new Path("/home/user1/tmp/b/d")); + add(new Path("/home/user1/tmp/bb")); + }}); + assertThat(result, is(new ArrayList() {{ + add(new Path("/home/user1/tmp/a")); + add(new Path("/home/user1/tmp/b")); + add(new Path("/home/user1/tmp/bb")); + }})); + } + + @Test + public void testPathsToBeDeletedMultiple2() { + List result = Context.removeRedundant(new ArrayList() {{ + add(new Path("/home/user1/tmp/a")); + add(new Path("/home/user1/tmp/a/b")); + add(new Path("/home/user1/tmp/a/b/c")); + add(new Path("/home/user1/tmp/b/c")); + add(new Path("/home/user1/tmp/b")); + add(new Path("/home/user1/tmp/b/d")); + add(new Path("/home")); + }}); + assertThat(result, is(new ArrayList() {{ + add(new Path("/home")); + }})); + } + + @Test + public void testPathsToBeDeletedDifferentFS() { + List result = Context.removeRedundant(new ArrayList() {{ + add(new Path("hdfs:///home/user1/tmp/a")); + add(new Path("hdfs:///home/user1/tmp/a/b")); + add(new Path("s3a:///home/user1/tmp/a/b")); + add(new Path("s3a:///home/user1/tmp/a/b/c")); + }}); + assertThat(result, is(new ArrayList() {{ + add(new Path("hdfs:///home/user1/tmp/a")); + add(new Path("s3a:///home/user1/tmp/a/b")); + }})); + } }