diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java
index b19c1aa1cb..26852d6674 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java
@@ -18,8 +18,10 @@ package org.apache.hadoop.hive.ql.parse;
 
 import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.hive.cli.CliSessionState;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
@@ -2777,6 +2779,40 @@ public void testConstraints() throws IOException {
     }
   }
 
+  @Test
+  public void testDeleteStagingDir() throws IOException {
+    String testName = "deleteStagingDir";
+    String dbName = createDB(testName, driver);
+    String tableName = "unptned";
+    run("CREATE TABLE " + dbName + "." + tableName + "(a string) STORED AS TEXTFILE", driver);
+
+    String[] unptn_data = new String[] {"one", "two"};
+    String unptn_locn = new Path(TEST_PATH, testName + "_unptn").toUri().getPath();
+    createTestDataFile(unptn_locn, unptn_data);
+
+    run("LOAD DATA LOCAL INPATH '" + unptn_locn + "' OVERWRITE INTO TABLE " + dbName + ".unptned", driver);
+    verifySetup("SELECT * from " + dbName + ".unptned", unptn_data, driver);
+
+    // Tables set up, let's replicate them over
+    advanceDumpDir();
+    run("REPL DUMP " + dbName, driver);
+    String replDumpLocn = getResult(0, 0, driver);
+    run("REPL LOAD " + dbName + "_dupe FROM '" + replDumpLocn + "'", driverMirror);
+    verifyRun("SELECT * from " + dbName + "_dupe.unptned", unptn_data, driverMirror);
+    Path warehouse = new Path(System.getProperty("test.warehouse.dir", "/tmp"));
+    FileSystem fs = FileSystem.get(warehouse.toUri(), hconf);
+    Path path = new Path(warehouse, dbName + "_dupe.db" + Path.SEPARATOR + tableName);
+    PathFilter filter = new PathFilter()
+    {
+      @Override
+      public boolean accept(Path path)
+      {
+        return path.getName().startsWith(".hive-staging");
+      }
+    };
+    verifyIfDirNotExist(fs, path, filter);
+  }
+
   private static String createDB(String name, Driver myDriver) {
     LOG.info("Testing " + name);
     String dbName = name + "_" + tid;
@@ -3024,6 +3060,15 @@ private void verifyIfPartitionExist(String dbName, String tableName, List<String>
     }
   }
 
+  private void verifyIfDirNotExist(FileSystem fs, Path path, PathFilter filter) throws IOException {
+    FileStatus[] statuses = fs.listStatus(path, filter);
+    assertEquals(0, statuses.length);
+  }
+
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
--- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
@@ ... @@
+  public Map<String, Path> getFsScratchDirs() {
+    return fsScratchDirs;
+  }
 
   public Map<LoadTableDesc, WriteEntity> getLoadTableOutputMap() {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/ReplLoadTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/ReplLoadTask.java
index cd31b173a3..cfe2ca5cb1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/ReplLoadTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/ReplLoadTask.java
@@ -19,7 +19,6 @@ Licensed to the Apache Software Foundation (ASF) under one
 
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.Database;
-import org.apache.hadoop.hive.metastore.TableType;
 import org.apache.hadoop.hive.ql.DriverContext;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.TaskFactory;
@@ -39,7 +38,6 @@ Licensed to the Apache Software Foundation (ASF) under one
 import org.apache.hadoop.hive.ql.exec.repl.bootstrap.load.util.Context;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.parse.repl.ReplLogger;
-import org.apache.hadoop.hive.ql.plan.ImportTableDesc;
 import org.apache.hadoop.hive.ql.plan.api.StageType;
 
 import java.io.Serializable;
@@ -187,6 +185,9 @@ a database ( directory )
       }
       this.childTasks = scope.rootTasks;
       LOG.info("Root Tasks / Total Tasks : {} / {} ", childTasks.size(), loadTaskTracker.numberOfTasks());
+
+      // Populate the driver context with the scratch dir info from the repl context, so that the temp dirs will be cleaned up later
+      driverContext.getCtx().getFsScratchDirs().putAll(context.utils.getFsScratchDirs());
     } catch (Exception e) {
       LOG.error("failed replication", e);
       setException(e);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/util/PathUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/util/PathUtils.java
index d0b7bdac7d..5033ac8ccd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/util/PathUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/util/PathUtils.java
@@ -46,6 +46,10 @@ Licensed to the Apache Software Foundation (ASF) under one
     stagingDir = HiveConf.getVar(hiveConf, HiveConf.ConfVars.STAGINGDIR);
   }
 
+  public Map<String, Path> getFsScratchDirs() {
+    return fsScratchDirs;
+  }
+
   public synchronized Path getExternalTmpPath(Path path) {
     URI extURI = path.toUri();
     if (extURI.getScheme().equals("viewfs")) {
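
Taken together, the patch plumbs scratch-directory bookkeeping from the bootstrap REPL LOAD path into the driver: PathUtils already records every temp path it hands out in its fsScratchDirs map, and ReplLoadTask now copies that map into the driver's Context via getFsScratchDirs().putAll(...), so the driver's regular end-of-query scratch-dir cleanup also removes the .hive-staging directories that REPL LOAD previously left behind under the replicated table. As a minimal standalone sketch of the check the new test performs (not part of the patch; the class name StagingDirCheck and the fallback warehouse path are invented for illustration, and hadoop-common is assumed on the classpath), the following program lists a table directory with the same .hive-staging prefix filter and fails if any staging directory survives:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class StagingDirCheck {

  // Same prefix the test filters on: the default of hive.exec.stagingdir
  // (HiveConf.ConfVars.STAGINGDIR) is ".hive-staging".
  private static final PathFilter STAGING_FILTER =
      p -> p.getName().startsWith(".hive-staging");

  public static void main(String[] args) throws IOException {
    // Table location to inspect; the fallback path here is invented.
    Path tableDir = new Path(args.length > 0
        ? args[0] : "/tmp/warehouse/testdb_dupe.db/unptned");
    FileSystem fs = FileSystem.get(tableDir.toUri(), new Configuration());

    // listStatus(Path, PathFilter) returns only the children the filter accepts.
    FileStatus[] leftovers = fs.listStatus(tableDir, STAGING_FILTER);
    if (leftovers.length != 0) {
      throw new IllegalStateException("Found " + leftovers.length
          + " leftover staging dir(s) under " + tableDir);
    }
    System.out.println("No .hive-staging directories under " + tableDir);
  }
}

PathFilter declares a single accept(Path) method, so a lambda works on Java 8; the FileSystem.listStatus(Path, PathFilter) overload is the same one the test's helper uses to assert that no matching child remains.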