diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 8ef978e..cf44cfe 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -1335,6 +1335,7 @@ miniSparkOnYarn.query.files=auto_sortmerge_join_16.q,\ infer_bucket_sort_num_buckets.q,\ infer_bucket_sort_reducers_power_two.q,\ input16_cc.q,\ + insert_overwrite_directory2.q,\ leftsemijoin_mr.q,\ list_bucket_dml_10.q,\ load_fs2.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java index 12b3b1f..c626add 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java @@ -115,6 +115,7 @@ private void moveFileInDfs (Path sourcePath, Path targetPath, FileSystem fs) if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_INSERT_INTO_MULTILEVEL_DIRS)) { deletePath = createTargetPath(targetPath, fs); } + Hive.clearDestForSubDirSrc(conf, targetPath, sourcePath, false); if (!Hive.moveFile(conf, sourcePath, targetPath, true, false)) { try { if (deletePath != null) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index e5d5126..82abd52 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -2761,8 +2761,8 @@ private static boolean isSubDir(Path srcf, Path destf, FileSystem srcFs, FileSys return false; } - String fullF1 = getQualifiedPathWithoutSchemeAndAuthority(srcf, srcFs); - String fullF2 = getQualifiedPathWithoutSchemeAndAuthority(destf, destFs); + String fullF1 = getQualifiedPathWithoutSchemeAndAuthority(srcf, srcFs) + Path.SEPARATOR; + String fullF2 = getQualifiedPathWithoutSchemeAndAuthority(destf, destFs) + Path.SEPARATOR; boolean isInTest = HiveConf.getBoolVar(srcFs.getConf(), ConfVars.HIVE_IN_TEST); // In the automation, the data warehouse is the local file system based. @@ -2818,6 +2818,33 @@ private static Path mvFile(HiveConf conf, Path srcf, Path destf, boolean isSrcLo return destf; } + // Clears the dest dir when src is sub-dir of dest. + public static void clearDestForSubDirSrc(final HiveConf conf, Path dest, + Path src, boolean isSrcLocal) throws IOException { + FileSystem destFS = dest.getFileSystem(conf); + FileSystem srcFS = src.getFileSystem(conf); + if (isSubDir(src, dest, srcFS, destFS, isSrcLocal)) { + final Path fullSrcPath = new Path( + getQualifiedPathWithoutSchemeAndAuthority(src, srcFS)); + final Path fullDestPath = new Path( + getQualifiedPathWithoutSchemeAndAuthority(dest, destFS)); + if (fullSrcPath.equals(fullDestPath)) { + return; + } + Path parent = fullSrcPath; + while (!parent.getParent().equals(fullDestPath)) { + parent = parent.getParent(); + } + FileStatus[] existingFiles = destFS.listStatus( + dest, FileUtils.HIDDEN_FILES_PATH_FILTER); + for (FileStatus fileStatus : existingFiles) { + if (!fileStatus.getPath().getName().equals(parent.getName())) { + destFS.delete(fileStatus.getPath(), true); + } + } + } + } + //it is assumed that parent directory of the destf should already exist when this //method is called. when the replace value is true, this method works a little different //from mv command if the destf is a directory, it replaces the destf instead of moving under diff --git a/ql/src/test/queries/clientpositive/insert_overwrite_directory2.q b/ql/src/test/queries/clientpositive/insert_overwrite_directory2.q new file mode 100644 index 0000000..dd337a9 --- /dev/null +++ b/ql/src/test/queries/clientpositive/insert_overwrite_directory2.q @@ -0,0 +1,17 @@ +create external table result(key string) location "${system:test.tmp.dir}/result"; + +set mapreduce.job.reduces=2; + +insert overwrite directory "${system:test.tmp.dir}/result" +select key from src group by key; + +select count(*) from result; + +set mapreduce.job.reduces=1; + +insert overwrite directory "${system:test.tmp.dir}/result" +select key from src group by key; + +select count(*) from result; + +drop table result; \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/insert_overwrite_directory2.q.out b/ql/src/test/results/clientpositive/insert_overwrite_directory2.q.out new file mode 100644 index 0000000..c3232e7 --- /dev/null +++ b/ql/src/test/results/clientpositive/insert_overwrite_directory2.q.out @@ -0,0 +1,54 @@ +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@result +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@result +#### A masked pattern was here #### +select key from src group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +select key from src group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +PREHOOK: query: select count(*) from result +PREHOOK: type: QUERY +PREHOOK: Input: default@result +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@result +#### A masked pattern was here #### +309 +#### A masked pattern was here #### +select key from src group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +select key from src group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +PREHOOK: query: select count(*) from result +PREHOOK: type: QUERY +PREHOOK: Input: default@result +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@result +#### A masked pattern was here #### +309 +PREHOOK: query: drop table result +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@result +PREHOOK: Output: default@result +POSTHOOK: query: drop table result +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@result +POSTHOOK: Output: default@result diff --git a/ql/src/test/results/clientpositive/spark/insert_overwrite_directory2.q.out b/ql/src/test/results/clientpositive/spark/insert_overwrite_directory2.q.out new file mode 100644 index 0000000..c3232e7 --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/insert_overwrite_directory2.q.out @@ -0,0 +1,54 @@ +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@result +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@result +#### A masked pattern was here #### +select key from src group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +select key from src group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +PREHOOK: query: select count(*) from result +PREHOOK: type: QUERY +PREHOOK: Input: default@result +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@result +#### A masked pattern was here #### +309 +#### A masked pattern was here #### +select key from src group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +select key from src group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +PREHOOK: query: select count(*) from result +PREHOOK: type: QUERY +PREHOOK: Input: default@result +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@result +#### A masked pattern was here #### +309 +PREHOOK: query: drop table result +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@result +PREHOOK: Output: default@result +POSTHOOK: query: drop table result +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@result +POSTHOOK: Output: default@result