From 9fb2570c9a83b0a7897e1f09198ef084c54ca32b Mon Sep 17 00:00:00 2001 From: Ashutosh Chauhan Date: Thu, 21 Jan 2016 17:39:27 -0800 Subject: [PATCH] HIVE-12908 : Improve dynamic partition loading III --- .../org/apache/hadoop/hive/common/FileUtils.java | 14 --- .../org/apache/hadoop/hive/ql/metadata/Hive.java | 99 +++++++--------------- 2 files changed, 29 insertions(+), 84 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java index 5dd9f40..8c9bd3d 100644 --- a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java +++ b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java @@ -578,20 +578,6 @@ public static boolean copy(FileSystem srcFS, Path src, * @param fs FileSystem to use * @param f path of directory * @param conf hive configuration - * @return true if deletion successful - * @throws FileNotFoundException - * @throws IOException - */ - public static boolean trashFilesUnderDir(FileSystem fs, Path f, Configuration conf) - throws FileNotFoundException, IOException { - return trashFilesUnderDir(fs, f, conf, true); - } - - /** - * Trashes or deletes all files under a directory. Leaves the directory as is. 
- * @param fs FileSystem to use - * @param f path of directory - * @param conf hive configuration * @param forceDelete whether to force delete files if trashing does not succeed * @return true if deletion successful * @throws FileNotFoundException diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 0bab769..570af51 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -84,7 +84,6 @@ import org.apache.hadoop.hive.metastore.api.HiveObjectType; import org.apache.hadoop.hive.metastore.api.Index; import org.apache.hadoop.hive.metastore.api.InsertEventRequestData; -import org.apache.hadoop.hive.metastore.api.InvalidObjectException; import org.apache.hadoop.hive.metastore.api.InvalidOperationException; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; @@ -1478,11 +1477,11 @@ public Partition loadPartition(Path loadPath, Table tbl, newPartPath = oldPartPath; } List newFiles = null; - if (replace) { + if (replace || (oldPart == null && !isAcid)) { Hive.replaceFiles(tbl.getPath(), loadPath, newPartPath, oldPartPath, getConf(), isSrcLocal); } else { - if (conf.getBoolVar(ConfVars.FIRE_EVENTS_FOR_DML) && !tbl.isTemporary()) { + if (conf.getBoolVar(ConfVars.FIRE_EVENTS_FOR_DML) && !tbl.isTemporary() && oldPart != null) { newFiles = new ArrayList<>(); } @@ -1492,7 +1491,7 @@ public Partition loadPartition(Path loadPath, Table tbl, Partition newTPart = oldPart != null ? 
oldPart : new Partition(tbl, partSpec, newPartPath); alterPartitionSpecInMemory(tbl, partSpec, newTPart.getTPartition(), inheritTableSpecs, newPartPath.toString()); validatePartition(newTPart); - if (oldPart != null && null != newFiles) { + if (null != newFiles) { fireInsertEvent(tbl, partSpec, newFiles); } @@ -2594,21 +2593,20 @@ public PrincipalPrivilegeSet get_privilege_set(HiveObjectType objectType, continue; } - // Strip off the file type, if any so we don't make: - // 000000_0.gz -> 000000_0.gz_copy_1 - String name = itemSource.getName(); - String filetype; - int index = name.lastIndexOf('.'); - if (index >= 0) { - filetype = name.substring(index); - name = name.substring(0, index); - } else { - filetype = ""; - } - Path itemDest = new Path(destf, itemSource.getName()); if (!replace) { + // Strip off the file type, if any so we don't make: + // 000000_0.gz -> 000000_0.gz_copy_1 + String name = itemSource.getName(); + String filetype; + int index = name.lastIndexOf('.'); + if (index >= 0) { + filetype = name.substring(index); + name = name.substring(0, index); + } else { + filetype = ""; + } // It's possible that the file we're copying may have the same // relative name as an existing file in the "destf" directory. 
// So let's make a quick check to see if we can rename any @@ -2723,7 +2721,10 @@ public static boolean moveFile(HiveConf conf, Path srcf, Path destf, try { if (inheritPerms || replace) { try{ - destStatus = shims.getFullFileStatus(conf, destFs, destf.getParent()); + destStatus = shims.getFullFileStatus(conf, destFs, destf); + if(destStatus.getFileStatus().isFile()) { + destStatus = shims.getFullFileStatus(conf, destFs, destf.getParent()); + } //if destf is an existing directory: //if replace is true, delete followed by rename(mv) is equivalent to replace //if replace is false, rename (mv) actually move the src under dest dir @@ -2988,55 +2989,6 @@ protected static void replaceFiles(Path tablePath, Path srcf, Path destf, Path o try { FileSystem destFs = destf.getFileSystem(conf); - - // check if srcf contains nested sub-directories - FileStatus[] srcs; - FileSystem srcFs; - try { - srcFs = srcf.getFileSystem(conf); - srcs = srcFs.globStatus(srcf); - } catch (IOException e) { - throw new HiveException("Getting globStatus " + srcf.toString(), e); - } - if (srcs == null) { - LOG.info("No sources specified to move: " + srcf); - return; - } - List> result = checkPaths(conf, destFs, srcs, srcFs, destf, true); - - if (oldPath != null) { - boolean oldPathDeleted = false; - boolean isOldPathUnderDestf = false; - try { - FileSystem fs2 = oldPath.getFileSystem(conf); - if (fs2.exists(oldPath)) { - // Do not delete oldPath if: - // - destf is subdir of oldPath - //if ( !(fs2.equals(destf.getFileSystem(conf)) && FileUtils.isSubDir(oldPath, destf, fs2))) - isOldPathUnderDestf = FileUtils.isSubDir(oldPath, destf, fs2); - if (isOldPathUnderDestf) { - // if oldPath is destf or its subdir, its should definitely be deleted, otherwise its - // existing content might result in incorrect (extra) data. - // But not sure why we changed not to delete the oldPath in HIVE-8750 if it is - // not the destf or its subdir? 
- oldPathDeleted = FileUtils.trashFilesUnderDir(fs2, oldPath, conf); - } - } - } catch (IOException e) { - if (isOldPathUnderDestf) { - // if oldPath is a subdir of destf but it could not be cleaned - throw new HiveException("Directory " + oldPath.toString() - + " could not be cleaned up.", e); - } else { - //swallow the exception since it won't affect the final result - LOG.warn("Directory " + oldPath.toString() + " cannot be cleaned: " + e, e); - } - } - if (isOldPathUnderDestf && !oldPathDeleted) { - throw new HiveException("Destination directory " + destf + " has not be cleaned up."); - } - } - // first call FileUtils.mkdir to make sure that destf directory exists, if not, it creates // destf with inherited permissions boolean destfExist = FileUtils.mkdir(destFs, destf, true, conf); @@ -3051,10 +3003,22 @@ protected static void replaceFiles(Path tablePath, Path srcf, Path destf, Path o // directory if it is the root of an HDFS encryption zone. // 2. srcs must be a list of files -- ensured by LoadSemanticAnalyzer // in both cases, we move the file under destf - for (List sdpairs : result) { - for (Path[] sdpair : sdpairs) { - if (!moveFile(conf, sdpair[0], sdpair[1], true, isSrcLocal)) { - throw new IOException("Error moving: " + sdpair[0] + " into: " + sdpair[1]); + + FileSystem srcFS = srcf.getFileSystem(conf); + FileStatus[] srcs = srcFS.globStatus(srcf); + if (srcs == null) { + // globStatus returns null when a non-glob path does not exist; there is nothing to move + LOG.info("No sources specified to move: " + srcf); + return; + } + if (srcs.length == 1 && srcs[0].isDirectory()) { + if (!moveFile(conf, srcs[0].getPath(), destf, true, isSrcLocal)) { + throw new IOException("Error moving: " + srcf + " into: " + destf); + } + } else { // it's either a file or a glob + for (FileStatus src : srcs) { + if (!moveFile(conf, src.getPath(), new Path(destf, src.getPath().getName()), true, isSrcLocal)) { + throw new IOException("Error moving: " + srcf + " into: " + destf); } } } -- 1.7.12.4 (Apple Git-37)