From 4151427d018acb0a7a0a5d8f45c0469aa501e936 Mon Sep 17 00:00:00 2001 From: Ashutosh Chauhan Date: Sat, 30 Apr 2016 18:04:37 -0700 Subject: [PATCH] HIVE-13661: [Refactor] Move common FS operations out of shim layer --- .../org/apache/hadoop/hive/common/FileUtils.java | 19 +-- .../hadoop/hive/metastore/HiveMetaStoreFsImpl.java | 11 +- .../org/apache/hadoop/hive/ql/exec/DDLTask.java | 20 ++- .../org/apache/hadoop/hive/ql/exec/MoveTask.java | 7 +- .../org/apache/hadoop/hive/ql/metadata/Hive.java | 27 ++-- .../hadoop/hive/ql/session/SessionState.java | 2 +- .../apache/hadoop/hive/shims/Hadoop23Shims.java | 148 ------------------- .../java/org/apache/hadoop/hive/io/HdfsUtils.java | 156 ++++++++++++++++++--- .../org/apache/hadoop/hive/shims/HadoopShims.java | 41 ------ .../hadoop/hive/shims/HadoopShimsSecure.java | 10 -- 10 files changed, 164 insertions(+), 277 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java index f7d41cd..71c9188 100644 --- a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java +++ b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java @@ -36,10 +36,11 @@ import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.fs.Trash; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.io.HdfsUtils; import org.apache.hadoop.hive.shims.HadoopShims; -import org.apache.hadoop.hive.shims.HadoopShims.HdfsFileStatus; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; @@ -526,11 +527,9 @@ public static boolean mkdir(FileSystem fs, Path f, boolean inheritPerms, Configu if (!success) { return false; } else { - HadoopShims shim = ShimLoader.getHadoopShims(); - HdfsFileStatus fullFileStatus = shim.getFullFileStatus(conf, fs, lastExistingParent); try { //set on the entire subtree - shim.setFullFileStatus(conf, fullFileStatus, fs, firstNonExistentParent); + HdfsUtils.setFullFileStatus(conf, new HdfsUtils.HadoopFileStatus(conf, fs, lastExistingParent), fs, firstNonExistentParent); } catch (Exception e) { LOG.warn("Error setting permissions of " + firstNonExistentParent, e); } @@ -566,9 +565,8 @@ public static boolean copy(FileSystem srcFS, Path src, boolean inheritPerms = conf.getBoolVar(HiveConf.ConfVars.HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS); if (copied && inheritPerms) { - HdfsFileStatus fullFileStatus = shims.getFullFileStatus(conf, dstFS, dst); try { - shims.setFullFileStatus(conf, fullFileStatus, dstFS, dst); + HdfsUtils.setFullFileStatus(conf, new HdfsUtils.HadoopFileStatus(conf, dstFS, dst.getParent()), dstFS, dst); } catch (Exception e) { LOG.warn("Error setting permissions or group of " + dst, e); } @@ -620,12 +618,11 @@ public static boolean moveToTrash(FileSystem fs, Path f, Configuration conf) thr */ public static boolean moveToTrash(FileSystem fs, Path f, Configuration conf, boolean forceDelete) throws IOException { - LOG.info("deleting " + f); - HadoopShims hadoopShim = ShimLoader.getHadoopShims(); + LOG.debug("deleting " + f); boolean result = false; try { - result = hadoopShim.moveToAppropriateTrash(fs, f, conf); + result = Trash.moveToAppropriateTrash(fs, f, conf); if (result) { LOG.info("Moved to trash: " + f); return true; @@ -687,10 +684,8 @@ public static boolean renameWithPerms(FileSystem fs, Path sourcePath, } else 
{ //rename the directory if (fs.rename(sourcePath, destPath)) { - HadoopShims shims = ShimLoader.getHadoopShims(); - HdfsFileStatus fullFileStatus = shims.getFullFileStatus(conf, fs, destPath.getParent()); try { - shims.setFullFileStatus(conf, fullFileStatus, fs, destPath); + HdfsUtils.setFullFileStatus(conf, new HdfsUtils.HadoopFileStatus(conf, fs, destPath.getParent()), fs, destPath); } catch (Exception e) { LOG.warn("Error setting permissions or group of " + destPath, e); } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreFsImpl.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreFsImpl.java index 9e7dcfc..df698c8 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreFsImpl.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreFsImpl.java @@ -25,10 +25,8 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.fs.Trash; import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.shims.HadoopShims; -import org.apache.hadoop.hive.shims.ShimLoader; public class HiveMetaStoreFsImpl implements MetaStoreFS { @@ -38,19 +36,18 @@ @Override public boolean deleteDir(FileSystem fs, Path f, boolean recursive, boolean ifPurge, Configuration conf) throws MetaException { - LOG.info("deleting " + f); - HadoopShims hadoopShim = ShimLoader.getHadoopShims(); + LOG.debug("deleting " + f); try { if (ifPurge) { LOG.info("Not moving "+ f +" to trash"); - } else if (hadoopShim.moveToAppropriateTrash(fs, f, conf)) { + } else if (Trash.moveToAppropriateTrash(fs, f, conf)) { LOG.info("Moved to trash: " + f); return true; } if (fs.delete(f, true)) { - LOG.info("Deleted the diretory " + f); + LOG.debug("Deleted the diretory " + f); return true; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index cbeb361..9391cf7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -61,6 +61,7 @@ import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.io.HdfsUtils; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.PartitionDropOptions; import org.apache.hadoop.hive.metastore.TableType; @@ -216,9 +217,6 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.hive.shims.HadoopShims; -import org.apache.hadoop.hive.shims.HadoopShims.HdfsFileStatus; -import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.tools.HadoopArchives; @@ -2388,7 +2386,7 @@ public int showColumns(Hive db, ShowColumnsDesc showCols) /** * Write a list of the user defined functions to a file. - * @param db + * @param db * * @param showFuncs * are the functions we're interested in. @@ -2441,7 +2439,7 @@ private int showFunctions(Hive db, ShowFunctionsDesc showFuncs) throws HiveExcep /** * Write a list of the current locks to a file. - * @param db + * @param db * * @param showLocks * the locks we're interested in. 
@@ -2719,7 +2717,7 @@ private int showTxns(Hive db, ShowTxnsDesc desc) throws HiveException { /** * Lock the table/partition specified - * @param db + * @param db * * @param lockTbl * the table/partition to be locked along with the mode @@ -2765,7 +2763,7 @@ private int unlockDatabase(Hive db, UnlockDatabaseDesc unlockDb) throws HiveExce /** * Unlock the table/partition specified - * @param db + * @param db * * @param unlockTbl * the table/partition to be unlocked @@ -2781,7 +2779,7 @@ private int unlockTable(Hive db, UnlockTableDesc unlockTbl) throws HiveException /** * Shows a description of a function. - * @param db + * @param db * * @param descFunc * is the function we are describing @@ -4183,15 +4181,13 @@ private int truncateTable(Hive db, TruncateTableDesc truncateTableDesc) throws H try { // this is not transactional - HadoopShims shim = ShimLoader.getHadoopShims(); for (Path location : getLocations(db, table, partSpec)) { FileSystem fs = location.getFileSystem(conf); - - HdfsFileStatus fullFileStatus = shim.getFullFileStatus(conf, fs, location); + HdfsUtils.HadoopFileStatus status = new HdfsUtils.HadoopFileStatus(conf, fs, location); fs.delete(location, true); fs.mkdirs(location); try { - shim.setFullFileStatus(conf, fullFileStatus, fs, location); + HdfsUtils.setFullFileStatus(conf, status, fs, location); } catch (Exception e) { LOG.warn("Error setting permissions of " + location, e); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java index 54592cc..c2c6c65 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.common.HiveStatsUtils; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.io.HdfsUtils; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.InvalidOperationException; @@ -65,8 +66,6 @@ import org.apache.hadoop.hive.ql.plan.MoveWork; import org.apache.hadoop.hive.ql.plan.api.StageType; import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.hive.shims.HadoopShims; -import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.util.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -179,11 +178,9 @@ private Path createTargetPath(Path targetPath, FileSystem fs) throws IOException actualPath = actualPath.getParent(); } fs.mkdirs(mkDirPath); - HadoopShims shims = ShimLoader.getHadoopShims(); if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS)) { try { - HadoopShims.HdfsFileStatus status = shims.getFullFileStatus(conf, fs, actualPath); - shims.setFullFileStatus(conf, status, fs, actualPath); + HdfsUtils.setFullFileStatus(conf, new HdfsUtils.HadoopFileStatus(conf, fs, actualPath), fs, mkDirPath); } catch (Exception e) { LOG.warn("Error setting permissions or group of " + actualPath, e); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 4d9c3d2..26c458c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -65,6 +65,7 @@ import org.apache.hadoop.hive.common.classification.InterfaceStability.Unstable; import org.apache.hadoop.hive.conf.HiveConf; 
import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.io.HdfsUtils; import org.apache.hadoop.hive.metastore.HiveMetaException; import org.apache.hadoop.hive.metastore.HiveMetaHook; import org.apache.hadoop.hive.metastore.HiveMetaHookLoader; @@ -2611,9 +2612,9 @@ private static void copyFiles(final HiveConf conf, final FileSystem destFs, FileStatus[] srcs, final FileSystem srcFs, final Path destf, final boolean isSrcLocal, final List newFiles) throws HiveException { - final HadoopShims.HdfsFileStatus fullDestStatus; + final HdfsUtils.HadoopFileStatus fullDestStatus; try { - fullDestStatus = ShimLoader.getHadoopShims().getFullFileStatus(conf, destFs, destf); + fullDestStatus = new HdfsUtils.HadoopFileStatus(conf, destFs, destf); } catch (IOException e1) { throw new HiveException(e1); } @@ -2674,7 +2675,7 @@ private static void copyFiles(final HiveConf conf, final FileSystem destFs, } if (inheritPerms) { - ShimLoader.getHadoopShims().setFullFileStatus(conf, fullDestStatus, destFs, destPath); + HdfsUtils.setFullFileStatus(conf, fullDestStatus, destFs, destPath); } if (null != newFiles) { newFiles.add(destPath); @@ -2697,17 +2698,6 @@ private static void copyFiles(final HiveConf conf, final FileSystem destFs, } } - private static boolean destExists(List> result, Path proposed) { - for (List sdpairs : result) { - for (Path[] sdpair : sdpairs) { - if (sdpair[1].equals(proposed)) { - return true; - } - } - } - return false; - } - private static boolean isSubDir(Path srcf, Path destf, FileSystem srcFs, FileSystem destFs, boolean isSrcLocal) { if (srcf == null) { LOG.debug("The source path is null for isSubDir method."); @@ -2795,8 +2785,7 @@ public static boolean moveFile(HiveConf conf, Path srcf, final Path destf, //needed for perm inheritance. 
boolean inheritPerms = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS); - HadoopShims shims = ShimLoader.getHadoopShims(); - HadoopShims.HdfsFileStatus destStatus = null; + HdfsUtils.HadoopFileStatus destStatus = null; // If source path is a subdirectory of the destination path: // ex: INSERT OVERWRITE DIRECTORY 'target/warehouse/dest4.out' SELECT src.value WHERE src.key >= 300; @@ -2808,7 +2797,7 @@ public static boolean moveFile(HiveConf conf, Path srcf, final Path destf, try { if (inheritPerms || replace) { try{ - destStatus = shims.getFullFileStatus(conf, destFs, destf); + destStatus = new HdfsUtils.HadoopFileStatus(conf, destFs, destf); //if destf is an existing directory: //if replace is true, delete followed by rename(mv) is equivalent to replace //if replace is false, rename (mv) actually move the src under dest dir @@ -2821,7 +2810,7 @@ public static boolean moveFile(HiveConf conf, Path srcf, final Path destf, } catch (FileNotFoundException ignore) { //if dest dir does not exist, any re if (inheritPerms) { - destStatus = shims.getFullFileStatus(conf, destFs, destf.getParent()); + destStatus = new HdfsUtils.HadoopFileStatus(conf, destFs, destf.getParent()); } } } @@ -2888,7 +2877,7 @@ public Boolean call() throws Exception { if (success && inheritPerms) { try { - ShimLoader.getHadoopShims().setFullFileStatus(conf, destStatus, destFs, destf); + HdfsUtils.setFullFileStatus(conf, destStatus, destFs, destf); } catch (IOException e) { LOG.warn("Error setting permission of file " + destf + ": "+ e.getMessage(), e); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java index d211eb2..37ef165 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java @@ -445,7 +445,7 @@ public HiveTxnManager getTxnMgr() { if ("hdfs".equals(fs.getUri().getScheme())) { hdfsEncryptionShim = ShimLoader.getHadoopShims().createHdfsEncryptionShim(fs, sessionConf); } else { - LOG.info("Could not get hdfsEncryptionShim, it is only applicable to hdfs filesystem."); + LOG.debug("Could not get hdfsEncryptionShim, it is only applicable to hdfs filesystem."); } } catch (Exception e) { throw new HiveException(e); diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java index c21088f..64b8780 100644 --- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java +++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java @@ -47,7 +47,6 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.FsShell; import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; @@ -55,10 +54,6 @@ import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.Trash; import org.apache.hadoop.fs.TrashPolicy; -import org.apache.hadoop.fs.permission.AclEntry; -import org.apache.hadoop.fs.permission.AclEntryScope; -import org.apache.hadoop.fs.permission.AclEntryType; -import org.apache.hadoop.fs.permission.AclStatus; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSClient; @@ -101,11 +96,6 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration; 
import org.apache.tez.test.MiniTezCluster; -import com.google.common.base.Joiner; -import com.google.common.base.Objects; -import com.google.common.base.Predicate; -import com.google.common.collect.Iterables; - /** * Implemention of shims against Hadoop 0.23.0. */ @@ -257,10 +247,6 @@ public String getJobLauncherHttpAddress(Configuration conf) { return conf.get("yarn.resourcemanager.webapp.address"); } - protected boolean isExtendedAclEnabled(Configuration conf) { - return Objects.equal(conf.get("dfs.namenode.acls.enabled"), "true"); - } - @Override public long getDefaultBlockSize(FileSystem fs, Path path) { return fs.getDefaultBlockSize(path); @@ -272,12 +258,6 @@ public short getDefaultReplication(FileSystem fs, Path path) { } @Override - public boolean moveToAppropriateTrash(FileSystem fs, Path path, Configuration conf) - throws IOException { - return Trash.moveToAppropriateTrash(fs, path, conf); - } - - @Override public void setTotalOrderPartitionFile(JobConf jobConf, Path partitionFile){ TotalOrderPartitioner.setPartitionFile(jobConf, partitionFile); } @@ -808,134 +788,6 @@ public void hflush(FSDataOutputStream stream) throws IOException { stream.hflush(); } - @Override - public HdfsFileStatus getFullFileStatus(Configuration conf, FileSystem fs, - Path file) throws IOException { - FileStatus fileStatus = fs.getFileStatus(file); - AclStatus aclStatus = null; - if (isExtendedAclEnabled(conf)) { - //Attempt extended Acl operations only if its enabled, but don't fail the operation regardless. - try { - aclStatus = fs.getAclStatus(file); - } catch (Exception e) { - LOG.info("Skipping ACL inheritance: File system for path " + file + " " + - "does not support ACLs but dfs.namenode.acls.enabled is set to true. "); - LOG.debug("The details are: " + e, e); - } - } - return new Hadoop23FileStatus(fileStatus, aclStatus); - } - - @Override - public void setFullFileStatus(Configuration conf, HdfsFileStatus sourceStatus, - FileSystem fs, Path target) throws IOException { - String group = sourceStatus.getFileStatus().getGroup(); - //use FsShell to change group, permissions, and extended ACL's recursively - try { - FsShell fsShell = new FsShell(); - fsShell.setConf(conf); - //If there is no group of a file, no need to call chgrp - if (group != null && !group.isEmpty()) { - run(fsShell, new String[]{"-chgrp", "-R", group, target.toString()}); - } - - if (isExtendedAclEnabled(conf)) { - //Attempt extended Acl operations only if its enabled, 8791but don't fail the operation regardless. - try { - AclStatus aclStatus = ((Hadoop23FileStatus) sourceStatus).getAclStatus(); - if (aclStatus != null) { - List aclEntries = aclStatus.getEntries(); - removeBaseAclEntries(aclEntries); - - //the ACL api's also expect the tradition user/group/other permission in the form of ACL - FsPermission sourcePerm = sourceStatus.getFileStatus().getPermission(); - aclEntries.add(newAclEntry(AclEntryScope.ACCESS, AclEntryType.USER, sourcePerm.getUserAction())); - aclEntries.add(newAclEntry(AclEntryScope.ACCESS, AclEntryType.GROUP, sourcePerm.getGroupAction())); - aclEntries.add(newAclEntry(AclEntryScope.ACCESS, AclEntryType.OTHER, sourcePerm.getOtherAction())); - - //construct the -setfacl command - String aclEntry = Joiner.on(",").join(aclStatus.getEntries()); - run(fsShell, new String[]{"-setfacl", "-R", "--set", aclEntry, target.toString()}); - } - } catch (Exception e) { - LOG.info("Skipping ACL inheritance: File system for path " + target + " " + - "does not support ACLs but dfs.namenode.acls.enabled is set to true. 
"); - LOG.debug("The details are: " + e, e); - } - } else { - String permission = Integer.toString(sourceStatus.getFileStatus().getPermission().toShort(), 8); - run(fsShell, new String[]{"-chmod", "-R", permission, target.toString()}); - } - } catch (Exception e) { - throw new IOException("Unable to set permissions of " + target, e); - } - try { - if (LOG.isDebugEnabled()) { //some trace logging - getFullFileStatus(conf, fs, target).debugLog(); - } - } catch (Exception e) { - //ignore. - } - } - - public class Hadoop23FileStatus implements HdfsFileStatus { - private final FileStatus fileStatus; - private final AclStatus aclStatus; - public Hadoop23FileStatus(FileStatus fileStatus, AclStatus aclStatus) { - this.fileStatus = fileStatus; - this.aclStatus = aclStatus; - } - @Override - public FileStatus getFileStatus() { - return fileStatus; - } - public AclStatus getAclStatus() { - return aclStatus; - } - @Override - public void debugLog() { - if (fileStatus != null) { - LOG.debug(fileStatus.toString()); - } - if (aclStatus != null) { - LOG.debug(aclStatus.toString()); - } - } - } - - /** - * Create a new AclEntry with scope, type and permission (no name). - * - * @param scope - * AclEntryScope scope of the ACL entry - * @param type - * AclEntryType ACL entry type - * @param permission - * FsAction set of permissions in the ACL entry - * @return AclEntry new AclEntry - */ - private AclEntry newAclEntry(AclEntryScope scope, AclEntryType type, - FsAction permission) { - return new AclEntry.Builder().setScope(scope).setType(type) - .setPermission(permission).build(); - } - - /** - * Removes basic permission acls (unamed acls) from the list of acl entries - * @param entries acl entries to remove from. - */ - private void removeBaseAclEntries(List entries) { - Iterables.removeIf(entries, new Predicate() { - @Override - public boolean apply(AclEntry input) { - if (input.getName() == null) { - return true; - } - return false; - } - }); - } - class ProxyFileSystem23 extends ProxyFileSystem { public ProxyFileSystem23(FileSystem fs) { super(fs); diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/io/HdfsUtils.java b/shims/common/src/main/java/org/apache/hadoop/hive/io/HdfsUtils.java index c90b34c..e931156 100644 --- a/shims/common/src/main/java/org/apache/hadoop/hive/io/HdfsUtils.java +++ b/shims/common/src/main/java/org/apache/hadoop/hive/io/HdfsUtils.java @@ -19,43 +19,155 @@ package org.apache.hadoop.hive.io; import java.io.IOException; +import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.apache.commons.lang.ArrayUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FsShell; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.AclEntry; +import org.apache.hadoop.fs.permission.AclEntryScope; +import org.apache.hadoop.fs.permission.AclEntryType; +import org.apache.hadoop.fs.permission.AclStatus; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DistributedFileSystem; -import org.apache.hadoop.hive.shims.HadoopShims; -import org.apache.hadoop.hive.shims.ShimLoader; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -public class HdfsUtils { - private static final HadoopShims SHIMS = ShimLoader.getHadoopShims(); - private static final Log LOG = LogFactory.getLog(HdfsUtils.class); +import 
com.google.common.base.Joiner; +import com.google.common.base.Objects; +import com.google.common.base.Predicate; +import com.google.common.collect.Iterables; - public static long getFileId(FileSystem fileSystem, Path path) throws IOException { - String pathStr = path.toUri().getPath(); - if (fileSystem instanceof DistributedFileSystem) { - return SHIMS.getFileId(fileSystem, pathStr); - } - // If we are not on DFS, we just hash the file name + size and hope for the best. - // TODO: we assume it only happens in tests. Fix? - int nameHash = pathStr.hashCode(); - long fileSize = fileSystem.getFileStatus(path).getLen(); - long id = ((fileSize ^ (fileSize >>> 32)) << 32) | ((long)nameHash & 0xffffffffL); - LOG.warn("Cannot get unique file ID from " - + fileSystem.getClass().getSimpleName() + "; using " + id + "(" + pathStr - + "," + nameHash + "," + fileSize + ")"); - return id; - } +public class HdfsUtils { // TODO: this relies on HDFS not changing the format; we assume if we could get inode ID, this // is still going to work. Otherwise, file IDs can be turned off. Later, we should use // as public utility method in HDFS to obtain the inode-based path. private static String HDFS_ID_PATH_PREFIX = "/.reserved/.inodes/"; + static Logger LOG = LoggerFactory.getLogger("shims.HdfsUtils"); public static Path getFileIdPath( FileSystem fileSystem, Path path, long fileId) { return (fileSystem instanceof DistributedFileSystem) ? new Path(HDFS_ID_PATH_PREFIX + fileId) : path; } + + public static void setFullFileStatus(Configuration conf, HdfsUtils.HadoopFileStatus sourceStatus, + FileSystem fs, Path target) throws IOException { + FileStatus fStatus= sourceStatus.getFileStatus(); + String group = fStatus.getGroup(); + LOG.trace(sourceStatus.getFileStatus().toString()); + //use FsShell to change group, permissions, and extended ACL's recursively + FsShell fsShell = new FsShell(); + fsShell.setConf(conf); + + try { + //If there is no group of a file, no need to call chgrp + if (group != null && !group.isEmpty()) { + run(fsShell, new String[]{"-chgrp", "-R", group, target.toString()}); + } + + if (Objects.equal(conf.get("dfs.namenode.acls.enabled"), "true")) { + //Attempt extended Acl operations only if its enabled, 8791but don't fail the operation regardless. + try { + AclStatus aclStatus = sourceStatus.getAclStatus(); + if (aclStatus != null) { + LOG.trace(aclStatus.toString()); + List aclEntries = aclStatus.getEntries(); + removeBaseAclEntries(aclEntries); + + //the ACL api's also expect the tradition user/group/other permission in the form of ACL + FsPermission sourcePerm = fStatus.getPermission(); + aclEntries.add(newAclEntry(AclEntryScope.ACCESS, AclEntryType.USER, sourcePerm.getUserAction())); + aclEntries.add(newAclEntry(AclEntryScope.ACCESS, AclEntryType.GROUP, sourcePerm.getGroupAction())); + aclEntries.add(newAclEntry(AclEntryScope.ACCESS, AclEntryType.OTHER, sourcePerm.getOtherAction())); + + //construct the -setfacl command + String aclEntry = Joiner.on(",").join(aclStatus.getEntries()); + run(fsShell, new String[]{"-setfacl", "-R", "--set", aclEntry, target.toString()}); + } + } catch (Exception e) { + LOG.info("Skipping ACL inheritance: File system for path " + target + " " + + "does not support ACLs but dfs.namenode.acls.enabled is set to true. 
"); + LOG.debug("The details are: " + e, e); + } + } else { + String permission = Integer.toString(fStatus.getPermission().toShort(), 8); + run(fsShell, new String[]{"-chmod", "-R", permission, target.toString()}); + } + } catch (Exception e) { + throw new IOException("Unable to set permissions of " + target, e); + } + } + + /** + * Create a new AclEntry with scope, type and permission (no name). + * + * @param scope + * AclEntryScope scope of the ACL entry + * @param type + * AclEntryType ACL entry type + * @param permission + * FsAction set of permissions in the ACL entry + * @return AclEntry new AclEntry + */ + private static AclEntry newAclEntry(AclEntryScope scope, AclEntryType type, + FsAction permission) { + return new AclEntry.Builder().setScope(scope).setType(type) + .setPermission(permission).build(); + } + /** + * Removes basic permission acls (unamed acls) from the list of acl entries + * @param entries acl entries to remove from. + */ + private static void removeBaseAclEntries(List entries) { + Iterables.removeIf(entries, new Predicate() { + @Override + public boolean apply(AclEntry input) { + if (input.getName() == null) { + return true; + } + return false; + } + }); + } + + private static void run(FsShell shell, String[] command) throws Exception { + LOG.debug(ArrayUtils.toString(command)); + int retval = shell.run(command); + LOG.debug("Return value is :" + retval); + } +public static class HadoopFileStatus { + + private final FileStatus fileStatus; + private final AclStatus aclStatus; + + public HadoopFileStatus(Configuration conf, FileSystem fs, Path file) throws IOException { + + FileStatus fileStatus = fs.getFileStatus(file); + AclStatus aclStatus = null; + if (Objects.equal(conf.get("dfs.namenode.acls.enabled"), "true")) { + //Attempt extended Acl operations only if its enabled, but don't fail the operation regardless. + try { + aclStatus = fs.getAclStatus(file); + } catch (Exception e) { + LOG.info("Skipping ACL inheritance: File system for path " + file + " " + + "does not support ACLs but dfs.namenode.acls.enabled is set to true. "); + LOG.debug("The details are: " + e, e); + } + }this.fileStatus = fileStatus; + this.aclStatus = aclStatus; + } + + public FileStatus getFileStatus() { + return fileStatus; + } + public AclStatus getAclStatus() { + return aclStatus; + } +} } diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java index 4b9119b..37eb8f6 100644 --- a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java +++ b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java @@ -179,18 +179,6 @@ MiniDFSShim getMiniDfs(Configuration conf, public String getJobLauncherHttpAddress(Configuration conf); /** - * Move the directory/file to trash. In case of the symlinks or mount points, the file is - * moved to the trashbin in the actual volume of the path p being deleted - * @param fs - * @param path - * @param conf - * @return false if the item is already in the trash or trash is disabled - * @throws IOException - */ - public boolean moveToAppropriateTrash(FileSystem fs, Path path, Configuration conf) - throws IOException; - - /** * Get the default block size for the path. FileSystem alone is not sufficient to * determine the same, as in case of CSMT the underlying file system determines that. 
* @param fs @@ -292,35 +280,6 @@ RecordReader getRecordReader(JobConf job, CombineFileSplit split, Reporter repor */ public void hflush(FSDataOutputStream stream) throws IOException; - /** - * For a given file, return a file status - * @param conf - * @param fs - * @param file - * @return - * @throws IOException - */ - public HdfsFileStatus getFullFileStatus(Configuration conf, FileSystem fs, Path file) throws IOException; - - /** - * For a given file, set a given file status. - * @param conf - * @param sourceStatus - * @param fs - * @param target - * @throws IOException - */ - public void setFullFileStatus(Configuration conf, HdfsFileStatus sourceStatus, - FileSystem fs, Path target) throws IOException; - - /** - * Includes the vanilla FileStatus, and AclStatus if it applies to this version of hadoop. - */ - public interface HdfsFileStatus { - public FileStatus getFileStatus(); - public void debugLog(); - } - public interface HdfsFileStatusWithId { public FileStatus getFileStatus(); public Long getFileId(); diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java index 63d48a5..87682e6 100644 --- a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java +++ b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java @@ -373,21 +373,11 @@ public RecordReader getRecordReader(JobConf job, CombineFileSplit split, abstract public long getDefaultBlockSize(FileSystem fs, Path path); @Override - abstract public boolean moveToAppropriateTrash(FileSystem fs, Path path, Configuration conf) - throws IOException; - - @Override abstract public FileSystem createProxyFileSystem(FileSystem fs, URI uri); @Override abstract public FileSystem getNonCachedFileSystem(URI uri, Configuration conf) throws IOException; - protected void run(FsShell shell, String[] command) throws Exception { - LOG.debug(ArrayUtils.toString(command)); - int retval = shell.run(command); - LOG.debug("Return value is :" + retval); - } - private static String[] dedup(String[] locations) throws IOException { Set dedup = new HashSet(); Collections.addAll(dedup, locations); -- 1.7.12.4 (Apple Git-37)
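
Usage sketch: after this refactor, call sites no longer go through ShimLoader/HadoopShims for permission inheritance or trash handling; they use org.apache.hadoop.hive.io.HdfsUtils and org.apache.hadoop.fs.Trash directly. The snippet below shows the migrated call pattern, assuming the Hive classpath this patch targets. The wrapper class FsOpsSketch and its two method names are illustrative only; HdfsUtils.HadoopFileStatus, HdfsUtils.setFullFileStatus, and Trash.moveToAppropriateTrash are the APIs the patch actually introduces or calls.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.Trash;
import org.apache.hadoop.hive.io.HdfsUtils;

// Illustrative wrapper; only the HdfsUtils and Trash calls come from this patch.
public class FsOpsSketch {

  // Mirrors FileUtils.mkdir after the patch: create a directory, then make it
  // inherit group/permissions/ACLs from its existing parent.
  public static boolean mkdirInheritingPerms(FileSystem fs, Path dir, Configuration conf)
      throws IOException {
    boolean created = fs.mkdirs(dir);
    if (created) {
      // Snapshot of the parent's FileStatus (plus AclStatus when
      // dfs.namenode.acls.enabled=true), captured before being applied below.
      HdfsUtils.HadoopFileStatus parentStatus =
          new HdfsUtils.HadoopFileStatus(conf, fs, dir.getParent());
      try {
        // Recursively applies -chgrp/-chmod/-setfacl via FsShell.
        HdfsUtils.setFullFileStatus(conf, parentStatus, fs, dir);
      } catch (Exception e) {
        // The patch's callers only log a warning here rather than failing.
      }
    }
    return created;
  }

  // Mirrors FileUtils.moveToTrash / HiveMetaStoreFsImpl.deleteDir after the patch:
  // the shim indirection is gone and the Hadoop Trash API is called directly.
  public static boolean deletePreferTrash(FileSystem fs, Path p, Configuration conf)
      throws IOException {
    if (Trash.moveToAppropriateTrash(fs, p, conf)) {
      return true;              // moved into the user's trash directory
    }
    return fs.delete(p, true);  // trash disabled or not applicable: delete outright
  }
}

Keeping this logic in HdfsUtils (shims-common) lets FileUtils, DDLTask, MoveTask, Hive, and the metastore share one implementation instead of routing through ShimLoader, which is why Hadoop23Shims, HadoopShims, and HadoopShimsSecure shrink by the amounts shown in the diffstat.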