From 8bb8166e4bc29f36f4e15eb9e2ad22a8140baa96 Mon Sep 17 00:00:00 2001
From: Ashutosh Chauhan
Date: Tue, 24 May 2016 16:33:34 -0700
Subject: [PATCH] HIVE-13839 : Refactor : remove SHIMS.getListLocatedStatus

---
 .../hcatalog/templeton/tool/TempletonUtils.java    | 17 ++++++++--------
 .../hadoop/hive/ql/hooks/PostExecOrcFileDump.java  |  7 ++++---
 .../org/apache/hadoop/hive/ql/io/AcidUtils.java    |  4 ++--
 .../org/apache/hadoop/hive/ql/io/HdfsUtils.java    | 23 +++++++++++++++++++---
 .../hadoop/hive/ql/io/orc/OrcInputFormat.java      |  4 ++--
 .../apache/hadoop/hive/shims/Hadoop23Shims.java    | 17 ----------------
 .../org/apache/hadoop/hive/shims/HadoopShims.java  | 12 -----------
 9 files changed, 52 insertions(+), 48 deletions(-)

diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java
index 83584d3..201e647 100644
--- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java
+++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java
@@ -104,14 +104,14 @@ public static boolean isset(char ch) {
   public static final Pattern HIVE_COMPLETE = Pattern.compile(" map = (\\d+%),\\s+reduce = (\\d+%).*$");
   /**
    * Hive on Tez produces progress report that looks like this
-   * Map 1: -/-	Reducer 2: 0/1
-   * Map 1: -/-	Reducer 2: 0(+1)/1
+   * Map 1: -/- Reducer 2: 0/1
+   * Map 1: -/- Reducer 2: 0(+1)/1
    * Map 1: -/- Reducer 2: 1/1
-   * 
+   *
    * -/- means there are no tasks (yet)
    * 0/1 means 1 total tasks, 0 completed
    * 1(+2)/3 means 3 total, 1 completed and 2 running
-   * 
+   *
    * HIVE-8495, in particular https://issues.apache.org/jira/secure/attachment/12675504/Screen%20Shot%202014-10-16%20at%209.35.26%20PM.png
    * has more examples.
    * To report progress, we'll assume all tasks are equal size and compute "completed" as percent of "total"
@@ -132,7 +132,7 @@ public static String extractPercentComplete(String line) {
     Matcher pig = PIG_COMPLETE.matcher(line);
     if (pig.find())
       return pig.group().trim();
-    
+
     Matcher hive = HIVE_COMPLETE.matcher(line);
     if(hive.find()) {
       return "map " + hive.group(1) + " reduce " + hive.group(2);
@@ -274,7 +274,7 @@ public static String hadoopFsFilename(String fname, Configuration conf, String u
     if(!fs.exists(p)) {
       return Collections.emptyList();
     }
-    List<FileStatus> children = ShimLoader.getHadoopShims().listLocatedStatus(fs, p, null);
+    FileStatus[] children = fs.listStatus(p);
     if(!isset(children)) {
       return Collections.emptyList();
     }
@@ -327,9 +327,10 @@ public static Path hadoopFsPath(String fname, final Configuration conf, String u
     }
     final String finalFName = new String(fname);
 
-    final FileSystem defaultFs = 
+    final FileSystem defaultFs =
       ugi.doAs(new PrivilegedExceptionAction<FileSystem>() {
-        public FileSystem run()
+        @Override
+        public FileSystem run()
           throws URISyntaxException, IOException, InterruptedException {
           return FileSystem.get(new URI(finalFName), conf);
         }
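Note: the HIVE_COMPLETE pattern above is easy to sanity-check in isolation.
A minimal, self-contained sketch (the class name and sample log line are
made up; only the Pattern itself is taken from TempletonUtils):

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    public class ProgressParseDemo {
      static final Pattern HIVE_COMPLETE =
          Pattern.compile(" map = (\\d+%),\\s+reduce = (\\d+%).*$");

      public static void main(String[] args) {
        Matcher m = HIVE_COMPLETE.matcher(
            "2016-05-24 16:33:00,000 Stage-1 map = 100%,  reduce = 50%");
        if (m.find()) {
          // mirrors extractPercentComplete(): prints "map 100% reduce 50%"
          System.out.println("map " + m.group(1) + " reduce " + m.group(2));
        }
      }
    }

The fs.listStatus(p) replacement above returns a plain FileStatus[] without
fetching block locations, which is all this caller needs to test for
emptiness and iterate names.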
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecOrcFileDump.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecOrcFileDump.java
index e184fcb..9e1f14f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecOrcFileDump.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecOrcFileDump.java
@@ -30,12 +30,12 @@
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.QueryPlan;
 import org.apache.hadoop.hive.ql.exec.FetchTask;
-import org.apache.orc.FileFormatException;
+import org.apache.hadoop.hive.ql.io.HdfsUtils;
 import org.apache.orc.tools.FileDump;
+import org.apache.orc.FileFormatException;
 import org.apache.hadoop.hive.ql.io.orc.OrcFile;
 import org.apache.hadoop.hive.ql.plan.FetchWork;
 import org.apache.hadoop.hive.ql.session.SessionState;
-import org.apache.hadoop.hive.shims.ShimLoader;
 
 import com.google.common.collect.Lists;
 
@@ -48,6 +48,7 @@
   private static final Logger LOG = LoggerFactory.getLogger(PostExecOrcFileDump.class.getName());
 
   private static final PathFilter hiddenFileFilter = new PathFilter() {
+    @Override
     public boolean accept(Path p) {
       String name = p.getName();
       return !name.startsWith("_") && !name.startsWith(".");
@@ -88,7 +89,7 @@ public void run(HookContext hookContext) throws Exception {
 
         for (Path dir : directories) {
           FileSystem fs = dir.getFileSystem(conf);
-          List<FileStatus> fileList = ShimLoader.getHadoopShims().listLocatedStatus(fs, dir,
+          List<FileStatus> fileList = HdfsUtils.listLocatedStatus(fs, dir,
               hiddenFileFilter);
 
           for (FileStatus fileStatus : fileList) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
index bac38ce..496bd0f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
@@ -507,7 +507,7 @@ public static Directory getAcidState(Path directory,
             originalDirectories, original, obsolete, bestBase, ignoreEmptyFiles);
       }
     } else {
-      List<FileStatus> children = SHIMS.listLocatedStatus(fs, directory, hiddenFileFilter);
+      List<FileStatus> children = HdfsUtils.listLocatedStatus(fs, directory, hiddenFileFilter);
       for (FileStatus child : children) {
         getChildState(
             child, null, txnList, working, originalDirectories, original, obsolete, bestBase, ignoreEmptyFiles);
@@ -675,7 +675,7 @@ private static void findOriginals(FileSystem fs, FileStatus stat,
         }
       }
     } else {
-      List<FileStatus> children = SHIMS.listLocatedStatus(fs, stat.getPath(), hiddenFileFilter);
+      List<FileStatus> children = HdfsUtils.listLocatedStatus(fs, stat.getPath(), hiddenFileFilter);
       for (FileStatus child : children) {
         if (child.isDir()) {
           findOriginals(fs, child, original, useFileIds);
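Note: the call sites above all funnel into the new HdfsUtils helper that the
next file introduces. A hedged sketch of a typical caller (dumpVisible is a
hypothetical method; the filter mirrors hiddenFileFilter above, and the
usual org.apache.hadoop.fs imports are assumed):

    static void dumpVisible(FileSystem fs, Path dir) throws IOException {
      PathFilter hiddenFileFilter = new PathFilter() {
        @Override
        public boolean accept(Path p) {
          String name = p.getName();
          return !name.startsWith("_") && !name.startsWith(".");
        }
      };
      for (FileStatus child : HdfsUtils.listLocatedStatus(fs, dir, hiddenFileFilter)) {
        // hidden names are already skipped; directories can be recursed
        // into, much as findOriginals() does above
        System.out.println(child.getPath());
      }
    }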
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java
index b71ca09..9b8b761 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java
@@ -19,12 +19,17 @@
 package org.apache.hadoop.hive.ql.io;
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hive.shims.HadoopShims;
 import org.apache.hadoop.hive.shims.ShimLoader;
@@ -53,7 +58,7 @@ public static long createFileId(String pathStr, FileStatus fs, boolean doLog, St
     int fileSizeHash = (int)(fileSize ^ (fileSize >>> 32)),
         modTimeHash = (int)(modTime ^ (modTime >>> 32)),
         combinedHash = modTimeHash ^ fileSizeHash;
-    long id = (((long)nameHash & 0xffffffffL) << 32) | ((long)combinedHash & 0xffffffffL);
+    long id = ((nameHash & 0xffffffffL) << 32) | (combinedHash & 0xffffffffL);
     if (doLog) {
       LOG.warn("Cannot get unique file ID from " + fsName + "; using " + id
           + " (" + pathStr + "," + nameHash + "," + fileSize + ")");
@@ -61,8 +66,20 @@ public static long createFileId(String pathStr, FileStatus fs, boolean doLog, St
     return id;
   }
 
-
-
+  public static List<FileStatus> listLocatedStatus(final FileSystem fs,
+                                                   final Path path,
+                                                   final PathFilter filter
+                                                  ) throws IOException {
+    RemoteIterator<LocatedFileStatus> itr = fs.listLocatedStatus(path);
+    List<FileStatus> result = new ArrayList<FileStatus>();
+    while(itr.hasNext()) {
+      FileStatus stat = itr.next();
+      if (filter == null || filter.accept(stat.getPath())) {
+        result.add(stat);
+      }
+    }
+    return result;
+  }
 
   // TODO: this relies on HDFS not changing the format; we assume if we could get inode ID, this
   // is still going to work. Otherwise, file IDs can be turned off. Later, we should use
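Note: the simplified id expression works because the & 0xffffffffL masks
already promote each int operand to long, making the old explicit (long)
casts redundant. A self-contained worked example (class name and all input
values are made up):

    public class FileIdDemo {
      public static void main(String[] args) {
        long fileSize = 4096L, modTime = 1464132814000L;
        int nameHash = "/warehouse/t/000000_0".hashCode();
        int fileSizeHash = (int) (fileSize ^ (fileSize >>> 32));
        int modTimeHash = (int) (modTime ^ (modTime >>> 32));
        int combinedHash = modTimeHash ^ fileSizeHash;
        // path-name hash in the high 32 bits, size/mtime hash in the low 32 bits
        long id = ((nameHash & 0xffffffffL) << 32) | (combinedHash & 0xffffffffL);
        System.out.println((int) (id >>> 32) == nameHash);  // true
        System.out.println((int) id == combinedHash);       // true
      }
    }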
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 33fe3b6..cb73ef0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -20,7 +20,6 @@
 
 import org.apache.orc.impl.InStream;
-
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.security.PrivilegedExceptionAction;
@@ -80,6 +79,7 @@
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.AcidUtils.Directory;
 import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
+import org.apache.hadoop.hive.ql.io.HdfsUtils;
 import org.apache.hadoop.hive.ql.io.HiveInputFormat;
 import org.apache.hadoop.hive.ql.io.InputFormatChecker;
 import org.apache.hadoop.hive.ql.io.LlapWrappableInputFormatInterface;
@@ -1033,7 +1033,7 @@ private AcidDirInfo callInternal() throws IOException {
         }
 
         // Fall back to regular API and create states without ID.
-        List<FileStatus> children = SHIMS.listLocatedStatus(fs, base, AcidUtils.hiddenFileFilter);
+        List<FileStatus> children = HdfsUtils.listLocatedStatus(fs, base, AcidUtils.hiddenFileFilter);
         List<HdfsFileStatusWithId> result = new ArrayList<>(children.size());
         for (FileStatus child : children) {
           result.add(AcidUtils.createOriginalObj(null, child));
diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
index ef2b7f7..273099e 100644
--- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
+++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
@@ -52,7 +52,6 @@
 import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.fs.ProxyFileSystem;
 import org.apache.hadoop.fs.RemoteIterator;
-import org.apache.hadoop.fs.Trash;
 import org.apache.hadoop.fs.TrashPolicy;
 import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.fs.permission.FsPermission;
@@ -664,22 +663,6 @@ public WebHCatJTShim getWebHCatShim(Configuration conf, UserGroupInformation ugi
     return new WebHCatJTShim23(conf, ugi);//this has state, so can't be cached
   }
 
-  @Override
-  public List<FileStatus> listLocatedStatus(final FileSystem fs,
-                                            final Path path,
-                                            final PathFilter filter
-                                           ) throws IOException {
-    RemoteIterator<LocatedFileStatus> itr = fs.listLocatedStatus(path);
-    List<FileStatus> result = new ArrayList<FileStatus>();
-    while(itr.hasNext()) {
-      FileStatus stat = itr.next();
-      if (filter == null || filter.accept(stat.getPath())) {
-        result.add(stat);
-      }
-    }
-    return result;
-  }
-
   private static final class HdfsFileStatusWithIdImpl implements HdfsFileStatusWithId {
     private final LocatedFileStatus lfs;
     private final long fileId;
diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
index 4a96355..3e30758 100644
--- a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
+++ b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
@@ -234,18 +234,6 @@ RecordReader getRecordReader(JobConf job, CombineFileSplit split, Reporter repor
         Class<RecordReader<K, V>> rrClass) throws IOException;
   }
 
-  /**
-   * Get the block locations for the given directory.
-   * @param fs the file system
-   * @param path the directory name to get the status and block locations
-   * @param filter a filter that needs to accept the file (or null)
-   * @return an list for the located file status objects
-   * @throws IOException
-   */
-  List<FileStatus> listLocatedStatus(FileSystem fs, Path path,
-      PathFilter filter) throws IOException;
-
-
   List<HdfsFileStatusWithId> listLocatedHdfsStatus(
       FileSystem fs, Path path, PathFilter filter) throws IOException;
 
-- 
1.7.12.4 (Apple Git-37)
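Note: the net effect of the patch for callers, as a hedged before/after
sketch (the ListingCaller class is hypothetical; only
HdfsUtils.listLocatedStatus and the Hadoop types come from the patch):

    import java.io.IOException;
    import java.util.List;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.PathFilter;
    import org.apache.hadoop.hive.ql.io.HdfsUtils;

    class ListingCaller {
      static List<FileStatus> children(FileSystem fs, Path dir, PathFilter filter)
          throws IOException {
        // before: ShimLoader.getHadoopShims().listLocatedStatus(fs, dir, filter)
        return HdfsUtils.listLocatedStatus(fs, dir, filter);
      }
    }

Since the helper only uses stable FileSystem API, hosting it in ql rather
than behind the shim interface removes one layer of indirection without
changing behavior.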