diff --git a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java index 37ff2c04dc..17169d6e18 100644 --- a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java +++ b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java @@ -258,6 +258,11 @@ public static String makeListBucketingDirName(List lbCols, List } } + /** + * Upper-case hexadecimal digit characters, indexed by the digit's integer value (0-15). + */ + private static final char[] HEX_UPPER_CHARS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'}; + static boolean needsEscaping(char c) { return c < charToEscape.size() && charToEscape.get(c); } @@ -287,12 +292,28 @@ public static String escapePathName(String path, String defaultPath) { } } - StringBuilder sb = new StringBuilder(); + // Fast path: scan for the first character that needs escaping; if there is none, return the input unchanged without copying + int firstEscapeIndex = -1; for (int i = 0; i < path.length(); i++) { + if (needsEscaping(path.charAt(i))) { + firstEscapeIndex = i; + break; + } + } + if (firstEscapeIndex == -1) { + return path; + } + + // Slow path: copy the prefix before the first character that needs escaping, then escape the remainder into a new string + StringBuilder sb = new StringBuilder(); + if (firstEscapeIndex > 0) { + sb.append(path, 0, firstEscapeIndex); + } + + for (int i = firstEscapeIndex; i < path.length(); i++) { char c = path.charAt(i); if (needsEscaping(c)) { - sb.append('%'); - sb.append(String.format("%1$02X", (int) c)); + sb.append('%').append(HEX_UPPER_CHARS[(0xF0 & c) >>> 4]).append(HEX_UPPER_CHARS[(0x0F & c)]); } else { sb.append(c); } @@ -301,8 +322,17 @@ public static String escapePathName(String path, String defaultPath) { } public static String unescapePathName(String path) { + int firstUnescapeIndex = path.indexOf('%'); + if (firstUnescapeIndex == -1) { + return path; + } + StringBuilder sb = new StringBuilder(); - for (int i = 0; i < path.length(); i++) { + if (firstUnescapeIndex > 0) { + sb.append(path, 0,
firstUnescapeIndex); + } + + for (int i = firstUnescapeIndex; i < path.length(); i++) { char c = path.charAt(i); if (c == '%' && i + 2 < path.length()) { int code = -1; diff --git a/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java b/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java index 2721deb7a0..9ffb52ba5f 100644 --- a/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java +++ b/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java @@ -303,6 +303,14 @@ public void testListStatusIterator() throws Exception { assertEquals(1, assertExpectedFilePaths(itr, Collections.singletonList("mock:/tmp/dummy"))); } + @Test + public void testPathEscapeChars() { + StringBuilder sb = new StringBuilder(); + FileUtils.charToEscape.stream().forEach(integer -> sb.append((char) integer)); + String path = sb.toString(); + assertEquals(path, FileUtils.unescapePathName(FileUtils.escapePathName(path))); + } + private int assertExpectedFilePaths(RemoteIterator lfs, List expectedPaths) throws Exception { int count = 0;