Fix \uXXXX decoding in BaseSemanticAnalyzer.unescapeSQLString.

The four digits of a \uXXXX escape are read with Character.digit(c, 16),
i.e. as hexadecimal, but were combined using the decimal place values
{1000, 100, 10, 1}. Accumulate them in base 16 instead
(code = (code << 4) + digit) and remove the multiplier table.

Also adds TestSemanticAnalyzer.testUnescapeSQLString, which covers quoted
strings, single-character escapes, '\000' style escapes and '\u0000' style
escapes.

diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
index f6ba521..19342a8 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
@@ -438,8 +438,6 @@ public static void readProps(
     }
   }
 
-  private static final int[] multiplier = new int[] {1000, 100, 10, 1};
-
   @SuppressWarnings("nls")
   public static String unescapeSQLString(String b) {
     Character enclosure = null;
@@ -469,7 +467,7 @@ public static String unescapeSQLString(String b) {
         int base = i + 2;
         for (int j = 0; j < 4; j++) {
           int digit = Character.digit(b.charAt(j + base), 16);
-          code += digit * multiplier[j];
+          code = (code << 4) + digit;
         }
         sb.append((char)code);
         i += 5;
diff --git ql/src/test/org/apache/hadoop/hive/ql/parse/TestSemanticAnalyzer.java ql/src/test/org/apache/hadoop/hive/ql/parse/TestSemanticAnalyzer.java
index be1f7ff..d35fa91 100644
--- ql/src/test/org/apache/hadoop/hive/ql/parse/TestSemanticAnalyzer.java
+++ ql/src/test/org/apache/hadoop/hive/ql/parse/TestSemanticAnalyzer.java
@@ -66,4 +66,41 @@ public void checkNormalization(String colType, String originalColSpec,
       assertEquals(result, partSpec.get(colName));
     }
   }
+
+  @Test
+  public void testUnescapeSQLString() {
+    assertEquals("abcdefg", BaseSemanticAnalyzer.unescapeSQLString("\"abcdefg\""));
+
+    // String enclosed by single quotes.
+    assertEquals("C0FFEE", BaseSemanticAnalyzer.unescapeSQLString("\'C0FFEE\'"));
+
+    // Strings including single escaped characters.
+    assertEquals("\u0000", BaseSemanticAnalyzer.unescapeSQLString("'\\0'"));
+    assertEquals("\'", BaseSemanticAnalyzer.unescapeSQLString("\"\\'\""));
+    assertEquals("\"", BaseSemanticAnalyzer.unescapeSQLString("'\\\"'"));
+    assertEquals("\b", BaseSemanticAnalyzer.unescapeSQLString("\"\\b\""));
+    assertEquals("\n", BaseSemanticAnalyzer.unescapeSQLString("'\\n'"));
+    assertEquals("\r", BaseSemanticAnalyzer.unescapeSQLString("\"\\r\""));
+    assertEquals("\t", BaseSemanticAnalyzer.unescapeSQLString("'\\t'"));
+    assertEquals("\u001A", BaseSemanticAnalyzer.unescapeSQLString("\"\\Z\""));
+    assertEquals("\\", BaseSemanticAnalyzer.unescapeSQLString("'\\\\'"));
+    assertEquals("\\%", BaseSemanticAnalyzer.unescapeSQLString("\"\\%\""));
+    assertEquals("\\_", BaseSemanticAnalyzer.unescapeSQLString("'\\_'"));
+
+    // String including '\000' style literal characters.
+    assertEquals("3 + 5 = \u0038", BaseSemanticAnalyzer.unescapeSQLString("'3 + 5 = \\070'"));
+    assertEquals("\u0000", BaseSemanticAnalyzer.unescapeSQLString("\"\\000\""));
+
+    // String including invalid '\000' style literal characters.
+    assertEquals("256", BaseSemanticAnalyzer.unescapeSQLString("\"\\256\""));
+
+    // String including a '\u0000' style literal characters (\u732B is a cat in Kanji).
+    assertEquals("How cute \u732B are",
+        BaseSemanticAnalyzer.unescapeSQLString("\"How cute \\u732B are\""));
+
+    // String including a surrogate pair character
+    // (\uD867\uDE3D is Okhotsk atka mackerel in Kanji).
+    assertEquals("\uD867\uDE3D is a fish",
+        BaseSemanticAnalyzer.unescapeSQLString("\"\\uD867\uDE3D is a fish\""));
+  }
 }