diff --git a/common/src/java/org/apache/hive/common/util/HiveStringUtils.java b/common/src/java/org/apache/hive/common/util/HiveStringUtils.java index bba14e2..912e38a 100644 --- a/common/src/java/org/apache/hive/common/util/HiveStringUtils.java +++ b/common/src/java/org/apache/hive/common/util/HiveStringUtils.java @@ -40,6 +40,7 @@ import java.util.StringTokenizer; import java.util.regex.Pattern; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.Interner; import com.google.common.collect.Interners; @@ -47,7 +48,6 @@ import org.apache.hadoop.hive.common.classification.InterfaceAudience; import org.apache.hadoop.hive.common.classification.InterfaceStability; import org.apache.hadoop.io.Text; -import org.apache.hadoop.util.StringUtils; /** * HiveStringUtils @@ -66,6 +66,16 @@ private static final DecimalFormat decimalFormat; + private static final Map<Character, Character> ESCAPE_MAP = + new ImmutableMap.Builder<Character, Character>() + .put('\b', 'b') + .put('\n', 'n') + .put('\t', 't') + .put('\f', 'f') + .put('\r', 'r') + .put('"', '"') + .put('\\', '\\').build(); + /** * Maintain a String pool to reduce memory. */ @@ -603,6 +613,32 @@ public static String escapeString(String str, char escapeChar, } /** + * Escape the characters in ESCAPE_MAP, leaving unicode characters untouched. + * StringEscapeUtils.escapeJava() escapes unicode as well, which is not always desired. 
+ * + * @param str Original string, may be null + * @return Escaped string, or null if str is null + */ + public static String escapeJava(String str) { + if (str == null) { + return null; + } + + StringBuilder result = new StringBuilder(); + int sz = str.length(); + for (int i = 0; i < sz; i++) { + char curChar = str.charAt(i); + Character mapped = ESCAPE_MAP.get(curChar); + if (mapped != null) { + result.append('\\'); + curChar = mapped; + } + result.append(curChar); + } + return result.toString(); + } + + /** * Unescape commas in the string using the default escape char * @param str a string * @return an unescaped string diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index 493e3a0..7099b2a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -31,6 +31,7 @@ import java.io.Writer; import java.net.URI; import java.net.URISyntaxException; +import java.nio.charset.StandardCharsets; import java.sql.SQLException; import java.util.AbstractList; import java.util.ArrayList; @@ -2076,7 +2077,7 @@ private int showCreateTable(Hive db, DataOutputStream outStream, String tableNam if (tbl.isView()) { String createTab_stmt = "CREATE VIEW `" + tableName + "` AS " + tbl.getViewExpandedText(); - outStream.writeBytes(createTab_stmt.toString()); + outStream.write(createTab_stmt.getBytes(StandardCharsets.UTF_8)); return 0; } @@ -2225,7 +2226,7 @@ else if (sortCol.getOrder() == BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_DESC) { } createTab_stmt.add(TBL_PROPERTIES, tbl_properties); - outStream.writeBytes(createTab_stmt.render()); + outStream.write(createTab_stmt.render().getBytes(StandardCharsets.UTF_8)); } catch (IOException e) { LOG.info("show create table: " + stringifyException(e)); return 1; @@ -2288,14 +2289,14 @@ private int showIndexes(Hive db, ShowIndexesDesc showIndexes) throws HiveExcepti try { if (showIndexes.isFormatted()) { // column headers - 
outStream.writeBytes(MetaDataFormatUtils.getIndexColumnsHeader()); + outStream.write(MetaDataFormatUtils.getIndexColumnsHeader().getBytes(StandardCharsets.UTF_8)); outStream.write(terminator); outStream.write(terminator); } for (Index index : indexes) { - outStream.writeBytes(MetaDataFormatUtils.getAllColumnsInformation(index)); + outStream.write(MetaDataFormatUtils.getAllColumnsInformation(index).getBytes(StandardCharsets.UTF_8)); } } catch (FileNotFoundException e) { LOG.info("show indexes: " + stringifyException(e)); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java index a2ccd56..21baa0a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.metadata.formatting; import org.apache.commons.lang.StringEscapeUtils; +import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; @@ -47,6 +48,7 @@ import org.apache.hadoop.hive.ql.plan.PlanUtils; import org.apache.hadoop.hive.ql.plan.ShowIndexesDesc; import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hive.common.util.HiveStringUtils; import java.math.BigInteger; import java.util.ArrayList; @@ -462,7 +464,7 @@ private static void displayAllParameters(Map params, StringBuild Collections.sort(keys); for (String key : keys) { tableInfo.append(FIELD_DELIM); // Ensures all params are indented. 
- formatOutput(key, StringEscapeUtils.escapeJava(params.get(key)), tableInfo); + formatOutput(key, HiveStringUtils.escapeJava(params.get(key)), tableInfo); } } diff --git a/ql/src/test/queries/clientpositive/unicode_comments.q b/ql/src/test/queries/clientpositive/unicode_comments.q new file mode 100644 index 0000000..4d958e4 --- /dev/null +++ b/ql/src/test/queries/clientpositive/unicode_comments.q @@ -0,0 +1,17 @@ +create database unicode_comments_db comment '数据库'; +use unicode_comments_db; +create table unicode_comments_tbl1 +(col1 string comment '第一列') comment '表格' +partitioned by (p1 string comment '分割'); +create view unicode_comments_view1 (col1 comment '第一列') comment '视图' +as select col1 from unicode_comments_tbl1; +create index index2 on table unicode_comments_tbl1(col1) as 'COMPACT' with deferred rebuild comment '索引'; + +describe database extended unicode_comments_db; +show create table unicode_comments_tbl1; +describe formatted unicode_comments_tbl1; +show create table unicode_comments_view1; +describe formatted unicode_comments_view1; +show formatted index on unicode_comments_tbl1; + +drop database unicode_comments_db cascade; diff --git a/ql/src/test/results/clientpositive/unicode_comments.q.out b/ql/src/test/results/clientpositive/unicode_comments.q.out new file mode 100644 index 0000000..4872cd3 --- /dev/null +++ b/ql/src/test/results/clientpositive/unicode_comments.q.out @@ -0,0 +1,166 @@ +PREHOOK: query: create database unicode_comments_db comment '数据库' +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:unicode_comments_db +POSTHOOK: query: create database unicode_comments_db comment '数据库' +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:unicode_comments_db +PREHOOK: query: use unicode_comments_db +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:unicode_comments_db +POSTHOOK: query: use unicode_comments_db +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:unicode_comments_db +PREHOOK: query: create table 
unicode_comments_tbl1 +(col1 string comment '第一列') comment '表格' +partitioned by (p1 string comment '分割') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:unicode_comments_db +PREHOOK: Output: unicode_comments_db@unicode_comments_tbl1 +POSTHOOK: query: create table unicode_comments_tbl1 +(col1 string comment '第一列') comment '表格' +partitioned by (p1 string comment '分割') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:unicode_comments_db +POSTHOOK: Output: unicode_comments_db@unicode_comments_tbl1 +PREHOOK: query: create view unicode_comments_view1 (col1 comment '第一列') comment '视图' +as select col1 from unicode_comments_tbl1 +PREHOOK: type: CREATEVIEW +PREHOOK: Input: unicode_comments_db@unicode_comments_tbl1 +PREHOOK: Output: database:unicode_comments_db +PREHOOK: Output: unicode_comments_db@unicode_comments_view1 +POSTHOOK: query: create view unicode_comments_view1 (col1 comment '第一列') comment '视图' +as select col1 from unicode_comments_tbl1 +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: unicode_comments_db@unicode_comments_tbl1 +POSTHOOK: Output: database:unicode_comments_db +POSTHOOK: Output: unicode_comments_db@unicode_comments_view1 +PREHOOK: query: create index index2 on table unicode_comments_tbl1(col1) as 'COMPACT' with deferred rebuild comment '索引' +PREHOOK: type: CREATEINDEX +PREHOOK: Input: unicode_comments_db@unicode_comments_tbl1 +POSTHOOK: query: create index index2 on table unicode_comments_tbl1(col1) as 'COMPACT' with deferred rebuild comment '索引' +POSTHOOK: type: CREATEINDEX +POSTHOOK: Input: unicode_comments_db@unicode_comments_tbl1 +POSTHOOK: Output: unicode_comments_db@unicode_comments_db__unicode_comments_tbl1_index2__ +PREHOOK: query: describe database extended unicode_comments_db +PREHOOK: type: DESCDATABASE +PREHOOK: Input: database:unicode_comments_db +POSTHOOK: query: describe database extended unicode_comments_db +POSTHOOK: type: DESCDATABASE +POSTHOOK: Input: database:unicode_comments_db +unicode_comments_db 数据库 location/in/test 
hive_test_user USER +PREHOOK: query: show create table unicode_comments_tbl1 +PREHOOK: type: SHOW_CREATETABLE +PREHOOK: Input: unicode_comments_db@unicode_comments_tbl1 +POSTHOOK: query: show create table unicode_comments_tbl1 +POSTHOOK: type: SHOW_CREATETABLE +POSTHOOK: Input: unicode_comments_db@unicode_comments_tbl1 +CREATE TABLE `unicode_comments_tbl1`( + `col1` string COMMENT '第一列') +COMMENT '表格' +PARTITIONED BY ( + `p1` string COMMENT '分割') +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' +LOCATION +#### A masked pattern was here #### +TBLPROPERTIES ( +#### A masked pattern was here #### +PREHOOK: query: describe formatted unicode_comments_tbl1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: unicode_comments_db@unicode_comments_tbl1 +POSTHOOK: query: describe formatted unicode_comments_tbl1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: unicode_comments_db@unicode_comments_tbl1 +# col_name data_type comment + +col1 string 第一列 + +# Partition Information +# col_name data_type comment + +p1 string 分割 + +# Detailed Table Information +Database: unicode_comments_db +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + comment 表格 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: show create table unicode_comments_view1 +PREHOOK: type: SHOW_CREATETABLE +PREHOOK: Input: unicode_comments_db@unicode_comments_view1 +POSTHOOK: query: show create table unicode_comments_view1 +POSTHOOK: type: 
SHOW_CREATETABLE +POSTHOOK: Input: unicode_comments_db@unicode_comments_view1 +CREATE VIEW `unicode_comments_view1` AS SELECT `col1` AS `col1` FROM (select `unicode_comments_tbl1`.`col1` from `unicode_comments_db`.`unicode_comments_tbl1`) `unicode_comments_db.unicode_comments_view1` +PREHOOK: query: describe formatted unicode_comments_view1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: unicode_comments_db@unicode_comments_view1 +POSTHOOK: query: describe formatted unicode_comments_view1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: unicode_comments_db@unicode_comments_view1 +# col_name data_type comment + +col1 string 第一列 + +# Detailed Table Information +Database: unicode_comments_db +#### A masked pattern was here #### +Retention: 0 +Table Type: VIRTUAL_VIEW +Table Parameters: + comment 视图 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: null +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] + +# View Information +View Original Text: select col1 from unicode_comments_tbl1 +View Expanded Text: SELECT `col1` AS `col1` FROM (select `unicode_comments_tbl1`.`col1` from `unicode_comments_db`.`unicode_comments_tbl1`) `unicode_comments_db.unicode_comments_view1` +PREHOOK: query: show formatted index on unicode_comments_tbl1 +PREHOOK: type: SHOWINDEXES +POSTHOOK: query: show formatted index on unicode_comments_tbl1 +POSTHOOK: type: SHOWINDEXES +idx_name tab_name col_names idx_tab_name idx_type comment + + +index2 unicode_comments_tbl1 col1 unicode_comments_db__unicode_comments_tbl1_index2__ compact 索引 +PREHOOK: query: drop database unicode_comments_db cascade +PREHOOK: type: DROPDATABASE +PREHOOK: Input: database:unicode_comments_db +PREHOOK: Output: database:unicode_comments_db +PREHOOK: Output: unicode_comments_db@unicode_comments_db__unicode_comments_tbl1_index2__ +PREHOOK: Output: 
unicode_comments_db@unicode_comments_tbl1 +PREHOOK: Output: unicode_comments_db@unicode_comments_view1 +POSTHOOK: query: drop database unicode_comments_db cascade +POSTHOOK: type: DROPDATABASE +POSTHOOK: Input: database:unicode_comments_db +POSTHOOK: Output: database:unicode_comments_db +POSTHOOK: Output: unicode_comments_db@unicode_comments_db__unicode_comments_tbl1_index2__ +POSTHOOK: Output: unicode_comments_db@unicode_comments_tbl1 +POSTHOOK: Output: unicode_comments_db@unicode_comments_view1