diff --git a/common/pom.xml b/common/pom.xml index b7244aa..a8fdd27 100644 --- a/common/pom.xml +++ b/common/pom.xml @@ -61,6 +61,11 @@ ${commons-lang.version} + org.apache.commons + commons-lang3 + ${commons-lang3.version} + + org.eclipse.jetty.aggregate jetty-all ${jetty.version} diff --git a/common/src/java/org/apache/hive/common/util/HiveStringUtils.java b/common/src/java/org/apache/hive/common/util/HiveStringUtils.java index bba14e2..c2ff635 100644 --- a/common/src/java/org/apache/hive/common/util/HiveStringUtils.java +++ b/common/src/java/org/apache/hive/common/util/HiveStringUtils.java @@ -43,11 +43,13 @@ import com.google.common.collect.Interner; import com.google.common.collect.Interners; +import org.apache.commons.lang3.text.translate.CharSequenceTranslator; +import org.apache.commons.lang3.text.translate.EntityArrays; +import org.apache.commons.lang3.text.translate.LookupTranslator; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.classification.InterfaceAudience; import org.apache.hadoop.hive.common.classification.InterfaceStability; import org.apache.hadoop.io.Text; -import org.apache.hadoop.util.StringUtils; /** * HiveStringUtils @@ -66,6 +68,14 @@ private static final DecimalFormat decimalFormat; + private static final CharSequenceTranslator ESCAPE_JAVA = + new LookupTranslator( + new String[][] { + {"\"", "\\\""}, + {"\\", "\\\\"}, + }).with( + new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE())); + /** * Maintain a String pool to reduce memory. */ @@ -603,6 +613,17 @@ public static String escapeString(String str, char escapeChar, } /** + * Escape non-unicode characters. StringEscapeUtils.escapeJava() will escape + * unicode characters as well but in some cases it's not desired. 
+ * + * @param str Original string + * @return Escaped string + */ + public static String escapeJava(String str) { + return ESCAPE_JAVA.translate(str); +} + + /** * Unescape commas in the string using the default escape char * @param str a string * @return an unescaped string diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index 493e3a0..7099b2a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -31,6 +31,7 @@ import java.io.Writer; import java.net.URI; import java.net.URISyntaxException; +import java.nio.charset.StandardCharsets; import java.sql.SQLException; import java.util.AbstractList; import java.util.ArrayList; @@ -2076,7 +2077,7 @@ private int showCreateTable(Hive db, DataOutputStream outStream, String tableNam if (tbl.isView()) { String createTab_stmt = "CREATE VIEW `" + tableName + "` AS " + tbl.getViewExpandedText(); - outStream.writeBytes(createTab_stmt.toString()); + outStream.write(createTab_stmt.getBytes(StandardCharsets.UTF_8)); return 0; } @@ -2225,7 +2226,7 @@ else if (sortCol.getOrder() == BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_DESC) { } createTab_stmt.add(TBL_PROPERTIES, tbl_properties); - outStream.writeBytes(createTab_stmt.render()); + outStream.write(createTab_stmt.render().getBytes(StandardCharsets.UTF_8)); } catch (IOException e) { LOG.info("show create table: " + stringifyException(e)); return 1; @@ -2288,14 +2289,14 @@ private int showIndexes(Hive db, ShowIndexesDesc showIndexes) throws HiveExcepti try { if (showIndexes.isFormatted()) { // column headers - outStream.writeBytes(MetaDataFormatUtils.getIndexColumnsHeader()); + outStream.write(MetaDataFormatUtils.getIndexColumnsHeader().getBytes(StandardCharsets.UTF_8)); outStream.write(terminator); outStream.write(terminator); } for (Index index : indexes) { - 
outStream.writeBytes(MetaDataFormatUtils.getAllColumnsInformation(index)); + outStream.write(MetaDataFormatUtils.getAllColumnsInformation(index).getBytes(StandardCharsets.UTF_8)); } } catch (FileNotFoundException e) { LOG.info("show indexes: " + stringifyException(e)); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java index a2ccd56..03803bb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.metadata.formatting; import org.apache.commons.lang.StringEscapeUtils; +import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; @@ -47,6 +48,7 @@ import org.apache.hadoop.hive.ql.plan.PlanUtils; import org.apache.hadoop.hive.ql.plan.ShowIndexesDesc; import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hive.common.util.HiveStringUtils; import java.math.BigInteger; import java.util.ArrayList; @@ -438,7 +440,7 @@ private static void getTableMetaDataInformation(StringBuilder tableInfo, Table if (tbl.getParameters().size() > 0) { tableInfo.append("Table Parameters:").append(LINE_DELIM); - displayAllParameters(tbl.getParameters(), tableInfo); + displayAllParameters(tbl.getParameters(), tableInfo, false); } } @@ -457,12 +459,28 @@ private static void getPartitionMetaDataInformation(StringBuilder tableInfo, Par } } + /** + * Display key, value pairs of the parameters. The characters will be escaped + * including unicode. + */ private static void displayAllParameters(Map params, StringBuilder tableInfo) { + displayAllParameters(params, tableInfo, true); + } + + /** + * Display key, value pairs of the parameters. 
The characters will be escaped + * including unicode if escapeUnicode is true; otherwise the characters other + * than unicode will be escaped. + */ + + private static void displayAllParameters(Map params, StringBuilder tableInfo, boolean escapeUnicode) { List keys = new ArrayList(params.keySet()); Collections.sort(keys); for (String key : keys) { tableInfo.append(FIELD_DELIM); // Ensures all params are indented. - formatOutput(key, StringEscapeUtils.escapeJava(params.get(key)), tableInfo); + formatOutput(key, + escapeUnicode ? StringEscapeUtils.escapeJava(params.get(key)) : HiveStringUtils.escapeJava(params.get(key)), + tableInfo); } } diff --git a/ql/src/test/queries/clientpositive/unicode_comments.q b/ql/src/test/queries/clientpositive/unicode_comments.q new file mode 100644 index 0000000..4d958e4 --- /dev/null +++ b/ql/src/test/queries/clientpositive/unicode_comments.q @@ -0,0 +1,17 @@ +create database unicode_comments_db comment '数据库'; +use unicode_comments_db; +create table unicode_comments_tbl1 +(col1 string comment '第一列') comment '表格' +partitioned by (p1 string comment '分割'); +create view unicode_comments_view1 (col1 comment '第一列') comment '视图' +as select col1 from unicode_comments_tbl1; +create index index2 on table unicode_comments_tbl1(col1) as 'COMPACT' with deferred rebuild comment '索引'; + +describe database extended unicode_comments_db; +show create table unicode_comments_tbl1; +describe formatted unicode_comments_tbl1; +show create table unicode_comments_view1; +describe formatted unicode_comments_view1; +show formatted index on unicode_comments_tbl1; + +drop database unicode_comments_db cascade; diff --git a/ql/src/test/results/clientpositive/unicode_comments.q.out b/ql/src/test/results/clientpositive/unicode_comments.q.out new file mode 100644 index 0000000..4872cd3 --- /dev/null +++ b/ql/src/test/results/clientpositive/unicode_comments.q.out @@ -0,0 +1,166 @@ +PREHOOK: query: create database unicode_comments_db comment '数据库' +PREHOOK: type: 
CREATEDATABASE +PREHOOK: Output: database:unicode_comments_db +POSTHOOK: query: create database unicode_comments_db comment '数据库' +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:unicode_comments_db +PREHOOK: query: use unicode_comments_db +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:unicode_comments_db +POSTHOOK: query: use unicode_comments_db +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:unicode_comments_db +PREHOOK: query: create table unicode_comments_tbl1 +(col1 string comment '第一列') comment '表格' +partitioned by (p1 string comment '分割') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:unicode_comments_db +PREHOOK: Output: unicode_comments_db@unicode_comments_tbl1 +POSTHOOK: query: create table unicode_comments_tbl1 +(col1 string comment '第一列') comment '表格' +partitioned by (p1 string comment '分割') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:unicode_comments_db +POSTHOOK: Output: unicode_comments_db@unicode_comments_tbl1 +PREHOOK: query: create view unicode_comments_view1 (col1 comment '第一列') comment '视图' +as select col1 from unicode_comments_tbl1 +PREHOOK: type: CREATEVIEW +PREHOOK: Input: unicode_comments_db@unicode_comments_tbl1 +PREHOOK: Output: database:unicode_comments_db +PREHOOK: Output: unicode_comments_db@unicode_comments_view1 +POSTHOOK: query: create view unicode_comments_view1 (col1 comment '第一列') comment '视图' +as select col1 from unicode_comments_tbl1 +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: unicode_comments_db@unicode_comments_tbl1 +POSTHOOK: Output: database:unicode_comments_db +POSTHOOK: Output: unicode_comments_db@unicode_comments_view1 +PREHOOK: query: create index index2 on table unicode_comments_tbl1(col1) as 'COMPACT' with deferred rebuild comment '索引' +PREHOOK: type: CREATEINDEX +PREHOOK: Input: unicode_comments_db@unicode_comments_tbl1 +POSTHOOK: query: create index index2 on table unicode_comments_tbl1(col1) as 'COMPACT' with deferred rebuild comment '索引' +POSTHOOK: type: 
CREATEINDEX +POSTHOOK: Input: unicode_comments_db@unicode_comments_tbl1 +POSTHOOK: Output: unicode_comments_db@unicode_comments_db__unicode_comments_tbl1_index2__ +PREHOOK: query: describe database extended unicode_comments_db +PREHOOK: type: DESCDATABASE +PREHOOK: Input: database:unicode_comments_db +POSTHOOK: query: describe database extended unicode_comments_db +POSTHOOK: type: DESCDATABASE +POSTHOOK: Input: database:unicode_comments_db +unicode_comments_db 数据库 location/in/test hive_test_user USER +PREHOOK: query: show create table unicode_comments_tbl1 +PREHOOK: type: SHOW_CREATETABLE +PREHOOK: Input: unicode_comments_db@unicode_comments_tbl1 +POSTHOOK: query: show create table unicode_comments_tbl1 +POSTHOOK: type: SHOW_CREATETABLE +POSTHOOK: Input: unicode_comments_db@unicode_comments_tbl1 +CREATE TABLE `unicode_comments_tbl1`( + `col1` string COMMENT '第一列') +COMMENT '表格' +PARTITIONED BY ( + `p1` string COMMENT '分割') +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' +LOCATION +#### A masked pattern was here #### +TBLPROPERTIES ( +#### A masked pattern was here #### +PREHOOK: query: describe formatted unicode_comments_tbl1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: unicode_comments_db@unicode_comments_tbl1 +POSTHOOK: query: describe formatted unicode_comments_tbl1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: unicode_comments_db@unicode_comments_tbl1 +# col_name data_type comment + +col1 string 第一列 + +# Partition Information +# col_name data_type comment + +p1 string 分割 + +# Detailed Table Information +Database: unicode_comments_db +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + comment 表格 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: show create table unicode_comments_view1 +PREHOOK: type: SHOW_CREATETABLE +PREHOOK: Input: unicode_comments_db@unicode_comments_view1 +POSTHOOK: query: show create table unicode_comments_view1 +POSTHOOK: type: SHOW_CREATETABLE +POSTHOOK: Input: unicode_comments_db@unicode_comments_view1 +CREATE VIEW `unicode_comments_view1` AS SELECT `col1` AS `col1` FROM (select `unicode_comments_tbl1`.`col1` from `unicode_comments_db`.`unicode_comments_tbl1`) `unicode_comments_db.unicode_comments_view1` +PREHOOK: query: describe formatted unicode_comments_view1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: unicode_comments_db@unicode_comments_view1 +POSTHOOK: query: describe formatted unicode_comments_view1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: unicode_comments_db@unicode_comments_view1 +# col_name data_type comment + +col1 string 第一列 + +# Detailed Table Information +Database: unicode_comments_db +#### A masked pattern was here #### +Retention: 0 +Table Type: VIRTUAL_VIEW +Table Parameters: + comment 视图 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: null +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] + +# View Information +View Original Text: select col1 from unicode_comments_tbl1 +View Expanded Text: SELECT `col1` AS `col1` FROM (select `unicode_comments_tbl1`.`col1` from `unicode_comments_db`.`unicode_comments_tbl1`) `unicode_comments_db.unicode_comments_view1` +PREHOOK: query: show formatted index on unicode_comments_tbl1 +PREHOOK: type: SHOWINDEXES +POSTHOOK: query: show formatted 
index on unicode_comments_tbl1 +POSTHOOK: type: SHOWINDEXES +idx_name tab_name col_names idx_tab_name idx_type comment + + +index2 unicode_comments_tbl1 col1 unicode_comments_db__unicode_comments_tbl1_index2__ compact 索引 +PREHOOK: query: drop database unicode_comments_db cascade +PREHOOK: type: DROPDATABASE +PREHOOK: Input: database:unicode_comments_db +PREHOOK: Output: database:unicode_comments_db +PREHOOK: Output: unicode_comments_db@unicode_comments_db__unicode_comments_tbl1_index2__ +PREHOOK: Output: unicode_comments_db@unicode_comments_tbl1 +PREHOOK: Output: unicode_comments_db@unicode_comments_view1 +POSTHOOK: query: drop database unicode_comments_db cascade +POSTHOOK: type: DROPDATABASE +POSTHOOK: Input: database:unicode_comments_db +POSTHOOK: Output: database:unicode_comments_db +POSTHOOK: Output: unicode_comments_db@unicode_comments_db__unicode_comments_tbl1_index2__ +POSTHOOK: Output: unicode_comments_db@unicode_comments_tbl1 +POSTHOOK: Output: unicode_comments_db@unicode_comments_view1