diff --git a/common/pom.xml b/common/pom.xml
index b7244aa..a8fdd27 100644
--- a/common/pom.xml
+++ b/common/pom.xml
@@ -61,6 +61,11 @@
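<version>${commons-lang.version}</version>
</dependency>
<dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-lang3</artifactId>
+ <version>${commons-lang3.version}</version>
+ </dependency>
+ <dependency>
<groupId>org.eclipse.jetty.aggregate</groupId>
<artifactId>jetty-all</artifactId>
<version>${jetty.version}</version>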
diff --git a/common/src/java/org/apache/hive/common/util/HiveStringUtils.java b/common/src/java/org/apache/hive/common/util/HiveStringUtils.java
index bba14e2..c2ff635 100644
--- a/common/src/java/org/apache/hive/common/util/HiveStringUtils.java
+++ b/common/src/java/org/apache/hive/common/util/HiveStringUtils.java
@@ -43,11 +43,13 @@
import com.google.common.collect.Interner;
import com.google.common.collect.Interners;
+import org.apache.commons.lang3.text.translate.CharSequenceTranslator;
+import org.apache.commons.lang3.text.translate.EntityArrays;
+import org.apache.commons.lang3.text.translate.LookupTranslator;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.classification.InterfaceAudience;
import org.apache.hadoop.hive.common.classification.InterfaceStability;
import org.apache.hadoop.io.Text;
-import org.apache.hadoop.util.StringUtils;
/**
* HiveStringUtils
@@ -66,6 +68,14 @@
private static final DecimalFormat decimalFormat;
+ private static final CharSequenceTranslator ESCAPE_JAVA = // escapes quotes, backslashes and control characters
+ new LookupTranslator(
+ new String[][] {
+ {"\"", "\\\""}, // double quote -> backslash followed by double quote
+ {"\\", "\\\\"}, // backslash -> two backslashes
+ }).with(
+ new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE())); // control-character table combined via with()
+
/**
* Maintain a String pool to reduce memory.
*/
@@ -603,6 +613,17 @@ public static String escapeString(String str, char escapeChar,
}
/**
+ * Escapes double quotes, backslashes and Java control characters, leaving
+ * unicode characters as they are; StringEscapeUtils.escapeJava() escapes those too.
+ *
+ * @param str Original string
+ * @return Escaped string
+ */
+ public static String escapeJava(String str) {
+ return ESCAPE_JAVA.translate(str);
+}
+
+ /**
* Unescape commas in the string using the default escape char
* @param str a string
* @return an unescaped string
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index 493e3a0..7099b2a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -31,6 +31,7 @@
import java.io.Writer;
import java.net.URI;
import java.net.URISyntaxException;
+import java.nio.charset.StandardCharsets;
import java.sql.SQLException;
import java.util.AbstractList;
import java.util.ArrayList;
@@ -2076,7 +2077,7 @@ private int showCreateTable(Hive db, DataOutputStream outStream, String tableNam
if (tbl.isView()) {
String createTab_stmt = "CREATE VIEW `" + tableName + "` AS " + tbl.getViewExpandedText();
- outStream.writeBytes(createTab_stmt.toString());
+ outStream.write(createTab_stmt.getBytes(StandardCharsets.UTF_8));
return 0;
}
@@ -2225,7 +2226,7 @@ else if (sortCol.getOrder() == BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_DESC) {
}
createTab_stmt.add(TBL_PROPERTIES, tbl_properties);
- outStream.writeBytes(createTab_stmt.render());
+ outStream.write(createTab_stmt.render().getBytes(StandardCharsets.UTF_8));
} catch (IOException e) {
LOG.info("show create table: " + stringifyException(e));
return 1;
@@ -2288,14 +2289,14 @@ private int showIndexes(Hive db, ShowIndexesDesc showIndexes) throws HiveExcepti
try {
if (showIndexes.isFormatted()) {
// column headers
- outStream.writeBytes(MetaDataFormatUtils.getIndexColumnsHeader());
+ outStream.write(MetaDataFormatUtils.getIndexColumnsHeader().getBytes(StandardCharsets.UTF_8));
outStream.write(terminator);
outStream.write(terminator);
}
for (Index index : indexes)
{
- outStream.writeBytes(MetaDataFormatUtils.getAllColumnsInformation(index));
+ outStream.write(MetaDataFormatUtils.getAllColumnsInformation(index).getBytes(StandardCharsets.UTF_8));
}
} catch (FileNotFoundException e) {
LOG.info("show indexes: " + stringifyException(e));
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
index a2ccd56..03803bb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.ql.metadata.formatting;
import org.apache.commons.lang.StringEscapeUtils;
+import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
@@ -47,6 +48,7 @@
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.plan.ShowIndexesDesc;
import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hive.common.util.HiveStringUtils;
import java.math.BigInteger;
import java.util.ArrayList;
@@ -438,7 +440,7 @@ private static void getTableMetaDataInformation(StringBuilder tableInfo, Table
if (tbl.getParameters().size() > 0) {
tableInfo.append("Table Parameters:").append(LINE_DELIM);
- displayAllParameters(tbl.getParameters(), tableInfo);
+ displayAllParameters(tbl.getParameters(), tableInfo, false);
}
}
@@ -457,12 +459,28 @@ private static void getPartitionMetaDataInformation(StringBuilder tableInfo, Par
}
}
+ /**
+ * Display key, value pairs of the parameters. Values are escaped with
+ * unicode characters included (delegates with escapeUnicode set to true).
+ */
private static void displayAllParameters(Map<String, String> params, StringBuilder tableInfo) {
+ displayAllParameters(params, tableInfo, true);
+ }
+
+ /**
+ * Display key, value pairs of the parameters, sorted by key. Values are escaped with
+ * StringEscapeUtils.escapeJava() (unicode included) if escapeUnicode is true; otherwise
+ * with HiveStringUtils.escapeJava(), which leaves unicode characters as they are.
+ */
+
+ private static void displayAllParameters(Map<String, String> params, StringBuilder tableInfo, boolean escapeUnicode) {
List<String> keys = new ArrayList<String>(params.keySet());
Collections.sort(keys);
for (String key : keys) {
tableInfo.append(FIELD_DELIM); // Ensures all params are indented.
- formatOutput(key, StringEscapeUtils.escapeJava(params.get(key)), tableInfo);
+ formatOutput(key,
+ escapeUnicode ? StringEscapeUtils.escapeJava(params.get(key)) : HiveStringUtils.escapeJava(params.get(key)),
+ tableInfo);
}
}
diff --git a/ql/src/test/queries/clientpositive/unicode_comments.q b/ql/src/test/queries/clientpositive/unicode_comments.q
new file mode 100644
index 0000000..4d958e4
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/unicode_comments.q
@@ -0,0 +1,17 @@
+create database unicode_comments_db comment '数据库';
+use unicode_comments_db;
+create table unicode_comments_tbl1
+(col1 string comment '第一列') comment '表格'
+partitioned by (p1 string comment '分割');
+create view unicode_comments_view1 (col1 comment '第一列') comment '视图'
+as select col1 from unicode_comments_tbl1;
+create index index2 on table unicode_comments_tbl1(col1) as 'COMPACT' with deferred rebuild comment '索引';
+
+describe database extended unicode_comments_db;
+show create table unicode_comments_tbl1;
+describe formatted unicode_comments_tbl1;
+show create table unicode_comments_view1;
+describe formatted unicode_comments_view1;
+show formatted index on unicode_comments_tbl1;
+
+drop database unicode_comments_db cascade;
diff --git a/ql/src/test/results/clientpositive/unicode_comments.q.out b/ql/src/test/results/clientpositive/unicode_comments.q.out
new file mode 100644
index 0000000..4872cd3
--- /dev/null
+++ b/ql/src/test/results/clientpositive/unicode_comments.q.out
@@ -0,0 +1,166 @@
+PREHOOK: query: create database unicode_comments_db comment '数据库'
+PREHOOK: type: CREATEDATABASE
+PREHOOK: Output: database:unicode_comments_db
+POSTHOOK: query: create database unicode_comments_db comment '数据库'
+POSTHOOK: type: CREATEDATABASE
+POSTHOOK: Output: database:unicode_comments_db
+PREHOOK: query: use unicode_comments_db
+PREHOOK: type: SWITCHDATABASE
+PREHOOK: Input: database:unicode_comments_db
+POSTHOOK: query: use unicode_comments_db
+POSTHOOK: type: SWITCHDATABASE
+POSTHOOK: Input: database:unicode_comments_db
+PREHOOK: query: create table unicode_comments_tbl1
+(col1 string comment '第一列') comment '表格'
+partitioned by (p1 string comment '分割')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:unicode_comments_db
+PREHOOK: Output: unicode_comments_db@unicode_comments_tbl1
+POSTHOOK: query: create table unicode_comments_tbl1
+(col1 string comment '第一列') comment '表格'
+partitioned by (p1 string comment '分割')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:unicode_comments_db
+POSTHOOK: Output: unicode_comments_db@unicode_comments_tbl1
+PREHOOK: query: create view unicode_comments_view1 (col1 comment '第一列') comment '视图'
+as select col1 from unicode_comments_tbl1
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: unicode_comments_db@unicode_comments_tbl1
+PREHOOK: Output: database:unicode_comments_db
+PREHOOK: Output: unicode_comments_db@unicode_comments_view1
+POSTHOOK: query: create view unicode_comments_view1 (col1 comment '第一列') comment '视图'
+as select col1 from unicode_comments_tbl1
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: unicode_comments_db@unicode_comments_tbl1
+POSTHOOK: Output: database:unicode_comments_db
+POSTHOOK: Output: unicode_comments_db@unicode_comments_view1
+PREHOOK: query: create index index2 on table unicode_comments_tbl1(col1) as 'COMPACT' with deferred rebuild comment '索引'
+PREHOOK: type: CREATEINDEX
+PREHOOK: Input: unicode_comments_db@unicode_comments_tbl1
+POSTHOOK: query: create index index2 on table unicode_comments_tbl1(col1) as 'COMPACT' with deferred rebuild comment '索引'
+POSTHOOK: type: CREATEINDEX
+POSTHOOK: Input: unicode_comments_db@unicode_comments_tbl1
+POSTHOOK: Output: unicode_comments_db@unicode_comments_db__unicode_comments_tbl1_index2__
+PREHOOK: query: describe database extended unicode_comments_db
+PREHOOK: type: DESCDATABASE
+PREHOOK: Input: database:unicode_comments_db
+POSTHOOK: query: describe database extended unicode_comments_db
+POSTHOOK: type: DESCDATABASE
+POSTHOOK: Input: database:unicode_comments_db
+unicode_comments_db 数据库 location/in/test hive_test_user USER
+PREHOOK: query: show create table unicode_comments_tbl1
+PREHOOK: type: SHOW_CREATETABLE
+PREHOOK: Input: unicode_comments_db@unicode_comments_tbl1
+POSTHOOK: query: show create table unicode_comments_tbl1
+POSTHOOK: type: SHOW_CREATETABLE
+POSTHOOK: Input: unicode_comments_db@unicode_comments_tbl1
+CREATE TABLE `unicode_comments_tbl1`(
+ `col1` string COMMENT '第一列')
+COMMENT '表格'
+PARTITIONED BY (
+ `p1` string COMMENT '分割')
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+TBLPROPERTIES (
+#### A masked pattern was here ####
+PREHOOK: query: describe formatted unicode_comments_tbl1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: unicode_comments_db@unicode_comments_tbl1
+POSTHOOK: query: describe formatted unicode_comments_tbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: unicode_comments_db@unicode_comments_tbl1
+# col_name data_type comment
+
+col1 string 第一列
+
+# Partition Information
+# col_name data_type comment
+
+p1 string 分割
+
+# Detailed Table Information
+Database: unicode_comments_db
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ comment 表格
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: show create table unicode_comments_view1
+PREHOOK: type: SHOW_CREATETABLE
+PREHOOK: Input: unicode_comments_db@unicode_comments_view1
+POSTHOOK: query: show create table unicode_comments_view1
+POSTHOOK: type: SHOW_CREATETABLE
+POSTHOOK: Input: unicode_comments_db@unicode_comments_view1
+CREATE VIEW `unicode_comments_view1` AS SELECT `col1` AS `col1` FROM (select `unicode_comments_tbl1`.`col1` from `unicode_comments_db`.`unicode_comments_tbl1`) `unicode_comments_db.unicode_comments_view1`
+PREHOOK: query: describe formatted unicode_comments_view1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: unicode_comments_db@unicode_comments_view1
+POSTHOOK: query: describe formatted unicode_comments_view1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: unicode_comments_db@unicode_comments_view1
+# col_name data_type comment
+
+col1 string 第一列
+
+# Detailed Table Information
+Database: unicode_comments_db
+#### A masked pattern was here ####
+Retention: 0
+Table Type: VIRTUAL_VIEW
+Table Parameters:
+ comment 视图
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: null
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+
+# View Information
+View Original Text: select col1 from unicode_comments_tbl1
+View Expanded Text: SELECT `col1` AS `col1` FROM (select `unicode_comments_tbl1`.`col1` from `unicode_comments_db`.`unicode_comments_tbl1`) `unicode_comments_db.unicode_comments_view1`
+PREHOOK: query: show formatted index on unicode_comments_tbl1
+PREHOOK: type: SHOWINDEXES
+POSTHOOK: query: show formatted index on unicode_comments_tbl1
+POSTHOOK: type: SHOWINDEXES
+idx_name tab_name col_names idx_tab_name idx_type comment
+
+
+index2 unicode_comments_tbl1 col1 unicode_comments_db__unicode_comments_tbl1_index2__ compact 索引
+PREHOOK: query: drop database unicode_comments_db cascade
+PREHOOK: type: DROPDATABASE
+PREHOOK: Input: database:unicode_comments_db
+PREHOOK: Output: database:unicode_comments_db
+PREHOOK: Output: unicode_comments_db@unicode_comments_db__unicode_comments_tbl1_index2__
+PREHOOK: Output: unicode_comments_db@unicode_comments_tbl1
+PREHOOK: Output: unicode_comments_db@unicode_comments_view1
+POSTHOOK: query: drop database unicode_comments_db cascade
+POSTHOOK: type: DROPDATABASE
+POSTHOOK: Input: database:unicode_comments_db
+POSTHOOK: Output: database:unicode_comments_db
+POSTHOOK: Output: unicode_comments_db@unicode_comments_db__unicode_comments_tbl1_index2__
+POSTHOOK: Output: unicode_comments_db@unicode_comments_tbl1
+POSTHOOK: Output: unicode_comments_db@unicode_comments_view1