diff --git common/src/java/org/apache/hive/common/util/HiveStringUtils.java common/src/java/org/apache/hive/common/util/HiveStringUtils.java index 72c3fa9..507e369 100644 --- common/src/java/org/apache/hive/common/util/HiveStringUtils.java +++ common/src/java/org/apache/hive/common/util/HiveStringUtils.java @@ -77,6 +77,15 @@ }).with( new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE())); + private static final CharSequenceTranslator ESCAPE_HIVE_COMMAND = + new LookupTranslator( + new String[][] { + {"'", "\\'"}, + {";", "\\;"}, + {"\\", "\\\\"}, + }).with( + new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE())); + /** * Maintain a String pool to reduce memory. */ @@ -622,7 +631,19 @@ public static String escapeString(String str, char escapeChar, */ public static String escapeJava(String str) { return ESCAPE_JAVA.translate(str); -} + } + + /** + * Escape non-unicode characters, and ', and ; + * Like StringEscapeUtil.escapeJava() will escape + * unicode characters as well but in some cases it's not desired. + * + * @param str Original string + * @return Escaped string + */ + public static String escapeHiveCommand(String str) { + return ESCAPE_HIVE_COMMAND.translate(str); + } /** * Unescape commas in the string using the default escape char diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index a59b781..92461c0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -50,7 +50,6 @@ import java.util.TreeMap; import java.util.TreeSet; -import org.apache.commons.lang.StringEscapeUtils; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; @@ -228,6 +227,7 @@ import org.apache.hadoop.tools.HadoopArchives; import org.apache.hadoop.util.ToolRunner; import org.apache.hive.common.util.AnnotationUtils; +import org.apache.hive.common.util.HiveStringUtils; import org.apache.hive.common.util.ReflectionUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -2046,7 +2046,8 @@ private int showCreateDatabase(Hive db, DataOutputStream outStream, String datab createDb_str.append("CREATE DATABASE `").append(database.getName()).append("`\n"); if (database.getDescription() != null) { createDb_str.append("COMMENT\n '"); - createDb_str.append(escapeHiveCommand(database.getDescription())).append("'\n"); + createDb_str.append( + HiveStringUtils.escapeHiveCommand(database.getDescription())).append("'\n"); } createDb_str.append("LOCATION\n '"); createDb_str.append(database.getLocationUri()).append("'\n"); @@ -2144,7 +2145,8 @@ private int showCreateTable(Hive db, DataOutputStream outStream, String tableNam for (FieldSchema col : cols) { String columnDesc = " `" + col.getName() + "` " + col.getType(); if (col.getComment() != null) { - columnDesc = columnDesc + " COMMENT '" + escapeHiveCommand(col.getComment()) + "'"; + columnDesc = columnDesc + " COMMENT '" + + HiveStringUtils.escapeHiveCommand(col.getComment()) + "'"; } columns.add(columnDesc); } @@ -2155,7 +2157,8 @@ private int showCreateTable(Hive db, DataOutputStream outStream, String tableNam String tabComment = tbl.getProperty("comment"); if (tabComment != null) { duplicateProps.add("comment"); - tbl_comment = "COMMENT '" + escapeHiveCommand(tabComment) + "'"; + tbl_comment = "COMMENT '" + + HiveStringUtils.escapeHiveCommand(tabComment) + "'"; } // Partitions @@ -2167,8 +2170,8 @@ private int showCreateTable(Hive db, DataOutputStream outStream, String tableNam for (FieldSchema partKey : partKeys) { String partColDesc = " `" + partKey.getName() + "` " + partKey.getType(); if (partKey.getComment() != null) { - partColDesc = partColDesc + " COMMENT '" + - escapeHiveCommand(partKey.getComment()) + "'"; + partColDesc = partColDesc + " COMMENT '" + + HiveStringUtils.escapeHiveCommand(partKey.getComment()) + "'"; } partCols.add(partColDesc); } @@ -2211,7 +2214,8 @@ else if (sortCol.getOrder() == BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_DESC) { SerDeInfo serdeInfo = sd.getSerdeInfo(); Map serdeParams = serdeInfo.getParameters(); tbl_row_format.append("ROW FORMAT SERDE \n"); - tbl_row_format.append(" '" + escapeHiveCommand(serdeInfo.getSerializationLib()) + "' \n"); + tbl_row_format.append(" '" + + HiveStringUtils.escapeHiveCommand(serdeInfo.getSerializationLib()) + "' \n"); if (tbl.getStorageHandler() == null) { // If serialization.format property has the default value, it will not to be included in // SERDE properties @@ -2222,20 +2226,21 @@ else if (sortCol.getOrder() == BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_DESC) { if (!serdeParams.isEmpty()) { appendSerdeParams(tbl_row_format, serdeParams).append(" \n"); } - tbl_row_format.append("STORED AS INPUTFORMAT \n '" + - escapeHiveCommand(sd.getInputFormat()) + "' \n"); - tbl_row_format.append("OUTPUTFORMAT \n '" + - escapeHiveCommand(sd.getOutputFormat()) + "'"); + tbl_row_format.append("STORED AS INPUTFORMAT \n '" + + HiveStringUtils.escapeHiveCommand(sd.getInputFormat()) + "' \n"); + tbl_row_format.append("OUTPUTFORMAT \n '" + + HiveStringUtils.escapeHiveCommand(sd.getOutputFormat()) + "'"); } else { duplicateProps.add(META_TABLE_STORAGE); - tbl_row_format.append("STORED BY \n '" + escapeHiveCommand(tbl.getParameters().get( + tbl_row_format.append("STORED BY \n '" + + HiveStringUtils.escapeHiveCommand(tbl.getParameters().get( META_TABLE_STORAGE)) + "' \n"); // SerDe Properties if (!serdeParams.isEmpty()) { appendSerdeParams(tbl_row_format, serdeInfo.getParameters()); } } - String tbl_location = " '" + escapeHiveCommand(sd.getLocation()) + "'"; + String tbl_location = " '" + HiveStringUtils.escapeHiveCommand(sd.getLocation()) + "'"; // Table properties duplicateProps.addAll(Arrays.asList(StatsSetupConst.TABLE_PARAMS_STATS_KEYS)); @@ -2271,7 +2276,7 @@ private String propertiesToString(Map props, List exclud for (String key : properties.keySet()) { if (properties.get(key) != null && (exclude == null || !exclude.contains(key))) { realProps.add(" '" + key + "'='" + - escapeHiveCommand(StringEscapeUtils.escapeJava(properties.get(key))) + "'"); + HiveStringUtils.escapeHiveCommand(properties.get(key)) + "'"); } } prop_string += StringUtils.join(realProps, ", \n"); @@ -2285,7 +2290,7 @@ private StringBuilder appendSerdeParams(StringBuilder builder, Map serdeCols = new ArrayList(); for (Entry entry : serdeParam.entrySet()) { serdeCols.add(" '" + entry.getKey() + "'='" - + escapeHiveCommand(StringEscapeUtils.escapeJava(entry.getValue())) + "'"); + + HiveStringUtils.escapeHiveCommand(entry.getValue()) + "'"); } builder.append(StringUtils.join(serdeCols, ", \n")).append(')'); return builder; diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java index 03803bb..07d45e6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java @@ -233,7 +233,7 @@ private static void formatWithoutIndentation(String name, String type, String co appendColumnStatsNoFormatting(colBuffer, "", "", "", "", "", "", "", ""); } } - colBuffer.append(comment == null ? "" : comment); + colBuffer.append(comment == null ? "" : HiveStringUtils.escapeJava(comment)); colBuffer.append(LINE_DELIM); } @@ -268,7 +268,8 @@ public static String getAllColumnsInformation(Index index) { IndexType indexType = HiveIndex.getIndexTypeByClassName(indexHandlerClass); indexColumns.add(indexType.getName()); - indexColumns.add(index.getParameters().get("comment")); + String comment = index.getParameters().get("comment"); + indexColumns.add(comment == null ? null : HiveStringUtils.escapeJava(comment)); formatOutput(indexColumns.toArray(new String[0]), indexInfo); diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java index 47d67b1..6e14c99 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java @@ -27,6 +27,7 @@ import java.util.Map; import java.util.Set; +import org.apache.hive.common.util.HiveStringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.FileStatus; @@ -460,7 +461,7 @@ public void showDatabaseDescription(DataOutputStream outStream, String database, outStream.write(database.getBytes("UTF-8")); outStream.write(separator); if (comment != null) { - outStream.write(comment.getBytes("UTF-8")); + outStream.write(HiveStringUtils.escapeJava(comment).getBytes("UTF-8")); } outStream.write(separator); if (location != null) { diff --git ql/src/test/queries/clientpositive/escape_comments.q ql/src/test/queries/clientpositive/escape_comments.q new file mode 100644 index 0000000..8c38690 --- /dev/null +++ ql/src/test/queries/clientpositive/escape_comments.q @@ -0,0 +1,20 @@ +create database escape_comments_db comment 'a\nb'; +use escape_comments_db; +create table escape_comments_tbl1 +(col1 string comment 'a\nb\'\;') comment 'a\nb' +partitioned by (p1 string comment 'a\nb'); +create view escape_comments_view1 (col1 comment 'a\nb') comment 'a\nb' +as select col1 from escape_comments_tbl1; +create index index2 on table escape_comments_tbl1(col1) as 'COMPACT' with deferred rebuild comment 'a\nb'; + +describe database extended escape_comments_db; +describe database escape_comments_db; +show create table escape_comments_tbl1; +describe formatted escape_comments_tbl1; +describe pretty escape_comments_tbl1; +describe escape_comments_tbl1; +show create table escape_comments_view1; +describe formatted escape_comments_view1; +show formatted index on escape_comments_tbl1; + +drop database escape_comments_db cascade; diff --git ql/src/test/results/clientpositive/escape_comments.q.out ql/src/test/results/clientpositive/escape_comments.q.out new file mode 100644 index 0000000..25af243 --- /dev/null +++ ql/src/test/results/clientpositive/escape_comments.q.out @@ -0,0 +1,210 @@ +PREHOOK: query: create database escape_comments_db comment 'a\nb' +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:escape_comments_db +POSTHOOK: query: create database escape_comments_db comment 'a\nb' +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:escape_comments_db +PREHOOK: query: use escape_comments_db +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:escape_comments_db +POSTHOOK: query: use escape_comments_db +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:escape_comments_db +PREHOOK: query: create table escape_comments_tbl1 +(col1 string comment 'a\nb\';') comment 'a\nb' +partitioned by (p1 string comment 'a\nb') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:escape_comments_db +PREHOOK: Output: escape_comments_db@escape_comments_tbl1 +POSTHOOK: query: create table escape_comments_tbl1 +(col1 string comment 'a\nb\';') comment 'a\nb' +partitioned by (p1 string comment 'a\nb') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:escape_comments_db +POSTHOOK: Output: escape_comments_db@escape_comments_tbl1 +PREHOOK: query: create view escape_comments_view1 (col1 comment 'a\nb') comment 'a\nb' +as select col1 from escape_comments_tbl1 +PREHOOK: type: CREATEVIEW +PREHOOK: Input: escape_comments_db@escape_comments_tbl1 +PREHOOK: Output: database:escape_comments_db +PREHOOK: Output: escape_comments_db@escape_comments_view1 +POSTHOOK: query: create view escape_comments_view1 (col1 comment 'a\nb') comment 'a\nb' +as select col1 from escape_comments_tbl1 +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: escape_comments_db@escape_comments_tbl1 +POSTHOOK: Output: database:escape_comments_db +POSTHOOK: Output: escape_comments_db@escape_comments_view1 +PREHOOK: query: create index index2 on table escape_comments_tbl1(col1) as 'COMPACT' with deferred rebuild comment 'a\nb' +PREHOOK: type: CREATEINDEX +PREHOOK: Input: escape_comments_db@escape_comments_tbl1 +POSTHOOK: query: create index index2 on table escape_comments_tbl1(col1) as 'COMPACT' with deferred rebuild comment 'a\nb' +POSTHOOK: type: CREATEINDEX +POSTHOOK: Input: escape_comments_db@escape_comments_tbl1 +POSTHOOK: Output: escape_comments_db@escape_comments_db__escape_comments_tbl1_index2__ +PREHOOK: query: describe database extended escape_comments_db +PREHOOK: type: DESCDATABASE +PREHOOK: Input: database:escape_comments_db +POSTHOOK: query: describe database extended escape_comments_db +POSTHOOK: type: DESCDATABASE +POSTHOOK: Input: database:escape_comments_db +escape_comments_db a\nb location/in/test hive_test_user USER +PREHOOK: query: describe database escape_comments_db +PREHOOK: type: DESCDATABASE +PREHOOK: Input: database:escape_comments_db +POSTHOOK: query: describe database escape_comments_db +POSTHOOK: type: DESCDATABASE +POSTHOOK: Input: database:escape_comments_db +escape_comments_db a\nb location/in/test hive_test_user USER +PREHOOK: query: show create table escape_comments_tbl1 +PREHOOK: type: SHOW_CREATETABLE +PREHOOK: Input: escape_comments_db@escape_comments_tbl1 +POSTHOOK: query: show create table escape_comments_tbl1 +POSTHOOK: type: SHOW_CREATETABLE +POSTHOOK: Input: escape_comments_db@escape_comments_tbl1 +CREATE TABLE `escape_comments_tbl1`( + `col1` string COMMENT 'a\nb\'\;') +COMMENT 'a\nb' +PARTITIONED BY ( + `p1` string COMMENT 'a\nb') +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' +LOCATION +#### A masked pattern was here #### +TBLPROPERTIES ( +#### A masked pattern was here #### +PREHOOK: query: describe formatted escape_comments_tbl1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: escape_comments_db@escape_comments_tbl1 +POSTHOOK: query: describe formatted escape_comments_tbl1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: escape_comments_db@escape_comments_tbl1 +# col_name data_type comment + +col1 string a + b'; + +# Partition Information +# col_name data_type comment + +p1 string a + b + +# Detailed Table Information +Database: escape_comments_db +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + comment a\nb +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: describe pretty escape_comments_tbl1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: escape_comments_db@escape_comments_tbl1 +POSTHOOK: query: describe pretty escape_comments_tbl1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: escape_comments_db@escape_comments_tbl1 +col_name data_type comment + +col1 string a + b'; +p1 string a + b + +# Partition Information +col_name data_type comment + +p1 string a + b +PREHOOK: query: describe escape_comments_tbl1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: escape_comments_db@escape_comments_tbl1 +POSTHOOK: query: describe escape_comments_tbl1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: escape_comments_db@escape_comments_tbl1 +col1 string a + b'; +p1 string a + b + +# Partition Information +# col_name data_type comment + +p1 string a + b +PREHOOK: query: show create table escape_comments_view1 +PREHOOK: type: SHOW_CREATETABLE +PREHOOK: Input: escape_comments_db@escape_comments_view1 +POSTHOOK: query: show create table escape_comments_view1 +POSTHOOK: type: SHOW_CREATETABLE +POSTHOOK: Input: escape_comments_db@escape_comments_view1 +CREATE VIEW `escape_comments_view1` AS SELECT `col1` AS `col1` FROM (select `escape_comments_tbl1`.`col1` from `escape_comments_db`.`escape_comments_tbl1`) `escape_comments_db.escape_comments_view1` +PREHOOK: query: describe formatted escape_comments_view1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: escape_comments_db@escape_comments_view1 +POSTHOOK: query: describe formatted escape_comments_view1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: escape_comments_db@escape_comments_view1 +# col_name data_type comment + +col1 string a + b + +# Detailed Table Information +Database: escape_comments_db +#### A masked pattern was here #### +Retention: 0 +Table Type: VIRTUAL_VIEW +Table Parameters: + comment a\nb +#### A masked pattern was here #### + +# Storage Information +SerDe Library: null +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] + +# View Information +View Original Text: select col1 from escape_comments_tbl1 +View Expanded Text: SELECT `col1` AS `col1` FROM (select `escape_comments_tbl1`.`col1` from `escape_comments_db`.`escape_comments_tbl1`) `escape_comments_db.escape_comments_view1` +PREHOOK: query: show formatted index on escape_comments_tbl1 +PREHOOK: type: SHOWINDEXES +POSTHOOK: query: show formatted index on escape_comments_tbl1 +POSTHOOK: type: SHOWINDEXES +idx_name tab_name col_names idx_tab_name idx_type comment + + +index2 escape_comments_tbl1 col1 escape_comments_db__escape_comments_tbl1_index2__ compact a\nb +PREHOOK: query: drop database escape_comments_db cascade +PREHOOK: type: DROPDATABASE +PREHOOK: Input: database:escape_comments_db +PREHOOK: Output: database:escape_comments_db +PREHOOK: Output: escape_comments_db@escape_comments_db__escape_comments_tbl1_index2__ +PREHOOK: Output: escape_comments_db@escape_comments_tbl1 +PREHOOK: Output: escape_comments_db@escape_comments_view1 +POSTHOOK: query: drop database escape_comments_db cascade +POSTHOOK: type: DROPDATABASE +POSTHOOK: Input: database:escape_comments_db +POSTHOOK: Output: database:escape_comments_db +POSTHOOK: Output: escape_comments_db@escape_comments_db__escape_comments_tbl1_index2__ +POSTHOOK: Output: escape_comments_db@escape_comments_tbl1 +POSTHOOK: Output: escape_comments_db@escape_comments_view1