diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index d759739..ee34984 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -427,6 +427,10 @@ //{2} should be lockid
   LOCK_ACQUIRE_TIMEDOUT(10307, "Lock acquisition for {0} timed out after {1}ms. {2}", true),
   COMPILE_LOCK_TIMED_OUT(10308, "Attempt to acquire compile lock timed out.", true),
+  CANNOT_CHANGE_SERDE(10309, "Changing SerDe (from {0}) is not supported for table {1}. File format may be incompatible", true),
+  CANNOT_CHANGE_FILEFORMAT(10310, "Changing file format (from {0}) is not supported for table {1}", true),
+  CANNOT_REORDER_COLUMNS(10311, "Reordering columns is not supported for table {0}. SerDe may be incompatible", true),
+  CANNOT_CHANGE_COLUMN_TYPE(10312, "Changing from type {0} to {1} is not supported for table {2}. SerDe may be incompatible", true),
 
   //========================== 20000 range starts here ========================//
   SCRIPT_INIT_ERROR(20000, "Unable to initialize custom script."),
   SCRIPT_IO_ERROR(20001, "An error occurred while reading or writing to your custom script. "
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index 30cae88..9b7c78b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -18,10 +18,12 @@
 package org.apache.hadoop.hive.ql.exec;
 
+import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Iterables;
 
 import org.apache.commons.lang.StringEscapeUtils;
 import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
 import org.apache.hadoop.mapreduce.MRJobConfig;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -3295,6 +3297,11 @@ private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Part
       String comment = alterTbl.getNewColComment();
       boolean first = alterTbl.getFirst();
       String afterCol = alterTbl.getAfterCol();
+      // if orc table, restrict reordering columns as it will break schema evolution
+      boolean isOrc = sd.getInputFormat().equals(OrcInputFormat.class.getName());
+      if (isOrc && (first || (afterCol != null && !afterCol.trim().isEmpty()))) {
+        throw new HiveException(ErrorMsg.CANNOT_REORDER_COLUMNS, alterTbl.getOldName());
+      }
       FieldSchema column = null;
 
       boolean found = false;
@@ -3312,6 +3319,12 @@ private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Part
           throw new HiveException(ErrorMsg.DUPLICATE_COLUMN_NAMES, newName);
         } else if (oldColName.equalsIgnoreCase(oldName)) {
           col.setName(newName);
+          // if orc table, restrict changing column types. Only integer type promotion is supported.
+          // tinyint -> smallint -> int -> bigint
+          if (isOrc && !isSupportedTypeChange(col.getType(), type)) {
+            throw new HiveException(ErrorMsg.CANNOT_CHANGE_COLUMN_TYPE, col.getType(), type,
+                alterTbl.getOldName());
+          }
           if (type != null && !type.trim().equals("")) {
             col.setType(type);
           }
@@ -3380,6 +3393,10 @@ private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Part
       StorageDescriptor sd = (part == null ? tbl.getTTable().getSd() : part.getTPartition().getSd());
       String serdeName = alterTbl.getSerdeName();
       String oldSerdeName = sd.getSerdeInfo().getSerializationLib();
+      // if orc table, restrict changing the serde as it can break schema evolution
+      if (oldSerdeName.equalsIgnoreCase("org.apache.hadoop.hive.ql.io.orc.OrcSerde")) {
+        throw new HiveException(ErrorMsg.CANNOT_CHANGE_SERDE, "OrcSerde", alterTbl.getOldName());
+      }
       sd.getSerdeInfo().setSerializationLib(serdeName);
       if ((alterTbl.getProps() != null) && (alterTbl.getProps().size() > 0)) {
         sd.getSerdeInfo().getParameters().putAll(alterTbl.getProps());
@@ -3404,6 +3421,11 @@ private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Part
       }
     } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDFILEFORMAT) {
       StorageDescriptor sd = (part == null ? tbl.getTTable().getSd() : part.getTPartition().getSd());
+      // if orc table, restrict changing the file format as it can break schema evolution
+      if (sd.getInputFormat().equals(OrcInputFormat.class.getName())
+          && !alterTbl.getInputFormat().equals(OrcInputFormat.class.getName())) {
+        throw new HiveException(ErrorMsg.CANNOT_CHANGE_FILEFORMAT, "ORC", alterTbl.getOldName());
+      }
       sd.setInputFormat(alterTbl.getInputFormat());
       sd.setOutputFormat(alterTbl.getOutputFormat());
       if (alterTbl.getSerdeName() != null) {
@@ -3510,6 +3532,46 @@ private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Part
 
     return 0;
   }
+
+  // don't change the order of enums as ordinal values are used to check for valid type promotions
+  enum PromotableTypes {
+    TINYINT,
+    SMALLINT,
+    INT,
+    BIGINT;
+
+    static List<String> types() {
+      return ImmutableList.of(TINYINT.toString().toLowerCase(), SMALLINT.toString().toLowerCase(),
+          INT.toString().toLowerCase(), BIGINT.toString().toLowerCase());
+    }
+  }
+
+  // for ORC, only supported type promotions are tinyint -> smallint -> int -> bigint. No other
+  // type promotions are supported at this point
+  private boolean isSupportedTypeChange(String currentType, String newType) {
+    if (currentType != null && newType != null) {
+      currentType = currentType.toLowerCase().trim();
+      newType = newType.toLowerCase().trim();
+      // no type change
+      if (currentType.equals(newType)) {
+        return true;
+      }
+      if (PromotableTypes.types().contains(currentType)
+          && PromotableTypes.types().contains(newType)) {
+        PromotableTypes pCurrentType = PromotableTypes.valueOf(currentType.toUpperCase());
+        PromotableTypes pNewType = PromotableTypes.valueOf(newType.toUpperCase());
+        if (pNewType.ordinal() >= pCurrentType.ordinal()) {
+          return true;
+        } else {
+          return false;
+        }
+      } else {
+        return false;
+      }
+    }
+    return true;
+  }
+
   /**
    * Drop a given table or some partitions. DropTableDesc is currently used for both.
    *
diff --git a/ql/src/test/queries/clientnegative/orc_change_fileformat.q b/ql/src/test/queries/clientnegative/orc_change_fileformat.q
new file mode 100644
index 0000000..5b2a7e6
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/orc_change_fileformat.q
@@ -0,0 +1,2 @@
+create table src_orc (key tinyint, val string) stored as orc;
+alter table src_orc set fileformat textfile;
diff --git a/ql/src/test/queries/clientnegative/orc_change_serde.q b/ql/src/test/queries/clientnegative/orc_change_serde.q
new file mode 100644
index 0000000..e7b70fd
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/orc_change_serde.q
@@ -0,0 +1,2 @@
+create table src_orc (key tinyint, val string) stored as orc;
+alter table src_orc set serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe';
diff --git a/ql/src/test/queries/clientnegative/orc_reorder_columns1.q b/ql/src/test/queries/clientnegative/orc_reorder_columns1.q
new file mode 100644
index 0000000..2f43ddb
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/orc_reorder_columns1.q
@@ -0,0 +1,2 @@
+create table src_orc (key tinyint, val string) stored as orc;
+alter table src_orc change key k tinyint first;
diff --git a/ql/src/test/queries/clientnegative/orc_reorder_columns2.q b/ql/src/test/queries/clientnegative/orc_reorder_columns2.q
new file mode 100644
index 0000000..3634d2d
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/orc_reorder_columns2.q
@@ -0,0 +1,2 @@
+create table src_orc (key tinyint, val string) stored as orc;
+alter table src_orc change key k tinyint after val;
diff --git a/ql/src/test/queries/clientnegative/orc_replace_columns.q b/ql/src/test/queries/clientnegative/orc_replace_columns.q
new file mode 100644
index 0000000..e5f9449
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/orc_replace_columns.q
@@ -0,0 +1,2 @@
+create table src_orc (key tinyint, val string) stored as orc;
+alter table src_orc replace columns (k int);
diff --git a/ql/src/test/queries/clientnegative/orc_type_promotion1.q b/ql/src/test/queries/clientnegative/orc_type_promotion1.q
new file mode 100644
index 0000000..e465b2a
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/orc_type_promotion1.q
@@ -0,0 +1,2 @@
+create table src_orc (key tinyint, val string) stored as orc;
+alter table src_orc change key key float;
diff --git a/ql/src/test/queries/clientnegative/orc_type_promotion2.q b/ql/src/test/queries/clientnegative/orc_type_promotion2.q
new file mode 100644
index 0000000..61724f5
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/orc_type_promotion2.q
@@ -0,0 +1,11 @@
+create table src_orc (key tinyint, val string) stored as orc;
+desc src_orc;
+alter table src_orc change key key tinyint;
+desc src_orc;
+alter table src_orc change key key smallint;
+desc src_orc;
+alter table src_orc change key key int;
+desc src_orc;
+alter table src_orc change key key bigint;
+desc src_orc;
+alter table src_orc change val val char(100);
diff --git a/ql/src/test/results/clientnegative/orc_change_fileformat.q.out b/ql/src/test/results/clientnegative/orc_change_fileformat.q.out
new file mode 100644
index 0000000..db454fe
--- /dev/null
+++ b/ql/src/test/results/clientnegative/orc_change_fileformat.q.out
@@ -0,0 +1,13 @@
+PREHOOK: query: create table src_orc (key tinyint, val string) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_orc
+POSTHOOK: query: create table src_orc (key tinyint, val string) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src_orc
+PREHOOK: query: alter table src_orc set fileformat textfile
+PREHOOK: type: ALTERTABLE_FILEFORMAT
+PREHOOK: Input: default@src_orc
+PREHOOK: Output: default@src_orc
+FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Changing file format (from ORC) is not supported for table default.src_orc
diff --git a/ql/src/test/results/clientnegative/orc_change_serde.q.out b/ql/src/test/results/clientnegative/orc_change_serde.q.out
new file mode 100644
index 0000000..7f882b5
--- /dev/null
+++ b/ql/src/test/results/clientnegative/orc_change_serde.q.out
@@ -0,0 +1,13 @@
+PREHOOK: query: create table src_orc (key tinyint, val string) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_orc
+POSTHOOK: query: create table src_orc (key tinyint, val string) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src_orc
+PREHOOK: query: alter table src_orc set serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
+PREHOOK: type: ALTERTABLE_SERIALIZER
+PREHOOK: Input: default@src_orc
+PREHOOK: Output: default@src_orc
+FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Changing SerDe (from OrcSerde) is not supported for table default.src_orc. File format may be incompatible
diff --git a/ql/src/test/results/clientnegative/orc_reorder_columns1.q.out b/ql/src/test/results/clientnegative/orc_reorder_columns1.q.out
new file mode 100644
index 0000000..c581f4e
--- /dev/null
+++ b/ql/src/test/results/clientnegative/orc_reorder_columns1.q.out
@@ -0,0 +1,13 @@
+PREHOOK: query: create table src_orc (key tinyint, val string) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_orc
+POSTHOOK: query: create table src_orc (key tinyint, val string) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src_orc
+PREHOOK: query: alter table src_orc change key k tinyint first
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@src_orc
+PREHOOK: Output: default@src_orc
+FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Reordering columns is not supported for table default.src_orc. SerDe may be incompatible
diff --git a/ql/src/test/results/clientnegative/orc_reorder_columns2.q.out b/ql/src/test/results/clientnegative/orc_reorder_columns2.q.out
new file mode 100644
index 0000000..54dcdec
--- /dev/null
+++ b/ql/src/test/results/clientnegative/orc_reorder_columns2.q.out
@@ -0,0 +1,13 @@
+PREHOOK: query: create table src_orc (key tinyint, val string) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_orc
+POSTHOOK: query: create table src_orc (key tinyint, val string) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src_orc
+PREHOOK: query: alter table src_orc change key k tinyint after val
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@src_orc
+PREHOOK: Output: default@src_orc
+FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Reordering columns is not supported for table default.src_orc. SerDe may be incompatible
diff --git a/ql/src/test/results/clientnegative/orc_replace_columns.q.out b/ql/src/test/results/clientnegative/orc_replace_columns.q.out
new file mode 100644
index 0000000..296a069
--- /dev/null
+++ b/ql/src/test/results/clientnegative/orc_replace_columns.q.out
@@ -0,0 +1,13 @@
+PREHOOK: query: create table src_orc (key tinyint, val string) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_orc
+POSTHOOK: query: create table src_orc (key tinyint, val string) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src_orc
+PREHOOK: query: alter table src_orc replace columns (k int)
+PREHOOK: type: ALTERTABLE_REPLACECOLS
+PREHOOK: Input: default@src_orc
+PREHOOK: Output: default@src_orc
+FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Replace columns is not supported for table default.src_orc. SerDe may be incompatible.
diff --git a/ql/src/test/results/clientnegative/orc_type_promotion1.q.out b/ql/src/test/results/clientnegative/orc_type_promotion1.q.out
new file mode 100644
index 0000000..633b4da
--- /dev/null
+++ b/ql/src/test/results/clientnegative/orc_type_promotion1.q.out
@@ -0,0 +1,13 @@
+PREHOOK: query: create table src_orc (key tinyint, val string) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_orc
+POSTHOOK: query: create table src_orc (key tinyint, val string) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src_orc
+PREHOOK: query: alter table src_orc change key key float
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@src_orc
+PREHOOK: Output: default@src_orc
+FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Changing from type tinyint to float is not supported for table default.src_orc. SerDe may be incompatible
diff --git a/ql/src/test/results/clientnegative/orc_type_promotion2.q.out b/ql/src/test/results/clientnegative/orc_type_promotion2.q.out
new file mode 100644
index 0000000..49e2f89
--- /dev/null
+++ b/ql/src/test/results/clientnegative/orc_type_promotion2.q.out
@@ -0,0 +1,85 @@
+PREHOOK: query: create table src_orc (key tinyint, val string) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_orc
+POSTHOOK: query: create table src_orc (key tinyint, val string) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src_orc
+PREHOOK: query: desc src_orc
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@src_orc
+POSTHOOK: query: desc src_orc
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@src_orc
+key	tinyint
+val	string
+PREHOOK: query: alter table src_orc change key key tinyint
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@src_orc
+PREHOOK: Output: default@src_orc
+POSTHOOK: query: alter table src_orc change key key tinyint
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@src_orc
+POSTHOOK: Output: default@src_orc
+PREHOOK: query: desc src_orc
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@src_orc
+POSTHOOK: query: desc src_orc
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@src_orc
+key	tinyint
+val	string
+PREHOOK: query: alter table src_orc change key key smallint
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@src_orc
+PREHOOK: Output: default@src_orc
+POSTHOOK: query: alter table src_orc change key key smallint
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@src_orc
+POSTHOOK: Output: default@src_orc
+PREHOOK: query: desc src_orc
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@src_orc
+POSTHOOK: query: desc src_orc
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@src_orc
+key	smallint
+val	string
+PREHOOK: query: alter table src_orc change key key int
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@src_orc
+PREHOOK: Output: default@src_orc
+POSTHOOK: query: alter table src_orc change key key int
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@src_orc
+POSTHOOK: Output: default@src_orc
+PREHOOK: query: desc src_orc
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@src_orc
+POSTHOOK: query: desc src_orc
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@src_orc
+key	int
+val	string
+PREHOOK: query: alter table src_orc change key key bigint
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@src_orc
+PREHOOK: Output: default@src_orc
+POSTHOOK: query: alter table src_orc change key key bigint
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@src_orc
+POSTHOOK: Output: default@src_orc
+PREHOOK: query: desc src_orc
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@src_orc
+POSTHOOK: query: desc src_orc
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@src_orc
+key	bigint
+val	string
+PREHOOK: query: alter table src_orc change val val char(100)
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@src_orc
+PREHOOK: Output: default@src_orc
+FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Changing from type string to char(100) is not supported for table default.src_orc. SerDe may be incompatible
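
For reference, the promotion rule the patch enforces can be exercised outside Hive with a small standalone sketch. The class below is illustrative only (it is not part of the patch, and none of its names exist in Hive); it simply mirrors the ordinal-based check that the new DDLTask.isSupportedTypeChange method performs for the tinyint -> smallint -> int -> bigint chain.

import java.util.Arrays;
import java.util.List;

public class OrcTypePromotionSketch {

  // Declaration order matters: ordinals encode the only supported widening chain
  // tinyint -> smallint -> int -> bigint.
  enum PromotableTypes { TINYINT, SMALLINT, INT, BIGINT }

  private static final List<String> PROMOTABLE =
      Arrays.asList("tinyint", "smallint", "int", "bigint");

  static boolean isSupportedTypeChange(String currentType, String newType) {
    if (currentType == null || newType == null) {
      return true; // nothing to validate
    }
    currentType = currentType.toLowerCase().trim();
    newType = newType.toLowerCase().trim();
    if (currentType.equals(newType)) {
      return true; // no type change
    }
    if (PROMOTABLE.contains(currentType) && PROMOTABLE.contains(newType)) {
      // widening to an equal or larger ordinal is allowed; narrowing is rejected
      return PromotableTypes.valueOf(newType.toUpperCase()).ordinal()
          >= PromotableTypes.valueOf(currentType.toUpperCase()).ordinal();
    }
    return false; // any other conversion (e.g. tinyint -> float, string -> char(100)) is rejected
  }

  public static void main(String[] args) {
    System.out.println(isSupportedTypeChange("tinyint", "bigint"));   // true
    System.out.println(isSupportedTypeChange("int", "smallint"));     // false: narrowing
    System.out.println(isSupportedTypeChange("string", "char(100)")); // false: not promotable
  }
}

Like the patch, the sketch treats a null current or new type as "nothing to validate" and rejects every non-integer conversion, which is why orc_type_promotion2.q succeeds through the widening steps and fails only on the final string -> char(100) change.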