diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index 8801b4ef4686c654cbcc69e1241bbdf807afddbf..f13781943da240bcc81873daf4682ffee7415a5a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -227,6 +227,7 @@ import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe; import org.apache.hadoop.hive.serde2.SerDeSpec; +import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils; import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; import org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; @@ -3644,6 +3645,10 @@ private boolean isSchemaEvolutionEnabled(Table tbl) { return false; } + private static StorageDescriptor retrieveStorageDescriptor(Table tbl, Partition part) { + return (part == null ? tbl.getTTable().getSd() : part.getTPartition().getSd()); + } + private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Partition part) throws HiveException { EnvironmentContext environmentContext = alterTbl.getEnvironmentContext(); @@ -3661,11 +3666,12 @@ private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Part tbl.setDbName(Utilities.getDatabaseName(alterTbl.getNewName())); tbl.setTableName(Utilities.getTableName(alterTbl.getNewName())); } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDCOLS) { + StorageDescriptor sd = retrieveStorageDescriptor(tbl, part); + String serializationLib = sd.getSerdeInfo().getSerializationLib(); + AvroSerdeUtils.handleAlterTableForAvro(conf, serializationLib, tbl.getTTable().getParameters()); List oldCols = (part == null ? tbl.getColsForMetastore() : part.getColsForMetastore()); - StorageDescriptor sd = (part == null ? tbl.getTTable().getSd() : part.getTPartition().getSd()); List newCols = alterTbl.getNewCols(); - String serializationLib = sd.getSerdeInfo().getSerializationLib(); if (serializationLib.equals( "org.apache.hadoop.hive.serde.thrift.columnsetSerDe")) { console @@ -3690,9 +3696,11 @@ private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Part sd.setCols(oldCols); } } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.RENAMECOLUMN) { + StorageDescriptor sd = retrieveStorageDescriptor(tbl, part); + String serializationLib = sd.getSerdeInfo().getSerializationLib(); + AvroSerdeUtils.handleAlterTableForAvro(conf, serializationLib, tbl.getTTable().getParameters()); List oldCols = (part == null ? tbl.getColsForMetastore() : part.getColsForMetastore()); - StorageDescriptor sd = (part == null ? tbl.getTTable().getSd() : part.getTPartition().getSd()); List newCols = new ArrayList(); Iterator iterOldCols = oldCols.iterator(); String oldName = alterTbl.getOldColName(); @@ -3762,7 +3770,7 @@ private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Part sd.setCols(newCols); } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.REPLACECOLS) { - StorageDescriptor sd = (part == null ? tbl.getTTable().getSd() : part.getTPartition().getSd()); + StorageDescriptor sd = retrieveStorageDescriptor(tbl, part); // change SerDe to LazySimpleSerDe if it is columnsetSerDe String serializationLib = sd.getSerdeInfo().getSerializationLib(); if (serializationLib.equals( @@ -3817,10 +3825,10 @@ private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Part } } } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDSERDEPROPS) { - StorageDescriptor sd = (part == null ? tbl.getTTable().getSd() : part.getTPartition().getSd()); + StorageDescriptor sd = retrieveStorageDescriptor(tbl, part); sd.getSerdeInfo().getParameters().putAll(alterTbl.getProps()); } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDSERDE) { - StorageDescriptor sd = (part == null ? tbl.getTTable().getSd() : part.getTPartition().getSd()); + StorageDescriptor sd = retrieveStorageDescriptor(tbl, part); String serdeName = alterTbl.getSerdeName(); String oldSerdeName = sd.getSerdeInfo().getSerializationLib(); // if orc table, restrict changing the serde as it can break schema evolution @@ -3853,7 +3861,7 @@ private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Part } } } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDFILEFORMAT) { - StorageDescriptor sd = (part == null ? tbl.getTTable().getSd() : part.getTPartition().getSd()); + StorageDescriptor sd = retrieveStorageDescriptor(tbl, part); // if orc table, restrict changing the file format as it can break schema evolution if (isSchemaEvolutionEnabled(tbl) && sd.getInputFormat().equals(OrcInputFormat.class.getName()) @@ -3866,7 +3874,7 @@ private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Part sd.getSerdeInfo().setSerializationLib(alterTbl.getSerdeName()); } } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDCLUSTERSORTCOLUMN) { - StorageDescriptor sd = (part == null ? tbl.getTTable().getSd() : part.getTPartition().getSd()); + StorageDescriptor sd = retrieveStorageDescriptor(tbl, part); // validate sort columns and bucket columns List columns = Utilities.getColumnNamesFromFieldSchema(tbl .getCols()); @@ -3891,7 +3899,7 @@ private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Part sd.setSortCols(alterTbl.getSortColumns()); } } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ALTERLOCATION) { - StorageDescriptor sd = (part == null ? tbl.getTTable().getSd() : part.getTPartition().getSd()); + StorageDescriptor sd = retrieveStorageDescriptor(tbl, part); String newLocation = alterTbl.getNewLocation(); try { URI locUri = new URI(newLocation); diff --git a/ql/src/test/queries/clientnegative/avro_add_column_extschema.q b/ql/src/test/queries/clientnegative/avro_add_column_extschema.q new file mode 100644 index 0000000000000000000000000000000000000000..d36906c4f4749388872e2134280f909a557526cb --- /dev/null +++ b/ql/src/test/queries/clientnegative/avro_add_column_extschema.q @@ -0,0 +1,18 @@ +-- verify that we can modify avro tables created by externalschemas + +CREATE TABLE avro_extschema +STORED AS AVRO +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "org.apache.hive", + "name": "ext_schema", + "type": "record", + "fields": [ + { "name":"number", "type":"int" }, + { "name":"first_name", "type":"string" }, + { "name":"last_name", "type":"string" } + ] }'); + +DESCRIBE avro_extschema; + +ALTER TABLE avro_extschema +CHANGE COLUMN number number bigint; \ No newline at end of file diff --git a/ql/src/test/queries/clientpositive/avro_add_column_extschema.q b/ql/src/test/queries/clientpositive/avro_add_column_extschema.q new file mode 100644 index 0000000000000000000000000000000000000000..12346780785a6fd6fa5f5c83be4e184df58a128e --- /dev/null +++ b/ql/src/test/queries/clientpositive/avro_add_column_extschema.q @@ -0,0 +1,48 @@ +-- verify that we can modify avro tables created by externalschemas if we drop avro-related tblproperties + +CREATE TABLE avro_extschema_literal +STORED AS AVRO +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "org.apache.hive", + "name": "ext_schema", + "type": "record", + "fields": [ + { "name":"number", "type":"int" }, + { "name":"first_name", "type":"string" }, + { "name":"last_name", "type":"string" } + ] }'); + +DESCRIBE avro_extschema_literal; + +ALTER TABLE avro_extschema_literal UNSET TBLPROPERTIES ('avro.schema.literal'); + +ALTER TABLE avro_extschema_literal +CHANGE COLUMN number number bigint; + +DESCRIBE avro_extschema_literal; + +ALTER TABLE avro_extschema_literal +ADD COLUMNS (age int); + +DESCRIBE avro_extschema_literal; + +dfs -cp ${system:hive.root}data/files/grad.avsc ${system:test.tmp.dir}/; + + +CREATE TABLE avro_extschema_url +STORED AS AVRO +TBLPROPERTIES ('avro.schema.url'='${system:test.tmp.dir}/grad.avsc'); + +DESCRIBE avro_extschema_url; + +ALTER TABLE avro_extschema_url UNSET TBLPROPERTIES ('avro.schema.url'); + +ALTER TABLE avro_extschema_url +CHANGE COLUMN col6 col6 bigint; + +DESCRIBE avro_extschema_url; + +ALTER TABLE avro_extschema_url +ADD COLUMNS (col7 int); + +DESCRIBE avro_extschema_url; \ No newline at end of file diff --git a/ql/src/test/results/clientnegative/avro_add_column_extschema.q.out b/ql/src/test/results/clientnegative/avro_add_column_extschema.q.out new file mode 100644 index 0000000000000000000000000000000000000000..a1b1b9ca6ef432d3de33c31ab7f246f9043d1753 --- /dev/null +++ b/ql/src/test/results/clientnegative/avro_add_column_extschema.q.out @@ -0,0 +1,43 @@ +PREHOOK: query: CREATE TABLE avro_extschema +STORED AS AVRO +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "org.apache.hive", + "name": "ext_schema", + "type": "record", + "fields": [ + { "name":"number", "type":"int" }, + { "name":"first_name", "type":"string" }, + { "name":"last_name", "type":"string" } + ] }') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@avro_extschema +POSTHOOK: query: CREATE TABLE avro_extschema +STORED AS AVRO +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "org.apache.hive", + "name": "ext_schema", + "type": "record", + "fields": [ + { "name":"number", "type":"int" }, + { "name":"first_name", "type":"string" }, + { "name":"last_name", "type":"string" } + ] }') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@avro_extschema +PREHOOK: query: DESCRIBE avro_extschema +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@avro_extschema +POSTHOOK: query: DESCRIBE avro_extschema +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@avro_extschema +number int +first_name string +last_name string +PREHOOK: query: ALTER TABLE avro_extschema +CHANGE COLUMN number number bigint +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@avro_extschema +PREHOOK: Output: default@avro_extschema +FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Not allowed to alter schema of Avro stored table having external schema. Consider removing avro.schema.literal or avro.schema.url from table properties. diff --git a/ql/src/test/results/clientpositive/avro_add_column_extschema.q.out b/ql/src/test/results/clientpositive/avro_add_column_extschema.q.out new file mode 100644 index 0000000000000000000000000000000000000000..670dc1e21d3982eb80f5d160b374d491a06a4a3f --- /dev/null +++ b/ql/src/test/results/clientpositive/avro_add_column_extschema.q.out @@ -0,0 +1,161 @@ +PREHOOK: query: CREATE TABLE avro_extschema_literal +STORED AS AVRO +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "org.apache.hive", + "name": "ext_schema", + "type": "record", + "fields": [ + { "name":"number", "type":"int" }, + { "name":"first_name", "type":"string" }, + { "name":"last_name", "type":"string" } + ] }') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@avro_extschema_literal +POSTHOOK: query: CREATE TABLE avro_extschema_literal +STORED AS AVRO +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "org.apache.hive", + "name": "ext_schema", + "type": "record", + "fields": [ + { "name":"number", "type":"int" }, + { "name":"first_name", "type":"string" }, + { "name":"last_name", "type":"string" } + ] }') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@avro_extschema_literal +PREHOOK: query: DESCRIBE avro_extschema_literal +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@avro_extschema_literal +POSTHOOK: query: DESCRIBE avro_extschema_literal +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@avro_extschema_literal +number int +first_name string +last_name string +PREHOOK: query: ALTER TABLE avro_extschema_literal UNSET TBLPROPERTIES ('avro.schema.literal') +PREHOOK: type: ALTERTABLE_PROPERTIES +PREHOOK: Input: default@avro_extschema_literal +PREHOOK: Output: default@avro_extschema_literal +POSTHOOK: query: ALTER TABLE avro_extschema_literal UNSET TBLPROPERTIES ('avro.schema.literal') +POSTHOOK: type: ALTERTABLE_PROPERTIES +POSTHOOK: Input: default@avro_extschema_literal +POSTHOOK: Output: default@avro_extschema_literal +PREHOOK: query: ALTER TABLE avro_extschema_literal +CHANGE COLUMN number number bigint +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@avro_extschema_literal +PREHOOK: Output: default@avro_extschema_literal +POSTHOOK: query: ALTER TABLE avro_extschema_literal +CHANGE COLUMN number number bigint +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@avro_extschema_literal +POSTHOOK: Output: default@avro_extschema_literal +PREHOOK: query: DESCRIBE avro_extschema_literal +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@avro_extschema_literal +POSTHOOK: query: DESCRIBE avro_extschema_literal +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@avro_extschema_literal +number bigint +first_name string +last_name string +PREHOOK: query: ALTER TABLE avro_extschema_literal +ADD COLUMNS (age int) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@avro_extschema_literal +PREHOOK: Output: default@avro_extschema_literal +POSTHOOK: query: ALTER TABLE avro_extschema_literal +ADD COLUMNS (age int) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@avro_extschema_literal +POSTHOOK: Output: default@avro_extschema_literal +PREHOOK: query: DESCRIBE avro_extschema_literal +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@avro_extschema_literal +POSTHOOK: query: DESCRIBE avro_extschema_literal +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@avro_extschema_literal +number bigint +first_name string +last_name string +age int +PREHOOK: query: CREATE TABLE avro_extschema_url +STORED AS AVRO +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@avro_extschema_url +POSTHOOK: query: CREATE TABLE avro_extschema_url +STORED AS AVRO +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@avro_extschema_url +PREHOOK: query: DESCRIBE avro_extschema_url +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@avro_extschema_url +POSTHOOK: query: DESCRIBE avro_extschema_url +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@avro_extschema_url +col1 string +col2 string +col3 double +col4 string +col5 string +col6 int +PREHOOK: query: ALTER TABLE avro_extschema_url UNSET TBLPROPERTIES ('avro.schema.url') +PREHOOK: type: ALTERTABLE_PROPERTIES +PREHOOK: Input: default@avro_extschema_url +PREHOOK: Output: default@avro_extschema_url +POSTHOOK: query: ALTER TABLE avro_extschema_url UNSET TBLPROPERTIES ('avro.schema.url') +POSTHOOK: type: ALTERTABLE_PROPERTIES +POSTHOOK: Input: default@avro_extschema_url +POSTHOOK: Output: default@avro_extschema_url +PREHOOK: query: ALTER TABLE avro_extschema_url +CHANGE COLUMN col6 col6 bigint +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@avro_extschema_url +PREHOOK: Output: default@avro_extschema_url +POSTHOOK: query: ALTER TABLE avro_extschema_url +CHANGE COLUMN col6 col6 bigint +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@avro_extschema_url +POSTHOOK: Output: default@avro_extschema_url +PREHOOK: query: DESCRIBE avro_extschema_url +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@avro_extschema_url +POSTHOOK: query: DESCRIBE avro_extschema_url +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@avro_extschema_url +col1 string +col2 string +col3 double +col4 string +col5 string +col6 bigint +PREHOOK: query: ALTER TABLE avro_extschema_url +ADD COLUMNS (col7 int) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@avro_extschema_url +PREHOOK: Output: default@avro_extschema_url +POSTHOOK: query: ALTER TABLE avro_extschema_url +ADD COLUMNS (col7 int) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@avro_extschema_url +POSTHOOK: Output: default@avro_extschema_url +PREHOOK: query: DESCRIBE avro_extschema_url +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@avro_extschema_url +POSTHOOK: query: DESCRIBE avro_extschema_url +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@avro_extschema_url +col1 string +col2 string +col3 double +col4 string +col5 string +col6 bigint +col7 int diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java index f18585da1d108abdd500437362eb388b21030ec7..391a300549d8e549fa3cfa089696a14b1240df1a 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java @@ -45,6 +45,7 @@ import java.util.Arrays; import java.util.List; import java.util.ArrayList; +import java.util.Map; import java.util.Properties; /** @@ -307,4 +308,25 @@ public static Schema getSchemaFor(URL url) { } } } + + /** + * Called on specific alter table events, removes schema url and schema literal from given tblproperties + * After the change, HMS solely will be responsible for handling the schema + * + * @param conf + * @param serializationLib + * @param parameters + */ + public static void handleAlterTableForAvro(HiveConf conf, String serializationLib, Map parameters) { + if (AvroSerDe.class.getName().equals(serializationLib)) { + String literalPropName = AvroTableProperties.SCHEMA_LITERAL.getPropName(); + String urlPropName = AvroTableProperties.SCHEMA_URL.getPropName(); + + if (parameters.containsKey(literalPropName) || parameters.containsKey(urlPropName)) { + throw new RuntimeException("Not allowed to alter schema of Avro stored table having external schema." + + " Consider removing "+AvroTableProperties.SCHEMA_LITERAL.getPropName() + " or " + + AvroTableProperties.SCHEMA_URL.getPropName() + " from table properties."); + } + } + } }