diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index b27b663b94f41a8250b79139ed9f7275b10cf9a3..9b3e335dc34e0ca4addb01d02b0f0c584a07d8ff 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1204,6 +1204,11 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe", "SerDes retrieving schema from metastore. This is an internal parameter."), + HIVE_SERDES_AVRO_FROMEXTERNALSCHEMA_ALTERTABLE_ALLOWED("hive.serdes.avro.fromexternalschema.altertable.allowed", + false, + "Allows ALTER TABLE commands on an Avro stored table that was created from schema literal or url. " + + "Former URL or literal properties will be unset, schema will be handled by HMS after command execution."), + HIVEHISTORYFILELOC("hive.querylog.location", "${system:java.io.tmpdir}" + File.separator + "${system:user.name}", "Location of Hive run time structured log file"), diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index adabe70fa8f0fe1b990c6ac578a14ff5af06fc93..b8d06ff856da53eaabb96d41fc007f47355621a5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -220,6 +220,7 @@ import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe; import org.apache.hadoop.hive.serde2.SerDeSpec; +import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils; import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; import org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; @@ -3557,15 +3558,17 @@ private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Part 
environmentContext.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE); } + StorageDescriptor sd = (part == null ? tbl.getTTable().getSd() : part.getTPartition().getSd()); + String serializationLib = sd.getSerdeInfo().getSerializationLib(); + if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.RENAME) { tbl.setDbName(Utilities.getDatabaseName(alterTbl.getNewName())); tbl.setTableName(Utilities.getTableName(alterTbl.getNewName())); } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDCOLS) { + AvroSerdeUtils.handleAlterTableForAvro(conf, serializationLib, tbl.getTTable().getParameters()); List oldCols = (part == null ? tbl.getColsForMetastore() : part.getColsForMetastore()); - StorageDescriptor sd = (part == null ? tbl.getTTable().getSd() : part.getTPartition().getSd()); List newCols = alterTbl.getNewCols(); - String serializationLib = sd.getSerdeInfo().getSerializationLib(); if (serializationLib.equals( "org.apache.hadoop.hive.serde.thrift.columnsetSerDe")) { console @@ -3590,9 +3593,9 @@ private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Part sd.setCols(oldCols); } } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.RENAMECOLUMN) { + AvroSerdeUtils.handleAlterTableForAvro(conf, serializationLib, tbl.getTTable().getParameters()); List oldCols = (part == null ? tbl.getColsForMetastore() : part.getColsForMetastore()); - StorageDescriptor sd = (part == null ? tbl.getTTable().getSd() : part.getTPartition().getSd()); List newCols = new ArrayList(); Iterator iterOldCols = oldCols.iterator(); String oldName = alterTbl.getOldColName(); @@ -3662,9 +3665,7 @@ private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Part sd.setCols(newCols); } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.REPLACECOLS) { - StorageDescriptor sd = (part == null ? 
tbl.getTTable().getSd() : part.getTPartition().getSd()); // change SerDe to LazySimpleSerDe if it is columnsetSerDe - String serializationLib = sd.getSerdeInfo().getSerializationLib(); if (serializationLib.equals( "org.apache.hadoop.hive.serde.thrift.columnsetSerDe")) { console @@ -3717,10 +3718,8 @@ private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Part } } } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDSERDEPROPS) { - StorageDescriptor sd = (part == null ? tbl.getTTable().getSd() : part.getTPartition().getSd()); sd.getSerdeInfo().getParameters().putAll(alterTbl.getProps()); } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDSERDE) { - StorageDescriptor sd = (part == null ? tbl.getTTable().getSd() : part.getTPartition().getSd()); String serdeName = alterTbl.getSerdeName(); String oldSerdeName = sd.getSerdeInfo().getSerializationLib(); // if orc table, restrict changing the serde as it can break schema evolution @@ -3753,7 +3752,6 @@ private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Part } } } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDFILEFORMAT) { - StorageDescriptor sd = (part == null ? tbl.getTTable().getSd() : part.getTPartition().getSd()); // if orc table, restrict changing the file format as it can break schema evolution if (isSchemaEvolutionEnabled(tbl) && sd.getInputFormat().equals(OrcInputFormat.class.getName()) @@ -3766,7 +3764,6 @@ private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Part sd.getSerdeInfo().setSerializationLib(alterTbl.getSerdeName()); } } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDCLUSTERSORTCOLUMN) { - StorageDescriptor sd = (part == null ? 
tbl.getTTable().getSd() : part.getTPartition().getSd()); // validate sort columns and bucket columns List columns = Utilities.getColumnNamesFromFieldSchema(tbl .getCols()); @@ -3791,7 +3788,6 @@ private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Part sd.setSortCols(alterTbl.getSortColumns()); } } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ALTERLOCATION) { - StorageDescriptor sd = (part == null ? tbl.getTTable().getSd() : part.getTPartition().getSd()); String newLocation = alterTbl.getNewLocation(); try { URI locUri = new URI(newLocation); diff --git ql/src/test/queries/clientnegative/avro_add_column_extschema.q ql/src/test/queries/clientnegative/avro_add_column_extschema.q new file mode 100644 index 0000000000000000000000000000000000000000..22bcf25917c4d5b3732654eb65c2437aec6cc20c --- /dev/null +++ ql/src/test/queries/clientnegative/avro_add_column_extschema.q @@ -0,0 +1,20 @@ +-- verify that we cannot modify avro tables created by external schemas when it is not allowed + +SET hive.serdes.avro.fromexternalschema.altertable.allowed=false; + +CREATE TABLE avro_extschema +STORED AS AVRO +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "org.apache.hive", + "name": "ext_schema", + "type": "record", + "fields": [ + { "name":"number", "type":"int" }, + { "name":"first_name", "type":"string" }, + { "name":"last_name", "type":"string" } + ] }'); + +DESCRIBE avro_extschema; + +ALTER TABLE avro_extschema +CHANGE COLUMN number number bigint; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/avro_add_column_extschema.q ql/src/test/queries/clientpositive/avro_add_column_extschema.q new file mode 100644 index 0000000000000000000000000000000000000000..db74768999be570d068cffa6b89e8e84041ef85b --- /dev/null +++ ql/src/test/queries/clientpositive/avro_add_column_extschema.q @@ -0,0 +1,27 @@ +-- verify that we can modify avro tables created by external schemas + +SET hive.serdes.avro.fromexternalschema.altertable.allowed=true; + +CREATE 
TABLE avro_extschema +STORED AS AVRO +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "org.apache.hive", + "name": "ext_schema", + "type": "record", + "fields": [ + { "name":"number", "type":"int" }, + { "name":"first_name", "type":"string" }, + { "name":"last_name", "type":"string" } + ] }'); + +DESCRIBE avro_extschema; + +ALTER TABLE avro_extschema +CHANGE COLUMN number number bigint; + +DESCRIBE avro_extschema; + +ALTER TABLE avro_extschema +ADD COLUMNS (age int); + +DESCRIBE avro_extschema; \ No newline at end of file diff --git ql/src/test/results/clientnegative/avro_add_column_extschema.q.out ql/src/test/results/clientnegative/avro_add_column_extschema.q.out new file mode 100644 index 0000000000000000000000000000000000000000..1df7ea54cc4edcf9aa8fc46a5269feb2d2be1b33 --- /dev/null +++ ql/src/test/results/clientnegative/avro_add_column_extschema.q.out @@ -0,0 +1,43 @@ +PREHOOK: query: CREATE TABLE avro_extschema +STORED AS AVRO +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "org.apache.hive", + "name": "ext_schema", + "type": "record", + "fields": [ + { "name":"number", "type":"int" }, + { "name":"first_name", "type":"string" }, + { "name":"last_name", "type":"string" } + ] }') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@avro_extschema +POSTHOOK: query: CREATE TABLE avro_extschema +STORED AS AVRO +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "org.apache.hive", + "name": "ext_schema", + "type": "record", + "fields": [ + { "name":"number", "type":"int" }, + { "name":"first_name", "type":"string" }, + { "name":"last_name", "type":"string" } + ] }') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@avro_extschema +PREHOOK: query: DESCRIBE avro_extschema +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@avro_extschema +POSTHOOK: query: DESCRIBE avro_extschema +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@avro_extschema +number int +first_name 
string +last_name string +PREHOOK: query: ALTER TABLE avro_extschema +CHANGE COLUMN number number bigint +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@avro_extschema +PREHOOK: Output: default@avro_extschema +FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Not allowed to alter schema of Avro stored table having external schema. Consider adjusting hive.serdes.avro.fromexternalschema.altertable.allowed=true diff --git ql/src/test/results/clientpositive/avro_add_column_extschema.q.out ql/src/test/results/clientpositive/avro_add_column_extschema.q.out new file mode 100644 index 0000000000000000000000000000000000000000..1dc9fb9c6fbe95a46741e35d4dd33607dcf942df --- /dev/null +++ ql/src/test/results/clientpositive/avro_add_column_extschema.q.out @@ -0,0 +1,76 @@ +PREHOOK: query: CREATE TABLE avro_extschema +STORED AS AVRO +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "org.apache.hive", + "name": "ext_schema", + "type": "record", + "fields": [ + { "name":"number", "type":"int" }, + { "name":"first_name", "type":"string" }, + { "name":"last_name", "type":"string" } + ] }') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@avro_extschema +POSTHOOK: query: CREATE TABLE avro_extschema +STORED AS AVRO +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "org.apache.hive", + "name": "ext_schema", + "type": "record", + "fields": [ + { "name":"number", "type":"int" }, + { "name":"first_name", "type":"string" }, + { "name":"last_name", "type":"string" } + ] }') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@avro_extschema +PREHOOK: query: DESCRIBE avro_extschema +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@avro_extschema +POSTHOOK: query: DESCRIBE avro_extschema +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@avro_extschema +number int +first_name string +last_name string +PREHOOK: query: ALTER TABLE avro_extschema +CHANGE 
COLUMN number number bigint +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@avro_extschema +PREHOOK: Output: default@avro_extschema +POSTHOOK: query: ALTER TABLE avro_extschema +CHANGE COLUMN number number bigint +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@avro_extschema +POSTHOOK: Output: default@avro_extschema +PREHOOK: query: DESCRIBE avro_extschema +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@avro_extschema +POSTHOOK: query: DESCRIBE avro_extschema +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@avro_extschema +number bigint +first_name string +last_name string +PREHOOK: query: ALTER TABLE avro_extschema +ADD COLUMNS (age int) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@avro_extschema +PREHOOK: Output: default@avro_extschema +POSTHOOK: query: ALTER TABLE avro_extschema +ADD COLUMNS (age int) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@avro_extschema +POSTHOOK: Output: default@avro_extschema +PREHOOK: query: DESCRIBE avro_extschema +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@avro_extschema +POSTHOOK: query: DESCRIBE avro_extschema +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@avro_extschema +number bigint +first_name string +last_name string +age int diff --git serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java index f18585da1d108abdd500437362eb388b21030ec7..0f6ab6e9d6352d1ba9535e3bbf16812c6e14dab4 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java @@ -45,6 +45,7 @@ import java.util.Arrays; import java.util.List; import java.util.ArrayList; +import java.util.Map; import java.util.Properties; /** @@ -307,4 +308,31 @@ public static Schema getSchemaFor(URL url) { } } } + + /** + * Called on specific ALTER TABLE events: removes the Avro schema url and schema literal table properties. + * 
After the change, the HMS alone is responsible for handling the schema. Does nothing for non-Avro serdes. + * + * @param conf Hive configuration; if it disallows the change and a schema property is set, a RuntimeException is thrown + * @param serializationLib serde class name; nothing is done unless it equals the AvroSerDe class name + * @param parameters table properties; the schema literal and url entries are removed from this map in place + */ + public static void handleAlterTableForAvro(HiveConf conf, String serializationLib, Map parameters) { + if (AvroSerDe.class.getName().equals(serializationLib)) { + String literalPropName = AvroTableProperties.SCHEMA_LITERAL.getPropName(); + String urlPropName = AvroTableProperties.SCHEMA_URL.getPropName(); + + if (parameters.containsKey(literalPropName) || parameters.containsKey(urlPropName)) { + HiveConf.ConfVars confVar = HiveConf.ConfVars.HIVE_SERDES_AVRO_FROMEXTERNALSCHEMA_ALTERTABLE_ALLOWED; + if (!conf.getBoolVar(confVar)) { + throw new RuntimeException("Not allowed to alter schema of Avro stored table having external schema." + + " Consider adjusting "+confVar.varname+"=true"); + } + + LOG.warn("Removing Avro properties due to alter_table request: " + literalPropName + ", " + urlPropName); + parameters.remove(literalPropName); + parameters.remove(urlPropName); + } + } + } }