Description
This is a regression problem. How to reproduce this issue:
// Add this test to HiveSerDeReadWriteSuite
//
// Reproduces SPARK-34512: creates an Avro-backed Hive table whose schema literal
// declares an array field with `"default": null`, then reads it back through Spark.
test("SPARK-34512") {
  withTable("t1") {
    // The field type is a plain array (not a union with "null"), so a null default
    // is what Avro's default-value validation rejects when the SerDe parses the
    // schema literal during CREATE TABLE.
    hiveClient.runSqlHive(
      """
        |CREATE TABLE t1
        |  ROW FORMAT SERDE
        |  'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
        |  STORED AS INPUTFORMAT
        |  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
        |  OUTPUTFORMAT
        |  'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
        |  TBLPROPERTIES (
        |    'avro.schema.literal'='{
        |      "namespace": "org.apache.spark.sql.hive.test",
        |      "name": "schema_with_default_value",
        |      "type": "record",
        |      "fields": [
        |        {
        |          "name": "ARRAY_WITH_DEFAULT",
        |          "type": {"type": "array", "items": "string"},
        |          "default": null
        |        }
        |      ]
        |    }')
        |""".stripMargin)
    // Reading the table is the step expected to work once table creation succeeds.
    spark.sql("select * from t1").show
  }
}
org.apache.avro.AvroTypeException: Invalid default for field ARRAY_WITH_DEFAULT: null not a {"type":"array","items":"string"}
  at org.apache.avro.Schema.validateDefault(Schema.java:1571)
  at org.apache.avro.Schema.access$500(Schema.java:87)
  at org.apache.avro.Schema$Field.<init>(Schema.java:544)
  at org.apache.avro.Schema.parse(Schema.java:1678)
  at org.apache.avro.Schema$Parser.parse(Schema.java:1425)
  at org.apache.avro.Schema$Parser.parse(Schema.java:1413)
  at org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.getSchemaFor(AvroSerdeUtils.java:268)
  at org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.determineSchemaOrThrowException(AvroSerdeUtils.java:111)
  at org.apache.hadoop.hive.serde2.avro.AvroSerDe.determineSchemaOrReturnErrorSchema(AvroSerDe.java:187)
  at org.apache.hadoop.hive.serde2.avro.AvroSerDe.initialize(AvroSerDe.java:107)
  at org.apache.hadoop.hive.serde2.avro.AvroSerDe.initialize(AvroSerDe.java:83)
  at org.apache.hadoop.hive.serde2.SerDeUtils.initializeSerDe(SerDeUtils.java:533)
  at org.apache.hadoop.hive.metastore.MetaStoreUtils.getDeserializer(MetaStoreUtils.java:450)
  at org.apache.hadoop.hive.metastore.MetaStoreUtils.getDeserializer(MetaStoreUtils.java:437)
  at org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:281)
  at org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:263)
  at org.apache.hadoop.hive.ql.metadata.Table.getColsInternal(Table.java:641)
  at org.apache.hadoop.hive.ql.metadata.Table.getCols(Table.java:624)
  at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:831)
  at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:867)
  at org.apache.hadoop.hive.ql.exec.DDLTask.createTable(DDLTask.java:4356)
  at org.apache.hadoop.hive.ql.exec.DDLTask.execute(DDLTask.java:354)
  at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:199)
  at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100)
  at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:2183)
  at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1839)
  at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1526)
  at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1237)
  at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1227)
  at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$runHive$1(HiveClientImpl.scala:820)
  at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$withHiveState$1(HiveClientImpl.scala:291)
  at org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:224)
  at org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:223)
  at org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:273)
  at org.apache.spark.sql.hive.client.HiveClientImpl.runHive(HiveClientImpl.scala:800)
  at org.apache.spark.sql.hive.client.HiveClientImpl.runSqlHive(HiveClientImpl.scala:787)
Attachments
Issue Links
- causes
-
SPARK-37069 HiveClientImpl throws NoSuchMethodError: org.apache.hadoop.hive.ql.metadata.Hive.getWithoutRegisterFns
- Resolved
- is related to
-
HIVE-24797 Disable validate default values when parsing Avro schemas
- Closed
- links to