Uploaded image for project: 'Spark'
  1. Spark
  2. SPARK-34512

Disable validation of default values when parsing Avro schemas

    XML | Word | Printable | JSON

Details

    • Type: Bug
    • Status: Resolved
    • Priority: Major
    • Resolution: Fixed
    • Affects Version/s: 3.2.0
    • Fix Version/s: 3.2.0
    • Component/s: SQL
    • Labels: None

    Description

      This is a regression. To reproduce the issue:

        // Add this test to HiveSerDeReadWriteSuite
        // Reproduces SPARK-34512: a Hive Avro table whose `avro.schema.literal` declares
        // an array field with `"default": null` fails with
        // `AvroTypeException: Invalid default for field ARRAY_WITH_DEFAULT`.
        test("SPARK-34512") {
          withTable("t1") {
            // Create the table directly through the Hive client. Per the stack trace
            // below, the exception is raised from this call: AvroSerDe parses the
            // schema literal during CREATE TABLE and Schema.validateDefault rejects
            // the null default on the array-typed field.
            hiveClient.runSqlHive(
              """
                |CREATE TABLE t1
                |  ROW FORMAT SERDE
                |  'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
                |  STORED AS INPUTFORMAT
                |  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
                |  OUTPUTFORMAT
                |  'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
                |  TBLPROPERTIES (
                |    'avro.schema.literal'='{
                |      "namespace": "org.apache.spark.sql.hive.test",
                |      "name": "schema_with_default_value",
                |      "type": "record",
                |      "fields": [
                |         {
                |           "name": "ARRAY_WITH_DEFAULT",
                |           "type": {"type": "array", "items": "string"},
                |           "default": null
                |         }
                |       ]
                |    }')
                |""".stripMargin)
      
            // Read the table back through Spark SQL.
            spark.sql("select * from t1").show
          }
        }
      
      org.apache.avro.AvroTypeException: Invalid default for field ARRAY_WITH_DEFAULT: null not a {"type":"array","items":"string"}
      	at org.apache.avro.Schema.validateDefault(Schema.java:1571)
      	at org.apache.avro.Schema.access$500(Schema.java:87)
      	at org.apache.avro.Schema$Field.<init>(Schema.java:544)
      	at org.apache.avro.Schema.parse(Schema.java:1678)
      	at org.apache.avro.Schema$Parser.parse(Schema.java:1425)
      	at org.apache.avro.Schema$Parser.parse(Schema.java:1413)
      	at org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.getSchemaFor(AvroSerdeUtils.java:268)
      	at org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.determineSchemaOrThrowException(AvroSerdeUtils.java:111)
      	at org.apache.hadoop.hive.serde2.avro.AvroSerDe.determineSchemaOrReturnErrorSchema(AvroSerDe.java:187)
      	at org.apache.hadoop.hive.serde2.avro.AvroSerDe.initialize(AvroSerDe.java:107)
      	at org.apache.hadoop.hive.serde2.avro.AvroSerDe.initialize(AvroSerDe.java:83)
      	at org.apache.hadoop.hive.serde2.SerDeUtils.initializeSerDe(SerDeUtils.java:533)
      	at org.apache.hadoop.hive.metastore.MetaStoreUtils.getDeserializer(MetaStoreUtils.java:450)
      	at org.apache.hadoop.hive.metastore.MetaStoreUtils.getDeserializer(MetaStoreUtils.java:437)
      	at org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:281)
      	at org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:263)
      	at org.apache.hadoop.hive.ql.metadata.Table.getColsInternal(Table.java:641)
      	at org.apache.hadoop.hive.ql.metadata.Table.getCols(Table.java:624)
      	at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:831)
      	at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:867)
      	at org.apache.hadoop.hive.ql.exec.DDLTask.createTable(DDLTask.java:4356)
      	at org.apache.hadoop.hive.ql.exec.DDLTask.execute(DDLTask.java:354)
      	at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:199)
      	at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100)
      	at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:2183)
      	at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1839)
      	at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1526)
      	at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1237)
      	at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1227)
      	at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$runHive$1(HiveClientImpl.scala:820)
      	at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$withHiveState$1(HiveClientImpl.scala:291)
      	at org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:224)
      	at org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:223)
      	at org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:273)
      	at org.apache.spark.sql.hive.client.HiveClientImpl.runHive(HiveClientImpl.scala:800)
      	at org.apache.spark.sql.hive.client.HiveClientImpl.runSqlHive(HiveClientImpl.scala:787)
      
      

      Attachments

        Issue Links

          Activity

            People

              Assignee: yumwang Yuming Wang
              Reporter: yumwang Yuming Wang
              Votes:
              0 Vote for this issue
              Watchers:
              7 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved: