
HUDI-6858: Fix checkpoint reading in Spark Structured Streaming


Details

    • Type: Bug
    • Status: Closed
    • Priority: Blocker
    • Resolution: Fixed
    • Affects Version/s: 0.13.1
    • Fix Version/s: 0.14.0
    • Component/s: None

    Description

      Spark Structured Streaming writes fail because the sink's checkpoint lookup parses instant metadata as JSON HoodieCommitMetadata; when the timeline contains a requested compaction instant ([==>20230902092834911__compaction__REQUESTED]), its payload is an Avro-serialized compaction plan, so Jackson aborts on the Avro file magic ("Objavro"):

      Failed to parse HoodieCommitMetadata for [==>20230902092834911__compaction__REQUESTED]
      	at org.apache.spark.sql.execution.streaming.StreamExecution.org$apache$spark$sql$execution$streaming$StreamExecution$$runStream(StreamExecution.scala:333) ~[spark-sql_2.12-3.4.0-amzn-0.jar:3.4.0-amzn-0]
      	at org.apache.spark.sql.execution.streaming.StreamExecution$$anon$1.run(StreamExecution.scala:207) ~[spark-sql_2.12-3.4.0-amzn-0.jar:3.4.0-amzn-0]
      Caused by: org.apache.hudi.exception.HoodieIOException: Failed to parse HoodieCommitMetadata for [==>20230902092834911__compaction__REQUESTED]
      	at org.apache.hudi.common.util.CommitUtils.lambda$getValidCheckpointForCurrentWriter$1(CommitUtils.java:173) ~[hudi-spark3-bundle_2.12-0.13.1-amzn-0.jar:0.13.1-amzn-0]
      	at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:197) ~[?:?]
      	at java.util.stream.SortedOps$SizedRefSortingSink.end(SortedOps.java:361) ~[?:?]
      	at java.util.stream.AbstractPipeline.copyIntoWithCancel(AbstractPipeline.java:528) ~[?:?]
      	at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:513) ~[?:?]
      	at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:499) ~[?:?]
      	at java.util.stream.FindOps$FindOp.evaluateSequential(FindOps.java:150) ~[?:?]
      	at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234) ~[?:?]
      	at java.util.stream.ReferencePipeline.findFirst(ReferencePipeline.java:647) ~[?:?]
      	at org.apache.hudi.common.util.CommitUtils.getValidCheckpointForCurrentWriter(CommitUtils.java:175) ~[hudi-spark3-bundle_2.12-0.13.1-amzn-0.jar:0.13.1-amzn-0]
      	at org.apache.hudi.HoodieStreamingSink.canSkipBatch(HoodieStreamingSink.scala:313) ~[hudi-spark3-bundle_2.12-0.13.1-amzn-0.jar:0.13.1-amzn-0]
      	at org.apache.hudi.HoodieStreamingSink.addBatch(HoodieStreamingSink.scala:104) ~[hudi-spark3-bundle_2.12-0.13.1-amzn-0.jar:0.13.1-amzn-0]
      	at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runBatch$17(MicroBatchExecution.scala:729) ~[spark-sql_2.12-3.4.0-amzn-0.jar:3.4.0-amzn-0]
      	at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:107) ~[spark-catalyst_2.12-3.4.0-amzn-0.jar:3.4.0-amzn-0]
      	at org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:250) ~[spark-sql_2.12-3.4.0-amzn-0.jar:3.4.0-amzn-0]
      	at org.apache.spark.sql.execution.SQLExecution$.executeQuery$1(SQLExecution.scala:123) ~[spark-sql_2.12-3.4.0-amzn-0.jar:3.4.0-amzn-0]
      	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$9(SQLExecution.scala:160) ~[spark-sql_2.12-3.4.0-amzn-0.jar:3.4.0-amzn-0]
      
      Caused by: java.io.IOException: unable to read commit metadata
      	at org.apache.hudi.common.model.HoodieCommitMetadata.fromBytes(HoodieCommitMetadata.java:496) ~[hudi-spark3-bundle_2.12-0.13.1-amzn-0.jar:0.13.1-amzn-0]
      	at org.apache.hudi.common.util.CommitUtils.lambda$getValidCheckpointForCurrentWriter$1(CommitUtils.java:163) ~[hudi-spark3-bundle_2.12-0.13.1-amzn-0.jar:0.13.1-amzn-0]
      	at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:197) ~[?:?]
      	at java.util.stream.SortedOps$SizedRefSortingSink.end(SortedOps.java:361) ~[?:?]
      	at java.util.stream.AbstractPipeline.copyIntoWithCancel(AbstractPipeline.java:528) ~[?:?]
      	at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:513) ~[?:?]
      	at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:499) ~[?:?]
      	at java.util.stream.FindOps$FindOp.evaluateSequential(FindOps.java:150) ~[?:?]
      	at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234) ~[?:?]
      	at java.util.stream.ReferencePipeline.findFirst(ReferencePipeline.java:647) ~[?:?]
      	at org.apache.hudi.common.util.CommitUtils.getValidCheckpointForCurrentWriter(CommitUtils.java:175) ~[hudi-spark3-bundle_2.12-0.13.1-amzn-0.jar:0.13.1-amzn-0]
      	at org.apache.hudi.HoodieStreamingSink.canSkipBatch(HoodieStreamingSink.scala:313) ~[hudi-spark3-bundle_2.12-0.13.1-amzn-0.jar:0.13.1-amzn-0]
      	at org.apache.hudi.HoodieStreamingSink.addBatch(HoodieStreamingSink.scala:104) ~[hudi-spark3-bundle_2.12-0.13.1-amzn-0.jar:0.13.1-amzn-0]
      	at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runBatch$17(MicroBatchExecution.scala:729) ~[spark-sql_2.12-3.4.0-amzn-0.jar:3.4.0-amzn-0]
      	at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:107) ~[spark-catalyst_2.12-3.4.0-amzn-0.jar:3.4.0-amzn-0]
      
      Caused by: com.fasterxml.jackson.core.JsonParseException: Unrecognized token 'Objavro': was expecting (JSON String, Number, Array, Object or token 'null', 'true' or 'false')
       at [Source: (StringReader); line: 1, column: 11]
      	at com.fasterxml.jackson.core.JsonParser._constructError(JsonParser.java:2391) ~[jackson-core-2.13.4.jar:2.13.4]
      	at com.fasterxml.jackson.core.base.ParserMinimalBase._reportError(ParserMinimalBase.java:745) ~[jackson-core-2.13.4.jar:2.13.4]
      	at com.fasterxml.jackson.core.json.ReaderBasedJsonParser._reportInvalidToken(ReaderBasedJsonParser.java:2961) ~[jackson-core-2.13.4.jar:2.13.4]
      	at com.fasterxml.jackson.core.json.ReaderBasedJsonParser._handleOddValue(ReaderBasedJsonParser.java:2002) ~[jackson-core-2.13.4.jar:2.13.4]
      	at com.fasterxml.jackson.core.json.ReaderBasedJsonParser.nextToken(ReaderBasedJsonParser.java:802) ~[jackson-core-2.13.4.jar:2.13.4]
      	at com.fasterxml.jackson.databind.ObjectMapper._initForReading(ObjectMapper.java:4761) ~[jackson-databind-2.13.4.jar:2.13.4]
      	at com.fasterxml.jackson.databind.ObjectMapper._readMapAndClose(ObjectMapper.java:4667) ~[jackson-databind-2.13.4.jar:2.13.4]
      	at com.fasterxml.jackson.databind.ObjectMapper.readValue(ObjectMapper.java:3629) ~[jackson-databind-2.13.4.jar:2.13.4]
      	at com.fasterxml.jackson.databind.ObjectMapper.readValue(ObjectMapper.java:3597) ~[jackson-databind-2.13.4.jar:2.13.4]
      	at org.apache.hudi.common.model.HoodieCommitMetadata.fromJsonString(HoodieCommitMetadata.java:240) ~[hudi-spark3-bundle_2.12-0.13.1-amzn-0.jar:0.13.1-amzn-0]
      	at org.apache.hudi.common.model.HoodieCommitMetadata.fromBytes(HoodieCommitMetadata.java:494) ~[hudi-spark3-bundle_2.12-0.13.1-amzn-0.jar:0.13.1-amzn-0]
      	at org.apache.hudi.common.util.CommitUtils.lambda$getValidCheckpointForCurrentWriter$1(CommitUtils.java:163) ~[hudi-spark3-bundle_2.12-0.13.1-amzn-0.jar:0.13.1-amzn-0]
      	at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:197) ~[?:?]
      	at java.util.stream.SortedOps$SizedRefSortingSink.end(SortedOps.java:361) ~[?:?]
      	at java.util.stream.AbstractPipeline.copyIntoWithCancel(AbstractPipeline.java:528) ~[?:?]
      	at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:513) ~[?:?]
      	at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:499) ~[?:?]
      	at java.util.stream.FindOps$FindOp.evaluateSequential(FindOps.java:150) ~[?:?]
      	at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234) ~[?:?]
      	at java.util.stream.ReferencePipeline.findFirst(ReferencePipeline.java:647) ~[?:?]
      	at org.apache.hudi.common.util.CommitUtils.getValidCheckpointForCurrentWriter(CommitUtils.java:175) ~[hudi-spark3-bundle_2.12-0.13.1-amzn-0.jar:0.13.1-amzn-0]
      	at org.apache.hudi.HoodieStreamingSink.canSkipBatch(HoodieStreamingSink.scala:313) ~[hudi-spark3-bundle_2.12-0.13.1-amzn-0.jar:0.13.1-amzn-0]
      	at org.apache.hudi.HoodieStreamingSink.addBatch(HoodieStreamingSink.scala:104) ~[hudi-spark3-bundle_2.12-0.13.1-amzn-0.jar:0.13.1-amzn-0]
      	at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runBatch$17(MicroBatchExecution.scala:729) ~[spark-sql_2.12-3.4.0-amzn-0.jar:3.4.0-amzn-0]
      	at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:107) ~[spark-catalyst_2.12-3.4.0-amzn-0.jar:3.4.0-amzn-0]
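
      The "Objavro" token is the magic header of an Avro object container file: a compaction .requested file stores an Avro-serialized compaction plan, not JSON HoodieCommitMetadata, so feeding its bytes to Jackson can only fail. Below is a minimal sketch of the kind of guard that avoids this. It is not the actual patch shipped in 0.14.0; the class and method names of the helper are illustrative, and the Hudi timeline/metadata calls are assumed from the 0.13.x API.

      import java.io.IOException;
      import java.util.Arrays;
      import java.util.HashSet;
      import java.util.Set;

      import org.apache.hudi.common.model.HoodieCommitMetadata;
      import org.apache.hudi.common.table.timeline.HoodieInstant;
      import org.apache.hudi.common.table.timeline.HoodieTimeline;
      import org.apache.hudi.common.util.Option;

      public final class CheckpointLookupSketch {

        // Completed instants of these actions serialize HoodieCommitMetadata as JSON;
        // compaction .requested/.inflight files hold Avro plans and must be skipped.
        private static final Set<String> COMMIT_LIKE_ACTIONS = new HashSet<>(Arrays.asList(
            HoodieTimeline.COMMIT_ACTION,
            HoodieTimeline.DELTA_COMMIT_ACTION,
            HoodieTimeline.REPLACE_COMMIT_ACTION));

        /** Latest value stored under checkpointKey in any completed commit's extra metadata. */
        public static Option<String> latestCheckpoint(HoodieTimeline timeline, String checkpointKey) {
          return Option.fromJavaOptional(
              timeline.filterCompletedInstants()       // drop ==>REQUESTED / INFLIGHT instants
                  .getReverseOrderedInstants()          // newest completed instant first
                  .filter(instant -> COMMIT_LIKE_ACTIONS.contains(instant.getAction()))
                  .map(instant -> readCheckpoint(timeline, instant, checkpointKey))
                  .filter(Option::isPresent)
                  .map(Option::get)
                  .findFirst());
        }

        private static Option<String> readCheckpoint(
            HoodieTimeline timeline, HoodieInstant instant, String checkpointKey) {
          try {
            HoodieCommitMetadata metadata = HoodieCommitMetadata.fromBytes(
                timeline.getInstantDetails(instant).get(), HoodieCommitMetadata.class);
            return Option.ofNullable(metadata.getMetadata(checkpointKey));
          } catch (IOException e) {
            // Tolerate a single unparsable instant instead of failing the whole stream.
            return Option.empty();
          }
        }
      }

      With a guard like this, a compaction scheduled between micro-batches no longer crashes the checkpoint check in HoodieStreamingSink.canSkipBatch; the query simply falls back to the newest checkpoint recorded in a completed commit.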
      


            People

                Assignee: Ethan Guo (yihua)
                Reporter: Ethan Guo (yihua)
