Apache Sedona / SEDONA-61

NegativeArraySizeException when opening a shapefile in Spark


Details

    • Type: Bug
    • Status: Resolved
    • Priority: Major
    • Resolution: Fixed
    • Affects Version/s: None
    • Fix Version/s: 1.2.1
    • Component/s: None
    • Environment: Databricks 7.4 (includes Apache Spark 3.0.1, Scala 2.12)

    Description

      Hi,

      I came across an issue when opening an Esri shapefile in Spark. I am using the following shapefile: https://datamap.gov.wales/layers/inspire-nrw:NRW_WFD_RIVER_CATCHMENTS_C2


      import org.apache.sedona.core.formatMapper.shapefileParser.ShapefileReader
      import org.apache.sedona.sql.utils.Adapter
      val spatialRDD = ShapefileReader.readToGeometryRDD(sc, shapefileInputLocation)
      val spatialDf = Adapter.toDf(spatialRDD, spark)
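
      For reference, a fuller, self-contained version of the reproduction above might look like the sketch below. The ShapefileReader, Adapter, and SedonaSQLRegistrator names come from the Sedona 1.x API; the local input path and the explicit JavaSparkContext wrapper (Sedona core's reader is a Java API; Databricks notebooks provide sc directly) are assumptions for illustration only:

      import org.apache.sedona.core.formatMapper.shapefileParser.ShapefileReader
      import org.apache.sedona.sql.utils.{Adapter, SedonaSQLRegistrator}
      import org.apache.spark.api.java.JavaSparkContext
      import org.apache.spark.sql.SparkSession

      object ShapefileRepro {
        def main(args: Array[String]): Unit = {
          val spark = SparkSession.builder()
            .appName("SEDONA-61-repro")
            .master("local[*]")
            .getOrCreate()
          // Register Sedona's SQL types and functions so Adapter.toDf can build a DataFrame
          SedonaSQLRegistrator.registerAll(spark)

          // Hypothetical local directory holding the .shp/.shx/.dbf parts of the dataset
          val shapefileInputLocation = "/tmp/NRW_WFD_RIVER_CATCHMENTS_C2"

          val sc = new JavaSparkContext(spark.sparkContext)
          val spatialRDD = ShapefileReader.readToGeometryRDD(sc, shapefileInputLocation)
          val spatialDf = Adapter.toDf(spatialRDD, spark)
          spatialDf.show() // the failing .dbf record parse is triggered by this action
        }
      }

      With the matching sedona-core and sedona-sql artifacts on the classpath, running this against the linked dataset should hit the same exception on affected versions.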


      Log:

      Caused by: java.lang.NegativeArraySizeException
        at org.apache.sedona.core.formatMapper.shapefileParser.parseUtils.dbf.DbfParseUtil.parsePrimitiveRecord(DbfParseUtil.java:175)
        at org.apache.sedona.core.formatMapper.shapefileParser.shapes.DbfFileReader.nextKeyValue(DbfFileReader.java:72)
        at org.apache.sedona.core.formatMapper.shapefileParser.shapes.CombineShapeReader.nextKeyValue(CombineShapeReader.java:145)
        at org.apache.spark.rdd.NewHadoopRDD$$anon$1.hasNext(NewHadoopRDD.scala:253)
        at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
        at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
        at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
        at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
        at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
        at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
        at org.apache.spark.sql.execution.collect.UnsafeRowBatchUtils$.encodeUnsafeRows(UnsafeRowBatchUtils.scala:80)
        at org.apache.spark.sql.execution.collect.Collector.$anonfun$processFunc$1(Collector.scala:187)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
        at org.apache.spark.scheduler.Task.doRunTask(Task.scala:144)
        at org.apache.spark.scheduler.Task.run(Task.scala:117)
        at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$9(Executor.scala:642)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1581)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:645)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
      Driver stacktrace:
        at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2519)
        at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2466)
        at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2460)
        at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
        at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
        at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
        at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2460)
        at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1152)
        at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1152)
        at scala.Option.foreach(Option.scala:407)
        at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1152)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2721)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2668)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2656)
        at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
        at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:938)
        at org.apache.spark.SparkContext.runJob(SparkContext.scala:2339)
        at org.apache.spark.sql.execution.collect.Collector.runSparkJobs(Collector.scala:298)
        at org.apache.spark.sql.execution.collect.Collector.collect(Collector.scala:308)
        at org.apache.spark.sql.execution.collect.Collector$.collect(Collector.scala:82)
        at org.apache.spark.sql.execution.collect.Collector$.collect(Collector.scala:88)
        at org.apache.spark.sql.execution.collect.InternalRowFormat$.collect(cachedSparkResults.scala:61)
        at org.apache.spark.sql.execution.collect.InternalRowFormat$.collect(cachedSparkResults.scala:57)
        at org.apache.spark.sql.execution.ResultCacheManager.$anonfun$getOrComputeResultInternal$1(ResultCacheManager.scala:527)
        at scala.Option.getOrElse(Option.scala:189)
        at org.apache.spark.sql.execution.ResultCacheManager.getOrComputeResultInternal(ResultCacheManager.scala:527)
        at org.apache.spark.sql.execution.ResultCacheManager.getOrComputeResult(ResultCacheManager.scala:471)
        at org.apache.spark.sql.execution.CollectLimitExec.executeCollectResult(limit.scala:58)
        at org.apache.spark.sql.Dataset.collectResult(Dataset.scala:3049)
        at org.apache.spark.sql.Dataset.collectFromPlan(Dataset.scala:3772)
        at org.apache.spark.sql.Dataset.$anonfun$head$1(Dataset.scala:2773)
        at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3764)
        at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$5(SQLExecution.scala:116)
        at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:248)
        at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$1(SQLExecution.scala:101)
        at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:841)
        at org.apache.spark.sql.execution.SQLExecution$.withCustomExecutionEnv(SQLExecution.scala:77)
        at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:198)
        at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3762)
        at org.apache.spark.sql.Dataset.head(Dataset.scala:2773)
        at org.apache.spark.sql.Dataset.take(Dataset.scala:2980)
        at org.apache.spark.sql.Dataset.getRows(Dataset.scala:307)
        at org.apache.spark.sql.Dataset.showString(Dataset.scala:344)
        at org.apache.spark.sql.Dataset.show(Dataset.scala:840)
        at org.apache.spark.sql.Dataset.show(Dataset.scala:799)
        at org.apache.spark.sql.Dataset.show(Dataset.scala:808)
        at linef1162c2e1a0d4a2bba222423a205b77c35.$read$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2960440977724125:3)
        at linef1162c2e1a0d4a2bba222423a205b77c35.$read$$iw$$iw$$iw$$iw$$iw.<init>(command-2960440977724125:62)
        at linef1162c2e1a0d4a2bba222423a205b77c35.$read$$iw$$iw$$iw$$iw.<init>(command-2960440977724125:64)
        at linef1162c2e1a0d4a2bba222423a205b77c35.$read$$iw$$iw$$iw.<init>(command-2960440977724125:66)
        at linef1162c2e1a0d4a2bba222423a205b77c35.$read$$iw$$iw.<init>(command-2960440977724125:68)
        at linef1162c2e1a0d4a2bba222423a205b77c35.$read$$iw.<init>(command-2960440977724125:70)
        at linef1162c2e1a0d4a2bba222423a205b77c35.$read.<init>(command-2960440977724125:72)
        at linef1162c2e1a0d4a2bba222423a205b77c35.$read$.<init>(command-2960440977724125:76)
        at linef1162c2e1a0d4a2bba222423a205b77c35.$read$.<clinit>(command-2960440977724125)
        at linef1162c2e1a0d4a2bba222423a205b77c35.$eval$.$print$lzycompute(<notebook>:7)
        at linef1162c2e1a0d4a2bba222423a205b77c35.$eval$.$print(<notebook>:6)
        at linef1162c2e1a0d4a2bba222423a205b77c35.$eval.$print(<notebook>)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:745)
        at scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1021)
        at scala.tools.nsc.interpreter.IMain.$anonfun$interpret$1(IMain.scala:574)
        at scala.reflect.internal.util.ScalaClassLoader.asContext(ScalaClassLoader.scala:41)
        at scala.reflect.internal.util.ScalaClassLoader.asContext$(ScalaClassLoader.scala:37)
        at scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:41)
        at scala.tools.nsc.interpreter.IMain.loadAndRunReq$1(IMain.scala:573)
        at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:600)
        at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:570)
        at com.databricks.backend.daemon.driver.DriverILoop.execute(DriverILoop.scala:219)
        at com.databricks.backend.daemon.driver.ScalaDriverLocal.$anonfun$repl$1(ScalaDriverLocal.scala:233)
        at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
        at com.databricks.backend.daemon.driver.DriverLocal$TrapExitInternal$.trapExit(DriverLocal.scala:773)
        at com.databricks.backend.daemon.driver.DriverLocal$TrapExit$.apply(DriverLocal.scala:726)
        at com.databricks.backend.daemon.driver.ScalaDriverLocal.repl(ScalaDriverLocal.scala:233)
        at com.databricks.backend.daemon.driver.DriverLocal.$anonfun$execute$10(DriverLocal.scala:431)
        at com.databricks.logging.UsageLogging.$anonfun$withAttributionContext$1(UsageLogging.scala:239)
        at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
        at com.databricks.logging.UsageLogging.withAttributionContext(UsageLogging.scala:234)
        at com.databricks.logging.UsageLogging.withAttributionContext$(UsageLogging.scala:231)
        at com.databricks.backend.daemon.driver.DriverLocal.withAttributionContext(DriverLocal.scala:48)
        at com.databricks.logging.UsageLogging.withAttributionTags(UsageLogging.scala:276)
        at com.databricks.logging.UsageLogging.withAttributionTags$(UsageLogging.scala:269)
        at com.databricks.backend.daemon.driver.DriverLocal.withAttributionTags(DriverLocal.scala:48)
        at com.databricks.backend.daemon.driver.DriverLocal.execute(DriverLocal.scala:408)
        at com.databricks.backend.daemon.driver.DriverWrapper.$anonfun$tryExecutingCommand$1(DriverWrapper.scala:690)
        at scala.util.Try$.apply(Try.scala:213)
        at com.databricks.backend.daemon.driver.DriverWrapper.tryExecutingCommand(DriverWrapper.scala:682)
        at com.databricks.backend.daemon.driver.DriverWrapper.getCommandOutputAndError(DriverWrapper.scala:523)
        at com.databricks.backend.daemon.driver.DriverWrapper.executeCommand(DriverWrapper.scala:635)
        at com.databricks.backend.daemon.driver.DriverWrapper.runInnerLoop(DriverWrapper.scala:428)
        at com.databricks.backend.daemon.driver.DriverWrapper.runInner(DriverWrapper.scala:371)
        at com.databricks.backend.daemon.driver.DriverWrapper.run(DriverWrapper.scala:223)
        at java.lang.Thread.run(Thread.java:748)
      Caused by: java.lang.NegativeArraySizeException
        at org.apache.sedona.core.formatMapper.shapefileParser.parseUtils.dbf.DbfParseUtil.parsePrimitiveRecord(DbfParseUtil.java:175)
        at org.apache.sedona.core.formatMapper.shapefileParser.shapes.DbfFileReader.nextKeyValue(DbfFileReader.java:72)
        at org.apache.sedona.core.formatMapper.shapefileParser.shapes.CombineShapeReader.nextKeyValue(CombineShapeReader.java:145)
        at org.apache.spark.rdd.NewHadoopRDD$$anon$1.hasNext(NewHadoopRDD.scala:253)
        at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
        at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
        at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
        at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
        at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
        at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
        at org.apache.spark.sql.execution.collect.UnsafeRowBatchUtils$.encodeUnsafeRows(UnsafeRowBatchUtils.scala:80)
        at org.apache.spark.sql.execution.collect.Collector.$anonfun$processFunc$1(Collector.scala:187)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
        at org.apache.spark.scheduler.Task.doRunTask(Task.scala:144)
        at org.apache.spark.scheduler.Task.run(Task.scala:117)
        at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$9(Executor.scala:642)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1581)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:645)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
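
      The failing frame is DbfParseUtil.parsePrimitiveRecord, which sizes a byte array from a length decoded out of the .dbf file, so the negative size means the decoded length went negative for this particular file. One classic way that happens in DBF parsing, shown below purely as an illustration (not the actual Sedona code), is decoding an unsigned 16-bit header field through a signed Short:

      import java.nio.{ByteBuffer, ByteOrder}

      object NegativeLengthDemo {
        def main(args: Array[String]): Unit = {
          // DBF headers store record length as unsigned 16-bit little-endian;
          // encode a hypothetical length of 40000, which overflows a signed Short.
          val headerBytes = ByteBuffer.allocate(2).order(ByteOrder.LITTLE_ENDIAN)
            .putShort(40000.toShort).array()
          val buf = ByteBuffer.wrap(headerBytes).order(ByteOrder.LITTLE_ENDIAN)

          val signed = buf.getShort(0).toInt      // -25536: sign bit misinterpreted
          val unsigned = buf.getShort(0) & 0xFFFF // 40000: correct unsigned decode
          println(s"signed=$signed unsigned=$unsigned")

          val record = new Array[Byte](signed)    // throws java.lang.NegativeArraySizeException
        }
      }

      Whatever the exact trigger in this file, the parse-time failure is resolved in the 1.2.1 release noted in the Details above.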



          People

            Assignee: Unassigned
            Reporter: Krzysztof Nojman
            Votes: 0
            Watchers: 3

            Dates

              Created:
              Updated:
              Resolved: