Uploaded image for project: 'Spark'
  1. Spark
  2. SPARK-19743

Exception when creating more than one implicit Encoder in REPL

    XMLWordPrintableJSON

Details

    • Type: Bug
    • Status: Resolved
    • Priority: Major
    • Resolution: Incomplete
    • Affects Version/s: 2.0.2, 2.1.1, 2.2.0
    • Fix Version/s: None
    • Component/s: Spark Shell

    Description

      During testing I wanted to create two different bean classes and encoders for them.
      The first time it worked.

      scala> class Test(@scala.beans.BeanProperty var xxx: Long) {}
      defined class Test
      
      scala> import org.apache.spark.sql.Encoders
      import org.apache.spark.sql.Encoders
      
      scala> implicit val testEncoder = Encoders.bean(classOf[Test])
      testEncoder: org.apache.spark.sql.Encoder[Test] = class[xxx[0]: bigint]
      
      scala> spark.range(10).map(new Test(_)).show()
      +---+
      |xxx|
      +---+
      |  0|
      |  1|
      |  2|
      |  3|
      |  4|
      |  5|
      |  6|
      |  7|
      |  8|
      |  9|
      +---+
      

      The second try gives me an exception.

      scala> class Test2(@scala.beans.BeanProperty var xxx: Long) {}
      defined class Test2
      
      scala> implicit val test2Encoder = Encoders.bean(classOf[Test2])
      test2Encoder: org.apache.spark.sql.Encoder[Test2] = class[xxx[0]: bigint]
      
      scala> spark.range(10).map(new Test2(_)).show()
      
      17/02/26 18:10:15 WARN TaskSetManager: Lost task 0.0 in stage 2.0 (TID 2, cdh-data-4.gid): java.lang.ExceptionInInitializerError
              at $line17.$read$$iw.<init>(<console>:9)
              at $line17.$read.<init>(<console>:45)
              at $line17.$read$.<init>(<console>:49)
              at $line17.$read$.<clinit>(<console>)
              at $line19.$read$$iw.<init>(<console>:10)
              at $line19.$read.<init>(<console>:21)
              at $line19.$read$.<init>(<console>:25)
              at $line19.$read$.<clinit>(<console>)
              at $line21.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$anonfun$1.apply(<console>:32)
              at $line21.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$anonfun$1.apply(<console>:32)
              at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.processNext(Unknown Source)
              at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
              at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$8$$anon$1.hasNext(WholeStageCodegenExec.scala:370)
              at org.apache.spark.sql.execution.SparkPlan$$anonfun$4.apply(SparkPlan.scala:246)
              at org.apache.spark.sql.execution.SparkPlan$$anonfun$4.apply(SparkPlan.scala:240)
              at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:803)
              at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:803)
              at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
              at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319)
              at org.apache.spark.rdd.RDD.iterator(RDD.scala:283)
              at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70)
              at org.apache.spark.scheduler.Task.run(Task.scala:86)
              at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
              at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
              at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
              at java.lang.Thread.run(Thread.java:745)
      Caused by: org.apache.spark.SparkException: A master URL must be set in your configuration
              at org.apache.spark.SparkContext.<init>(SparkContext.scala:368)
              at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2258)
              at org.apache.spark.sql.SparkSession$Builder$$anonfun$8.apply(SparkSession.scala:831)
              at org.apache.spark.sql.SparkSession$Builder$$anonfun$8.apply(SparkSession.scala:823)
              at scala.Option.getOrElse(Option.scala:121)
              at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:823)
              at org.apache.spark.repl.Main$.createSparkSession(Main.scala:95)
              at $line3.$read$$iw$$iw.<init>(<console>:15)
              at $line3.$read$$iw.<init>(<console>:31)
              at $line3.$read.<init>(<console>:33)
              at $line3.$read$.<init>(<console>:37)
              at $line3.$read$.<clinit>(<console>)
              ... 26 more
      
      17/02/26 18:10:15 ERROR TaskSetManager: Task 0 in stage 2.0 failed 1 times; aborting job
      org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 2.0 failed 1 times, most recent failure: Lost task 0.0 in stage 2.0 (TID 2, cdh-data-4.gid): java.lang.ExceptionInInitializerError
              at <init>(<console>:9)
              at <init>(<console>:45)
              at .<init>(<console>:49)
              at .<clinit>(<console>)
              at <init>(<console>:10)
              at <init>(<console>:21)
              at .<init>(<console>:25)
              at .<clinit>(<console>)
              at $anonfun$1.apply(<console>:32)
              at $anonfun$1.apply(<console>:32)
              at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.processNext(Unknown Source)
              at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
              at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$8$$anon$1.hasNext(WholeStageCodegenExec.scala:370)
              at org.apache.spark.sql.execution.SparkPlan$$anonfun$4.apply(SparkPlan.scala:246)
              at org.apache.spark.sql.execution.SparkPlan$$anonfun$4.apply(SparkPlan.scala:240)
              at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:803)
              at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:803)
              at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
              at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319)
              at org.apache.spark.rdd.RDD.iterator(RDD.scala:283)
              at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70)
              at org.apache.spark.scheduler.Task.run(Task.scala:86)
              at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
              at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
              at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
              at java.lang.Thread.run(Thread.java:745)
      Caused by: org.apache.spark.SparkException: A master URL must be set in your configuration
              at org.apache.spark.SparkContext.<init>(SparkContext.scala:368)
              at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2258)
              at org.apache.spark.sql.SparkSession$Builder$$anonfun$8.apply(SparkSession.scala:831)
              at org.apache.spark.sql.SparkSession$Builder$$anonfun$8.apply(SparkSession.scala:823)
              at scala.Option.getOrElse(Option.scala:121)
              at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:823)
              at org.apache.spark.repl.Main$.createSparkSession(Main.scala:95)
              at <init>(<console>:15)
              at <init>(<console>:31)
              at <init>(<console>:33)
              at .<init>(<console>:37)
              at .<clinit>(<console>)
              ... 26 more
      
      Driver stacktrace:
        at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1454)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1442)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1441)
        at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
        at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
        at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1441)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)
        at scala.Option.foreach(Option.scala:257)
        at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:811)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1667)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1622)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1611)
        at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
        at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:632)
        at org.apache.spark.SparkContext.runJob(SparkContext.scala:1873)
        at org.apache.spark.SparkContext.runJob(SparkContext.scala:1886)
        at org.apache.spark.SparkContext.runJob(SparkContext.scala:1899)
        at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:347)
        at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:39)
        at org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$execute$1$1.apply(Dataset.scala:2193)
        at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:57)
        at org.apache.spark.sql.Dataset.withNewExecutionId(Dataset.scala:2546)
        at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$execute$1(Dataset.scala:2192)
        at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collect(Dataset.scala:2199)
        at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:1935)
        at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:1934)
        at org.apache.spark.sql.Dataset.withTypedCallback(Dataset.scala:2576)
        at org.apache.spark.sql.Dataset.head(Dataset.scala:1934)
        at org.apache.spark.sql.Dataset.take(Dataset.scala:2149)
        at org.apache.spark.sql.Dataset.showString(Dataset.scala:239)
        at org.apache.spark.sql.Dataset.show(Dataset.scala:526)
        at org.apache.spark.sql.Dataset.show(Dataset.scala:486)
        at org.apache.spark.sql.Dataset.show(Dataset.scala:495)
        ... 48 elided
      Caused by: java.lang.ExceptionInInitializerError: org.apache.spark.SparkException: A master URL must be set in your configuration
      Caused by: org.apache.spark.SparkException: A master URL must be set in your configuration
        at org.apache.spark.SparkContext.<init>(SparkContext.scala:368)
        at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2258)
        at org.apache.spark.sql.SparkSession$Builder$$anonfun$8.apply(SparkSession.scala:831)
        at org.apache.spark.sql.SparkSession$Builder$$anonfun$8.apply(SparkSession.scala:823)
        at scala.Option.getOrElse(Option.scala:121)
        at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:823)
        at org.apache.spark.repl.Main$.createSparkSession(Main.scala:95)
        ... 26 more
      
      

      Attachments

        Activity

          People

            Unassigned Unassigned
            maver1ck Maciej BryƄski
            Votes:
            0 Vote for this issue
            Watchers:
            2 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved: