Zeppelin / ZEPPELIN-5688

case class doesn't work in spark interpreter


Details

    • Type: Bug
    • Status: Resolved
    • Priority: Major
    • Resolution: Fixed
    • Affects Version/s: 0.10.1
    • Fix Version/s: 0.11.0
    • Component/s: spark
    • Labels: None

    Description

      import org.apache.commons.io.IOUtils
      import java.net.URL
      import java.nio.charset.Charset

      // Zeppelin creates and injects sc (SparkContext) and sqlContext (HiveContext or SqlContext),
      // so you don't need to create them manually.

      // load bank data
      val bankText = sc.parallelize(
          IOUtils.toString(
              new URL("https://s3.amazonaws.com/apache-zeppelin/tutorial/bank/bank.csv"),
              Charset.forName("utf8")).split("\n"))

      case class Bank(age: Integer, job: String, marital: String, education: String, balance: Integer)

      val bank = bankText.map(s => s.split(";")).filter(s => s(0) != "\"age\"").map(
          s => Bank(s(0).toInt,
                  s(1).replaceAll("\"", ""),
                  s(2).replaceAll("\"", ""),
                  s(3).replaceAll("\"", ""),
                  s(5).replaceAll("\"", "").toInt
              )
      )

      bank.collect()

      Running this paragraph fails with the following stack trace:

      org.apache.spark.SparkDriverExecutionException: Execution error
        at org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGScheduler.scala:1690)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2588)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
        at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
        at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
        at org.apache.spark.SparkContext.runJob(SparkContext.scala:2214)
        at org.apache.spark.SparkContext.runJob(SparkContext.scala:2235)
        at org.apache.spark.SparkContext.runJob(SparkContext.scala:2254)
        at org.apache.spark.SparkContext.runJob(SparkContext.scala:2279)
        at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1030)
        at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
        at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
        at org.apache.spark.rdd.RDD.withScope(RDD.scala:414)
        at org.apache.spark.rdd.RDD.collect(RDD.scala:1029)
        ... 44 elided
      Caused by: java.lang.ArrayStoreException: [LBank;
        at scala.runtime.ScalaRunTime$.array_update(ScalaRunTime.scala:74)
        at org.apache.spark.SparkContext.$anonfun$runJob$4(SparkContext.scala:2235)
        at org.apache.spark.SparkContext.$anonfun$runJob$4$adapted(SparkContext.scala:2235)
        at org.apache.spark.scheduler.JobWaiter.taskSucceeded(JobWaiter.scala:59)
        at org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGScheduler.scala:1686)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2588)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
        at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49) 
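
      The trace ends in scala.runtime.ScalaRunTime$.array_update: the job itself
      completes, but when the driver stores the deserialized Bank instances into
      the Array[Bank] returned by collect(), the JVM throws
      java.lang.ArrayStoreException: [LBank; (the descriptor for an array of
      Bank). An ArrayStoreException at that point suggests the Bank class used
      for the result array and the Bank class of the deserialized elements were
      loaded by different classloaders, which can happen for case classes
      compiled by the interpreter's REPL.

      A minimal workaround sketch, not the fix applied for this ticket: route
      the result through a DataFrame so that collect() returns Array[Row] and
      no typed Array[Bank] is allocated on the driver. It assumes the spark
      (SparkSession) variable that Zeppelin injects, plus the Bank case class
      and bank RDD from the paragraph above.

      // Workaround sketch: collect Rows instead of REPL-defined case-class instances.
      import spark.implicits._

      val bankDF = bank.toDF()   // encodes Bank rows with Spark SQL encoders
      bankDF.show(5)             // quick sanity check of the parsed data
      bankDF.collect()           // returns Array[org.apache.spark.sql.Row]

      Mapping to plain tuples before collecting (e.g. bank.map(b => (b.age, b.job)))
      also keeps the REPL-defined class out of the result array, since tuple
      classes come from the standard library's classloader.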


          People

            Assignee: Kristin Cowalcijk (kontinuation)
            Reporter: Jeff Zhang (zjffdu)
