Uploaded image for project: 'Spark'
  1. Spark
  2. SPARK-6887

ColumnBuilder misses FloatType

    XMLWordPrintableJSON

Details

    • Bug
    • Status: Resolved
    • Major
    • Resolution: Fixed
    • 1.4.0
    • 1.4.0
    • SQL
    • None

    Description

      To reproduce ...

      import org.apache.spark.sql.types._
      import org.apache.spark.sql.Row
      
      val schema = StructType(StructField("c", FloatType, true) :: Nil)
      
      val rdd = sc.parallelize(1 to 100).map(i => Row(i.toFloat))
      
      sqlContext.createDataFrame(rdd, schema).registerTempTable("test")
      
      sqlContext.sql("cache table test")
      
      sqlContext.table("test").show
      

      The exception is ...

      15/04/13 15:00:12 INFO DAGScheduler: Job 0 failed: collect at SparkPlan.scala:88, took 0.474392 s
      org.apache.spark.SparkException: Job aborted due to stage failure: Task 5 in stage 0.0 failed 1 times, most recent failure: Lost task 5.0 in stage 0.0 (TID 5, localhost): java.lang.ClassCastException: org.apache.spark.sql.catalyst.expressions.MutableFloat cannot be cast to org.apache.spark.sql.catalyst.expressions.MutableLong
      	at org.apache.spark.sql.catalyst.expressions.SpecificMutableRow.setLong(SpecificMutableRow.scala:292)
      	at org.apache.spark.sql.columnar.compression.LongDelta$Decoder.next(compressionSchemes.scala:539)
      	at org.apache.spark.sql.columnar.compression.CompressibleColumnAccessor$class.extractSingle(CompressibleColumnAccessor.scala:37)
      	at org.apache.spark.sql.columnar.NativeColumnAccessor.extractSingle(ColumnAccessor.scala:64)
      	at org.apache.spark.sql.columnar.BasicColumnAccessor.extractTo(ColumnAccessor.scala:54)
      	at org.apache.spark.sql.columnar.NativeColumnAccessor.org$apache$spark$sql$columnar$NullableColumnAccessor$$super$extractTo(ColumnAccessor.scala:64)
      	at org.apache.spark.sql.columnar.NullableColumnAccessor$class.extractTo(NullableColumnAccessor.scala:52)
      	at org.apache.spark.sql.columnar.NativeColumnAccessor.extractTo(ColumnAccessor.scala:64)
      	at org.apache.spark.sql.columnar.InMemoryColumnarTableScan$$anonfun$8$$anonfun$13$$anon$2.next(InMemoryColumnarTableScan.scala:295)
      	at org.apache.spark.sql.columnar.InMemoryColumnarTableScan$$anonfun$8$$anonfun$13$$anon$2.next(InMemoryColumnarTableScan.scala:290)
      	at scala.collection.Iterator$$anon$13.next(Iterator.scala:372)
      	at org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1$$anonfun$6.apply(Aggregate.scala:130)
      	at org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1$$anonfun$6.apply(Aggregate.scala:126)
      	at org.apache.spark.rdd.RDD$$anonfun$14.apply(RDD.scala:640)
      	at org.apache.spark.rdd.RDD$$anonfun$14.apply(RDD.scala:640)
      	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
      	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
      	at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
      	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
      	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
      	at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
      	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:68)
      	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
      	at org.apache.spark.scheduler.Task.run(Task.scala:64)
      	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:210)
      	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
      	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
      	at java.lang.Thread.run(Thread.java:745)
      

      Attachments

        Activity

          People

            yhuai Yin Huai
            yhuai Yin Huai
            Votes:
            0 Vote for this issue
            Watchers:
            3 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved: