Uploaded image for project: 'Spark'
  1. Spark
  2. SPARK-17354

java.lang.ClassCastException: java.lang.Integer cannot be cast to java.sql.Date

    Export: XML | Word | Printable | JSON

Details

    • Type: Bug
    • Status: Resolved
    • Priority: Minor
    • Resolution: Fixed
    • Affects Version/s: 2.0.0
    • Fix Version/s: 2.0.1, 2.1.0
    • Component/s: SQL
    • Labels: None

    Description

      A Hive database has one table with a column of type Date. Running a select query with Spark 2.0.0 SQL and calling show() on the resulting DataFrame throws a ClassCastException. The same code works fine on Spark 1.6.2. Please see the sample code below.

      // Reproduction script: writes two rows into a Parquet-backed table partitioned
      // by a DATE column, then reads them back with show().
      // NOTE(review): assumes a spark-shell style session where `spark` and the
      // implicits needed for `toDF` are already in scope — confirm.
      import java.util.Calendar
      // Current time, reused below as the date value for both sample rows.
      val now = Calendar.getInstance().getTime()
      // Row shape for the sample data; pdate is the java.sql.Date field that becomes
      // the partition column of the table created below.
      case class Order(id : Int, customer : String, city : String, pdate : java.sql.Date)
      val orders = Seq(
            Order(1, "John S", "San Mateo", new java.sql.Date(now.getTime)),
            Order(2, "John D", "Redwood City", new java.sql.Date(now.getTime))
      	  )	  
      // Expose the sample rows to SQL under the temp view name "orders1".
      orders.toDF.createOrReplaceTempView("orders1")
      
      // Target table: pdate is declared as a DATE partition column rather than a
      // regular column, and the data is stored as Parquet.
      spark.sql("CREATE TABLE IF NOT EXISTS order(id INT, customer String,city String)PARTITIONED BY (pdate DATE)STORED AS PARQUETFILE")
      // The INSERT below gives no static value for pdate; presumably nonstrict mode is
      // what permits that fully dynamic partition insert — verify against Hive docs.
      spark.sql("set hive.exec.dynamic.partition.mode=nonstrict")
      spark.sql("INSERT INTO TABLE order PARTITION(pdate) SELECT * FROM orders1")
      // Per the report, this show() call is where Spark 2.0.0 throws
      // java.lang.ClassCastException: java.lang.Integer cannot be cast to java.sql.Date.
      spark.sql("SELECT * FROM order").show()
      

      Exception details

      16/09/01 10:30:07 ERROR Executor: Exception in task 0.0 in stage 5.0 (TID 6)
      java.lang.ClassCastException: java.lang.Integer cannot be cast to java.sql.Date
      	at org.apache.spark.sql.execution.vectorized.ColumnVectorUtils.populate(ColumnVectorUtils.java:89)
      	at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.initBatch(VectorizedParquetRecordReader.java:185)
      	at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.initBatch(VectorizedParquetRecordReader.java:204)
      	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anonfun$buildReader$1.apply(ParquetFileFormat.scala:362)
      	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anonfun$buildReader$1.apply(ParquetFileFormat.scala:339)
      	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:116)
      	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:91)
      	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.scan_nextBatch$(Unknown Source)
      	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.processNext(Unknown Source)
      	at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
      	at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$8$$anon$1.hasNext(WholeStageCodegenExec.scala:370)
      	at org.apache.spark.sql.execution.SparkPlan$$anonfun$4.apply(SparkPlan.scala:246)
      	at org.apache.spark.sql.execution.SparkPlan$$anonfun$4.apply(SparkPlan.scala:240)
      	at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:784)
      	at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:784)
      	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
      	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319)
      	at org.apache.spark.rdd.RDD.iterator(RDD.scala:283)
      	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70)
      	at org.apache.spark.scheduler.Task.run(Task.scala:85)
      	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
      	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
      	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
      	at java.lang.Thread.run(Thread.java:745)
      

      Expected output

       
      +---+--------+------------+----------+
      | id|customer|        city|     pdate|
      +---+--------+------------+----------+
      |  1|  John S|   San Mateo|2016-09-01|
      |  2|  John D|Redwood City|2016-09-01|
      +---+--------+------------+----------+
      

      Workaround for Spark 2.0.0

      Setting spark.sql.parquet.enableVectorizedReader=false before calling show() on the DataFrame returns the expected result.

       
      spark.sql("set spark.sql.parquet.enableVectorizedReader=false")
      

      Attachments

        Activity

          People

            hyukjin.kwon Hyukjin Kwon
            baghelamit Amit Baghel
            Votes:
            0 Vote for this issue
            Watchers:
            4 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved: