  Spark / SPARK-15465

AnalysisException: cannot cast StructType to VectorUDT


Details

    • Type: Bug
    • Status: Closed
    • Priority: Major
    • Resolution: Not A Problem
    • Affects Version/s: 2.0.0
    • Fix Version/s: None
    • Component/s: None
    • Labels: None
    • Environment: Oracle Java 8, Mac OS X

    Description

      The following code throws an exception when using the latest Spark 2.0.0-SNAPSHOT:

      MapTest.scala
      import org.apache.spark.mllib.linalg.Vectors
      import org.apache.spark.sql.SQLContext
      import org.apache.spark.{SparkConf, SparkContext}
      
      object MapTest {
        def main(args: Array[String]): Unit = {
          val conf = new SparkConf()
            .setAppName("Foo Bar")
            .setMaster("local[1]")
          val sc = new SparkContext(conf)
          val sql = new SQLContext(sc)
      
          import sql.implicits._
      
          sql
            .createDataFrame(Seq((1, 2.0)))
            .toDF("key", "value")
            .map(r => (r.getInt(0), Vectors.dense(r.getDouble(1))))
            .toDF("key", "value")
            .show()
        }
      }
      

      Stack trace:

      Exception in thread "main" org.apache.spark.sql.AnalysisException: cannot resolve 'CAST(`_2` AS STRUCT<`type`: TINYINT, `size`: INT, `indices`: ARRAY<INT>, `values`: ARRAY<DOUBLE>>)' due to data type mismatch: cannot cast StructType(StructField(type,ByteType,false), StructField(size,IntegerType,true), StructField(indices,ArrayType(IntegerType,false),true), StructField(values,ArrayType(DoubleType,false),true)) to org.apache.spark.mllib.linalg.VectorUDT@f71b0bce;
      	at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42)
      	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$2.applyOrElse(CheckAnalysis.scala:66)
      	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$2.applyOrElse(CheckAnalysis.scala:58)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:287)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:287)
      	at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:68)
      	at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:286)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:284)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:284)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5$$anonfun$apply$11.apply(TreeNode.scala:336)
      	at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
      	at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
      	at scala.collection.immutable.List.foreach(List.scala:381)
      	at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
      	at scala.collection.immutable.List.map(List.scala:285)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:334)
      	at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
      	at scala.collection.Iterator$class.foreach(Iterator.scala:893)
      	at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
      	at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)
      	at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)
      	at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)
      	at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)
      	at scala.collection.AbstractIterator.to(Iterator.scala:1336)
      	at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)
      	at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1336)
      	at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)
      	at scala.collection.AbstractIterator.toArray(Iterator.scala:1336)
      	at org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:356)
      	at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:284)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:284)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:284)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5$$anonfun$apply$11.apply(TreeNode.scala:336)
      	at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
      	at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
      	at scala.collection.immutable.List.foreach(List.scala:381)
      	at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
      	at scala.collection.immutable.List.map(List.scala:285)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:334)
      	at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
      	at scala.collection.Iterator$class.foreach(Iterator.scala:893)
      	at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
      	at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)
      	at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)
      	at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)
      	at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)
      	at scala.collection.AbstractIterator.to(Iterator.scala:1336)
      	at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)
      	at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1336)
      	at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)
      	at scala.collection.AbstractIterator.toArray(Iterator.scala:1336)
      	at org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:356)
      	at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:284)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:284)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:284)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:307)
      	at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
      	at scala.collection.Iterator$class.foreach(Iterator.scala:893)
      	at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
      	at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)
      	at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)
      	at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)
      	at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)
      	at scala.collection.AbstractIterator.to(Iterator.scala:1336)
      	at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)
      	at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1336)
      	at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)
      	at scala.collection.AbstractIterator.toArray(Iterator.scala:1336)
      	at org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:356)
      	at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:284)
      	at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressionUp$1(QueryPlan.scala:190)
      	at org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$2(QueryPlan.scala:200)
      	at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$2$1.apply(QueryPlan.scala:204)
      	at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
      	at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
      	at scala.collection.immutable.List.foreach(List.scala:381)
      	at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
      	at scala.collection.immutable.List.map(List.scala:285)
      	at org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$2(QueryPlan.scala:204)
      	at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$5.apply(QueryPlan.scala:209)
      	at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
      	at scala.collection.Iterator$class.foreach(Iterator.scala:893)
      	at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
      	at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)
      	at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)
      	at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)
      	at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)
      	at scala.collection.AbstractIterator.to(Iterator.scala:1336)
      	at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)
      	at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1336)
      	at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)
      	at scala.collection.AbstractIterator.toArray(Iterator.scala:1336)
      	at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressionsUp(QueryPlan.scala:209)
      	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:58)
      	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:51)
      	at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:125)
      	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:51)
      	at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:56)
      	at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.resolve(ExpressionEncoder.scala:328)
      	at org.apache.spark.sql.Dataset.<init>(Dataset.scala:205)
      	at org.apache.spark.sql.Dataset.<init>(Dataset.scala:168)
      	at org.apache.spark.sql.Dataset$.apply(Dataset.scala:57)
      	at org.apache.spark.sql.Dataset.withTypedPlan(Dataset.scala:2498)
      	at org.apache.spark.sql.Dataset.map(Dataset.scala:1937)
      	at MapTest$.main(MapTest.scala:18)
      	at MapTest.main(MapTest.scala)
      	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
      	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
      	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
      	at java.lang.reflect.Method.invoke(Method.java:497)
      	at com.intellij.rt.execution.application.AppMain.main(AppMain.java:144)
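
      Since this issue was closed as "Not A Problem", the expected outcome is a change on the application side rather than in Spark. One way to avoid the failing typed map, sketched below and not verified against this exact 2.0.0-SNAPSHOT (the object name MapTestWorkaround is purely illustrative), is to go through the RDD API: createDataFrame on an RDD of tuples resolves the mllib.linalg.Vector field through its registered VectorUDT, so the cast from StructType never comes into play.

      MapTestWorkaround.scala
      import org.apache.spark.mllib.linalg.Vectors
      import org.apache.spark.sql.SQLContext
      import org.apache.spark.{SparkConf, SparkContext}

      // Hypothetical workaround sketch, not part of the original report.
      object MapTestWorkaround {
        def main(args: Array[String]): Unit = {
          val conf = new SparkConf()
            .setAppName("Foo Bar")
            .setMaster("local[1]")
          val sc = new SparkContext(conf)
          val sql = new SQLContext(sc)

          // Build the (key, vector) pairs on the RDD side, then let
          // createDataFrame map the mllib Vector field to its UDT column.
          val pairs = sql
            .createDataFrame(Seq((1, 2.0)))
            .toDF("key", "value")
            .rdd
            .map(r => (r.getInt(0), Vectors.dense(r.getDouble(1))))

          sql.createDataFrame(pairs).toDF("key", "value").show()
        }
      }

      As an aside not stated in this report: if the vectors are destined for the DataFrame-based ML pipeline, the 2.0 pipeline API uses org.apache.spark.ml.linalg rather than mllib.linalg.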
      


          People

            Assignee: Unassigned
            Reporter: Dmitry Zhukov (dzhukov@gmail.com)
            Votes: 0
            Watchers: 4
