Details
Description
Spark shell snippet for reproduction:
import sqlContext.implicits._

case class Inner(f: Int)
case class Outer(i: Inner)

Seq(Outer(null)).toDS().toDF().show()
Seq(Outer(null)).toDS().show()
Expected output should be:
+----+
|   i|
+----+
|null|
+----+

+----+
|   i|
+----+
|null|
+----+
Actual output:
+------+ | i| +------+ |[null]| +------+ java.lang.RuntimeException: Error while decoding: java.lang.RuntimeException: Null value appeared in non-nullable field Inner.f of type scala.Int. If the schema is inferred from a Scala tuple/case class, or a Java bean, please try to use scala.Option[_] or other nullable types (e.g. java.lang.Integer instead of int/scala.Int). newinstance(class $iwC$$iwC$Outer,if (isnull(input[0, StructType(StructField(f,IntegerType,false))])) null else newinstance(class $iwC$$iwC$Inner,assertnotnull(input[0, StructType(StructField(f,IntegerType,false))].f,Inner,f,scala.Int),false,ObjectType(class $iwC$$iwC$Inner),Some($iwC$$iwC@6616b9e0)),false,ObjectType(class $iwC$$iwC$Outer),Some($iwC$$iwC@6ab35ce3)) +- if (isnull(input[0, StructType(StructField(f,IntegerType,false))])) null else newinstance(class $iwC$$iwC$Inner,assertnotnull(input[0, StructType(StructField(f,IntegerType,false))].f,Inner,f,scala.Int),false,ObjectType(class $iwC$$iwC$Inner),Some($iwC$$iwC@6616b9e0)) :- isnull(input[0, StructType(StructField(f,IntegerType,false))]) : +- input[0, StructType(StructField(f,IntegerType,false))] :- null +- newinstance(class $iwC$$iwC$Inner,assertnotnull(input[0, StructType(StructField(f,IntegerType,false))].f,Inner,f,scala.Int),false,ObjectType(class $iwC$$iwC$Inner),Some($iwC$$iwC@6616b9e0)) +- assertnotnull(input[0, StructType(StructField(f,IntegerType,false))].f,Inner,f,scala.Int) +- input[0, StructType(StructField(f,IntegerType,false))].f +- input[0, StructType(StructField(f,IntegerType,false))] at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.fromRow(ExpressionEncoder.scala:224) at org.apache.spark.sql.Dataset$$anonfun$collect$2.apply(Dataset.scala:704) at org.apache.spark.sql.Dataset$$anonfun$collect$2.apply(Dataset.scala:704) at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) at 
scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33) at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:108) at scala.collection.TraversableLike$class.map(TraversableLike.scala:244) at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:108) at org.apache.spark.sql.Dataset.collect(Dataset.scala:704) at org.apache.spark.sql.Dataset.take(Dataset.scala:725) at org.apache.spark.sql.Dataset.showString(Dataset.scala:240) at org.apache.spark.sql.Dataset.show(Dataset.scala:230) at org.apache.spark.sql.Dataset.show(Dataset.scala:193) at org.apache.spark.sql.Dataset.show(Dataset.scala:201) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:33) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:38) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:40) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:42) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:44) at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:46) at $iwC$$iwC$$iwC$$iwC.<init>(<console>:48) at $iwC$$iwC$$iwC.<init>(<console>:50) at $iwC$$iwC.<init>(<console>:52) at $iwC.<init>(<console>:54) at <init>(<console>:56) at .<init>(<console>:60) at .<clinit>(<console>) at .<init>(<console>:7) at .<clinit>(<console>) at $print(<console>) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:483) at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1045) at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1326) at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:821) at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:852) at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:800) at 
org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857) at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902) at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814) at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657) at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665) at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670) at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997) at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945) at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945) at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135) at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945) at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1064) at org.apache.spark.repl.Main$.main(Main.scala:31) at org.apache.spark.repl.Main.main(Main.scala) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:483) at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731) at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181) at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206) at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121) at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) Caused by: java.lang.RuntimeException: Null value appeared in non-nullable field Inner.f of type scala.Int. 
If the schema is inferred from a Scala tuple/case class, or a Java bean, please try to use scala.Option[_] or other nullable types (e.g. java.lang.Integer instead of int/scala.Int). at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.apply(Unknown Source) at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.fromRow(ExpressionEncoder.scala:221) ... 62 more
We can see that there's an unexpected extra nested row ([null] instead of null) in the first output, which causes the exception shown above when decoding back to the case class.