Uploaded image for project: 'Spark'
  1. Spark
  2. SPARK-6247

Certain self joins cannot be analyzed

    Export: XML | Word | Printable | JSON

Details

    • Type: Bug
    • Status: Resolved
    • Priority: Blocker
    • Resolution: Fixed
    • Affects Version/s: None
    • Fix Version/s: 1.3.1
    • Component/s: SQL
    • Labels: None

    Description

      When you run the following code:

      val df =
         (1 to 10)
            .map(i => (i, i.toDouble, i.toLong, i.toString, i.toString))
            .toDF("intCol", "doubleCol", "longCol", "stringCol1", "stringCol2")
      
      df.registerTempTable("test")
      
      sql(
        """
        |SELECT x.stringCol2, avg(y.intCol), sum(x.doubleCol)
        |FROM test x JOIN test y ON (x.stringCol1 = y.stringCol1)
        |GROUP BY x.stringCol2
        """.stripMargin).explain()
      

      the following exception is thrown:

      [info]   java.util.NoSuchElementException: next on empty iterator
      [info]   at scala.collection.Iterator$$anon$2.next(Iterator.scala:39)
      [info]   at scala.collection.Iterator$$anon$2.next(Iterator.scala:37)
      [info]   at scala.collection.IndexedSeqLike$Elements.next(IndexedSeqLike.scala:64)
      [info]   at scala.collection.IterableLike$class.head(IterableLike.scala:91)
      [info]   at scala.collection.mutable.ArrayBuffer.scala$collection$IndexedSeqOptimized$$super$head(ArrayBuffer.scala:47)
      [info]   at scala.collection.IndexedSeqOptimized$class.head(IndexedSeqOptimized.scala:120)
      [info]   at scala.collection.mutable.ArrayBuffer.head(ArrayBuffer.scala:47)
      [info]   at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$7.applyOrElse(Analyzer.scala:247)
      [info]   at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$7.applyOrElse(Analyzer.scala:197)
      [info]   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:250)
      [info]   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:250)
      [info]   at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:50)
      [info]   at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:249)
      [info]   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:263)
      [info]   at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
      [info]   at scala.collection.Iterator$class.foreach(Iterator.scala:727)
      [info]   at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
      [info]   at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48)
      [info]   at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103)
      [info]   at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47)
      [info]   at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273)
      [info]   at scala.collection.AbstractIterator.to(Iterator.scala:1157)
      [info]   at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265)
      [info]   at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157)
      [info]   at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252)
      [info]   at scala.collection.AbstractIterator.toArray(Iterator.scala:1157)
      [info]   at org.apache.spark.sql.catalyst.trees.TreeNode.transformChildrenUp(TreeNode.scala:292)
      [info]   at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:247)
      [info]   at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.apply(Analyzer.scala:197)
      [info]   at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.apply(Analyzer.scala:196)
      [info]   at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1$$anonfun$apply$2.apply(RuleExecutor.scala:61)
      [info]   at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1$$anonfun$apply$2.apply(RuleExecutor.scala:59)
      [info]   at scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:111)
      [info]   at scala.collection.immutable.List.foldLeft(List.scala:84)
      [info]   at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1.apply(RuleExecutor.scala:59)
      [info]   at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1.apply(RuleExecutor.scala:51)
      [info]   at scala.collection.immutable.List.foreach(List.scala:318)
      [info]   at org.apache.spark.sql.catalyst.rules.RuleExecutor.apply(RuleExecutor.scala:51)
      [info]   at org.apache.spark.sql.SQLContext$QueryExecution.analyzed$lzycompute(SQLContext.scala:1071)
      [info]   at org.apache.spark.sql.SQLContext$QueryExecution.analyzed(SQLContext.scala:1071)
      [info]   at org.apache.spark.sql.SQLContext$QueryExecution.assertAnalyzed(SQLContext.scala:1069)
      [info]   at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:133)
      [info]   at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:51)
      [info]   at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:915)
      [info]   at org.apache.spark.serializer.SparkSqlSerializer2Suite$$anonfun$2.apply$mcV$sp(SparkSqlSerializer2Suite.scala:66)
      [info]   at org.apache.spark.serializer.SparkSqlSerializer2Suite$$anonfun$2.apply(SparkSqlSerializer2Suite.scala:48)
      [info]   at org.apache.spark.serializer.SparkSqlSerializer2Suite$$anonfun$2.apply(SparkSqlSerializer2Suite.scala:48)
      [info]   at org.scalatest.Transformer$$anonfun$apply$1.apply$mcV$sp(Transformer.scala:22)
      [info]   at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85)
      [info]   at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
      [info]   at org.scalatest.Transformer.apply(Transformer.scala:22)
      [info]   at org.scalatest.Transformer.apply(Transformer.scala:20)
      [info]   at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:166)
      [info]   at org.scalatest.Suite$class.withFixture(Suite.scala:1122)
      [info]   at org.scalatest.FunSuite.withFixture(FunSuite.scala:1555)
      [info]   at org.scalatest.FunSuiteLike$class.invokeWithFixture$1(FunSuiteLike.scala:163)
      [info]   at org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:175)
      [info]   at org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:175)
      [info]   at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306)
      [info]   at org.scalatest.FunSuiteLike$class.runTest(FunSuiteLike.scala:175)
      [info]   at org.scalatest.FunSuite.runTest(FunSuite.scala:1555)
      [info]   at org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:208)
      [info]   at org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:208)
      [info]   at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:413)
      [info]   at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:401)
      [info]   at scala.collection.immutable.List.foreach(List.scala:318)
      [info]   at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401)
      [info]   at org.scalatest.SuperEngine.org$scalatest$SuperEngine$$runTestsInBranch(Engine.scala:396)
      [info]   at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:483)
      [info]   at org.scalatest.FunSuiteLike$class.runTests(FunSuiteLike.scala:208)
      [info]   at org.scalatest.FunSuite.runTests(FunSuite.scala:1555)
      [info]   at org.scalatest.Suite$class.run(Suite.scala:1424)
      [info]   at org.scalatest.FunSuite.org$scalatest$FunSuiteLike$$super$run(FunSuite.scala:1555)
      [info]   at org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:212)
      [info]   at org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:212)
      [info]   at org.scalatest.SuperEngine.runImpl(Engine.scala:545)
      [info]   at org.scalatest.FunSuiteLike$class.run(FunSuiteLike.scala:212)
      [info]   at org.scalatest.FunSuite.run(FunSuite.scala:1555)
      [info]   at org.scalatest.tools.Framework.org$scalatest$tools$Framework$$runSuite(Framework.scala:462)
      [info]   at org.scalatest.tools.Framework$ScalaTestTask.execute(Framework.scala:671)
      [info]   at sbt.ForkMain$Run$2.call(ForkMain.java:294)
      [info]   at sbt.ForkMain$Run$2.call(ForkMain.java:284)
      [info]   at java.util.concurrent.FutureTask.run(FutureTask.java:262)
      [info]   at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
      [info]   at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
      [info]   at java.lang.Thread.run(Thread.java:745)
      

      Attachments

        Issue Links

          Activity

            People

              Assignee: marmbrus Michael Armbrust
              Reporter: yhuai Yin Huai
              Votes:
              0 Vote for this issue
              Watchers:
              3 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved: