SPARK-38507: DataFrame withColumn method not adding or replacing columns when alias is used


Details

    • Type: Bug
    • Status: Resolved
    • Priority: Major
    • Resolution: Not A Problem
    • Affects Version/s: 3.1.2
    • Fix Version/s: None
    • Component/s: SQL

    Description

      I have an input DataFrame df created as follows:

      import spark.implicits._
      val df = List((5, 10), (6, 20)).toDF("field1", "field2").alias("df") 

      When I execute either this command:

      df.select("df.field2").show(2) 

      or that one:

      df.withColumn("df.field2", lit(0)).select("df.field2").show(2) 

      I get the same result:

      +------+
      |field2|
      +------+
      |    10|
      |    20|
      +------+ 
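      Editor's note, a minimal sketch not taken from the original report: a likely explanation of the output above is that withColumn compares the supplied name against the flat column names of the DataFrame, so "df.field2" does not match "field2" and a new column literally named "df.field2" is appended rather than replacing field2, while select("df.field2") resolves the dot as the alias qualifier "df" and returns the original column. Printing the schema (assuming the same df as above) makes this visible:

      // Sketch: withColumn treats "df.field2" as a literal column name and
      // appends it; field2 keeps its original values.
      df.withColumn("df.field2", lit(0)).printSchema()
      // root
      //  |-- field1: integer (nullable = false)
      //  |-- field2: integer (nullable = false)
      //  |-- df.field2: integer (nullable = false)

      // select("df.field2") resolves the dot as the alias qualifier "df",
      // which is why 10 and 20 are shown above rather than 0.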

      Additionally, when I execute the following command:

      df.withColumn("df.field3", lit(0)).select("df.field3").show(2)

      I get this exception:

      org.apache.spark.sql.AnalysisException: cannot resolve '`df.field3`' given input columns: [df.field3, df.field1, df.field2];
      'Project ['df.field3]
      +- Project [field1#7, field2#8, 0 AS df.field3#31]
         +- SubqueryAlias df
            +- Project [_1#2 AS field1#7, _2#3 AS field2#8]
               +- LocalRelation [_1#2, _2#3]
        at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42)
        at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$$nestedInanonfun$checkAnalysis$1$2.applyOrElse(CheckAnalysis.scala:155)
        at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$$nestedInanonfun$checkAnalysis$1$2.applyOrElse(CheckAnalysis.scala:152)
        at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformUp$2(TreeNode.scala:342)
        at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:74)
        at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:342)
        at org.apache.spark.sql.catalyst.plans.QueryPlan.$anonfun$transformExpressionsUp$1(QueryPlan.scala:104)
        at org.apache.spark.sql.catalyst.plans.QueryPlan.$anonfun$mapExpressions$1(QueryPlan.scala:116)
        at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:74)
        at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpression$1(QueryPlan.scala:116)
        at org.apache.spark.sql.catalyst.plans.QueryPlan.recursiveTransform$1(QueryPlan.scala:127)
        at org.apache.spark.sql.catalyst.plans.QueryPlan.$anonfun$mapExpressions$3(QueryPlan.scala:132)
        at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238)
        at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
        at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
        at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
        at scala.collection.TraversableLike.map(TraversableLike.scala:238)
        at scala.collection.TraversableLike.map$(TraversableLike.scala:231)
        at scala.collection.AbstractTraversable.map(Traversable.scala:108)
        at org.apache.spark.sql.catalyst.plans.QueryPlan.recursiveTransform$1(QueryPlan.scala:132)
        at org.apache.spark.sql.catalyst.plans.QueryPlan.$anonfun$mapExpressions$4(QueryPlan.scala:137)
        at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:244)
        at org.apache.spark.sql.catalyst.plans.QueryPlan.mapExpressions(QueryPlan.scala:137)
        at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressionsUp(QueryPlan.scala:104)
        at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis$1(CheckAnalysis.scala:152)
        at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis$1$adapted(CheckAnalysis.scala:93)
        at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:184)
        at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis(CheckAnalysis.scala:93)
        at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis$(CheckAnalysis.scala:90)
        at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:155)
        at org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:176)
        at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:228)
        at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:173)
        at org.apache.spark.sql.execution.QueryExecution.$anonfun$analyzed$1(QueryExecution.scala:73)
        at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
        at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:143)
        at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
        at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:143)
        at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:73)
        at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:71)
        at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:63)
        at org.apache.spark.sql.Dataset$.$anonfun$ofRows$1(Dataset.scala:90)
        at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
        at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:88)
        at org.apache.spark.sql.Dataset.withPlan(Dataset.scala:3715)
        at org.apache.spark.sql.Dataset.select(Dataset.scala:1462)
        at org.apache.spark.sql.Dataset.select(Dataset.scala:1479)
        ... 49 elided
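
      Editor's note, a hedged sketch of the workaround (not from the original report), which is presumably why this was resolved as Not A Problem: quoting the name with backticks makes the analyzer treat "df.field3" as a single literal column name instead of qualifier "df" plus column "field3".

      // Sketch: backticks select the literally named column added by withColumn.
      df.withColumn("df.field3", lit(0)).select("`df.field3`").show(2)
      // +---------+
      // |df.field3|
      // +---------+
      // |        0|
      // |        0|
      // +---------+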


    People

    • Assignee: Unassigned
    • Reporter: Alexandros Mavrommatis (amavrommatis)
    • Votes: 0
    • Watchers: 2
