Uploaded image for project: 'Spark'
  1. Spark
  2. SPARK-12512

WithColumn does not work on multiple columns with special characters

    Details

    • Type: Bug
    • Status: Resolved
    • Priority: Major
    • Resolution: Fixed
    • Affects Version/s: 1.5.2
    • Fix Version/s: 2.0.0
    • Component/s: SQL
    • Labels:

      Description

      For simplicity, I am using a Scala IDE worksheet to show the problem.

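      The failure first appears at .withColumnRenamed("bField","k.b:Field"): once the DataFrame already has a column named k.a:Field, the call throws an AnalysisException (cannot resolve 'k.a:Field').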

      Bar.scala
      // Reproduction worksheet for the column-rename failure described in this issue.
      // Lines starting with `//>` and `//|` are the worksheet's recorded evaluation
      // output for the statement on their left; they are evidence, not commentary.
      object bug {
        println("Welcome to the Scala worksheet")       //> Welcome to the Scala worksheet
        
        import org.apache.spark.SparkContext
      	import org.apache.spark.SparkConf
      	import org.apache.spark.sql.SQLContext
      	import org.apache.spark.sql.Row
      	import org.apache.spark.sql.types.DateType
      	import org.apache.spark.sql.functions._
      	import org.apache.spark.storage.StorageLevel._
      	import org.apache.spark.sql.types.{StructType,StructField,StringType}
      	
        // Spark configuration using the "local[4]" master and the app name "Testbug".
      	val conf = new SparkConf()
                   .setMaster("local[4]")
                   .setAppName("Testbug")               //> conf  : org.apache.spark.SparkConf = org.apache.spark.SparkConf@3b94d659
        
        val sc = new SparkContext(conf)                 //> sc  : org.apache.spark.SparkContext = org.apache.spark.SparkContext@1dcca8d3
                                                        //| 
         
        val sqlContext = new SQLContext(sc)             //> sqlContext  : org.apache.spark.sql.SQLContext = org.apache.spark.sql.SQLCont
                                                        //| ext@2d23faef
        
        // Comma-separated column names; none of the original names contains special characters.
        val schemaString = "aField,bField,cField"       //> schemaString  : String = aField,bField,cField
        
        // One nullable StringType field per name in schemaString.
        val schema = StructType(schemaString.split(",")
        	.map(fieldName => StructField(fieldName, StringType, true)))
                                                        //> schema  : org.apache.spark.sql.types.StructType = StructType(StructField(aFi
                                                        //| eld,StringType,true), StructField(bField,StringType,true), StructField(cFiel
                                                        //| d,StringType,true))
        //import sqlContext.implicits._
         
        // A single-row data set ("a", "b", "c") converted to Row objects to match the schema.
        val newRDD = sc.parallelize(List(("a","b","c")))
        	.map(x=>Row(x._1,x._2,x._3))              //> newRDD  : org.apache.spark.rdd.RDD[org.apache.spark.sql.Row] = MapPartitions
                                                        //| RDD[1] at map at com.joee.worksheet.bug.scala:30
        
        val newDF = sqlContext.createDataFrame(newRDD, schema)
                                                        //> newDF  : org.apache.spark.sql.DataFrame = [aField: string, bField: string, c
                                                        //| Field: string]
      	
        // Control case: renaming two columns to names WITHOUT special characters works,
        // as the printed table shows. Because the chain ends in show(), changeDF is Unit,
        // not a DataFrame (see the recorded "changeDF  : Unit = ()").
        val changeDF = newDF.withColumnRenamed("aField","anodotField")
        .withColumnRenamed("bField","bnodotField")
        .show()                                         //> +-----------+-----------+------+
                                                        //| |anodotField|bnodotField|cField|
                                                        //| +-----------+-----------+------+
                                                        //| |          a|          b|     c|
                                                        //| +-----------+-----------+------+
                                                        //| 
                                                        //| changeDF  : Unit = ()
        // First rename to a name containing '.' and ':' succeeds: the recorded schema
        // lists k.a:Field as a column.
        val changeDFwithdotfield1 = newDF.withColumnRenamed("aField","k.a:Field")
                                                        //> changeDFwithdotfield1  : org.apache.spark.sql.DataFrame = [k.a:Field: strin
                                                        //| g, bField: string, cField: string]
        
        // Failure point: renaming a SECOND column on that DataFrame throws
        // AnalysisException. The message says 'k.a:Field' cannot be resolved even though
        // it appears in the listed input columns.
        // NOTE(review): presumably the existing name is re-parsed as a dotted reference
        // rather than matched literally -- confirm against the Spark implementation.
        val changeDFwithdotfield = changeDFwithdotfield1 .withColumnRenamed("bField","k.b:Field")
                                                        //> org.apache.spark.sql.AnalysisException: cannot resolve 'k.a:Field' given in
                                                        //| put columns k.a:Field, bField, cField;
                                                        //| 	at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAn
                                                        //| alysis(package.scala:42)
                                                        //| 	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAn
                                                        //| alysis$1$$anonfun$apply$2.applyOrElse(CheckAnalysis.scala:56)
                                                        //| 	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAn
                                                        //| alysis$1$$anonfun$apply$2.applyOrElse(CheckAnalysis.scala:53)
                                                        //| 	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.a
                                                        //| pply(TreeNode.scala:293)
                                                        //| 	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.a
                                                        //| pply(TreeNode.scala:293)
                                                        //| 	at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNod
                                                        //| e.scala:51)
                                                        //| 	at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.sca
                                                        //| la:292)
                                                        //| 	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.app
                                                        //| Output exceeds cutoff limit.
        
                                                        
        // The withColumn call named in the issue title. No output was recorded for it;
        // presumably evaluation stopped at the exception above -- TODO confirm.
        val changeDFwithdotfieldlt = changeDFwithdotfield.withColumn("k.a:Field",lit("tt")).show(10)
      }
      

        Attachments

          Activity

            People

            • Assignee:
              xguo27 Xiu (Joe) Guo
              Reporter:
              tweakmy JO EE
            • Votes:
              0 Vote for this issue
              Watchers:
              6 Start watching this issue

              Dates

              • Created:
                Updated:
                Resolved: