Details
Description
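The following code snippet demonstrates the problem: it adds 200 columns to a DataFrame one at a time with `withColumn` and prints the time (in milliseconds) taken by each call.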
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql._
import org.apache.spark.sql.types._

// Reproduction script: builds a small four-row DataFrame, then adds 200 derived
// columns one at a time with `withColumn`, printing the wall-clock time (ms)
// spent in each call, and finally shows the resulting DataFrame.
val sparkConf = new SparkConf()
  .setAppName("Spark Test")
  .setMaster(System.getProperty("spark.master", "local[4]"))
val sc = new SparkContext(sparkConf)

try {
  val sqlContext = new SQLContext(sc)
  import sqlContext.implicits._

  // Each row is laid out positionally as (Int, String, Int, Double).
  val custs = Seq(
    Row(1, "Bob", 21, 80.5),
    Row(2, "Bobby", 21, 80.5),
    Row(3, "Jean", 21, 80.5),
    Row(4, "Fatime", 21, 80.5)
  )

  // Field order must match the positional layout of the rows above, so the
  // String field "b" comes second and the Int field "a" comes third. Declaring
  // "a" before "b" would put the name strings into the IntegerType column that
  // the loop below does arithmetic on.
  val fields = List(
    StructField("id", IntegerType, nullable = true),
    StructField("b", StringType, nullable = true),
    StructField("a", IntegerType, nullable = true),
    StructField("target", DoubleType, nullable = false)
  )
  val schema = StructType(fields)

  val rdd = sc.parallelize(custs)

  // `df` is intentionally a var: the point of the script is to time each
  // successive `withColumn` call against the ever-wider DataFrame.
  var df = sqlContext.createDataFrame(rdd, schema)

  for (i <- 1 to 200) {
    val now = System.currentTimeMillis
    df = df.withColumn("a_new_col_" + i, df("a") + i)
    println(s"$i -> " + (System.currentTimeMillis - now))
  }

  df.show()
} finally {
  // Stop the context even if the job fails.
  sc.stop()
}