Details
Type: Bug
Status: Resolved
Priority: Major
Resolution: Incomplete
Affects Version/s: 2.1.0
Fix Version/s: None
Description
After upgrading to Spark 2.1.0 we noticed that duplicate jobs are executed. Going back to Spark 2.0.1, the duplicates are gone again.
import org.apache.spark.sql._

// Minimal reproduction: reading ORC sources and calling show() triggers the
// duplicate jobs on 2.1.0 but not on 2.0.1.
object DoubleJobs {
  def main(args: Array[String]) {
    System.setProperty("hadoop.home.dir", "/tmp")

    val sparkSession: SparkSession = SparkSession.builder
      .master("local[4]")
      .appName("spark session example")
      .config("spark.driver.maxResultSize", "6G")
      .config("spark.sql.orc.filterPushdown", true)
      .config("spark.sql.hive.metastorePartitionPruning", true)
      .getOrCreate()

    sparkSession.sqlContext.setConf("spark.sql.orc.filterPushdown", "true")

    val paths = Seq(
      "" // some orc source
    )

    def dataFrame(path: String): DataFrame = {
      sparkSession.read.orc(path)
    }

    paths.foreach(path => {
      dataFrame(path).show(20)
    })
  }
}
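One way to confirm the behaviour described above, rather than eyeballing the Spark UI, is to register a SparkListener that counts job starts: on 2.1.0 the count per show() call should be higher than on 2.0.1. The snippet below is a hypothetical sketch added for illustration only, not part of the original report; it assumes it is attached right after getOrCreate() in the reproduction above.

import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart}

// Hypothetical helper (not in the original report): counts submitted jobs so
// the duplication can be observed directly from driver logs.
val jobCounter = new SparkListener {
  @volatile var jobsStarted = 0
  override def onJobStart(jobStart: SparkListenerJobStart): Unit = {
    jobsStarted += 1
    println(s"Job ${jobStart.jobId} started (total so far: $jobsStarted)")
  }
}
sparkSession.sparkContext.addSparkListener(jobCounter)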