Details
Description
Here is a minimal Spark application that reproduces the issue:
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Minimal reproduction of the reported behaviour: counting an empty
 * DataFrame gives 0, but counting the same empty DataFrame after
 * `dropDuplicates` is reported to give 1.
 *
 * Run with a local master; no input data is required.
 */
object DropDupesApp {

  /**
   * Entry point. Starts a local SparkContext, runs the two assertions that
   * demonstrate the issue, and always stops the context afterwards.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("test")
      .setMaster("local")
    val sc = new SparkContext(conf)
    try {
      val sql = SQLContext.getOrCreate(sc)
      assert(sql.emptyDataFrame.count == 0) // expected
      // This assertion encodes the reported (buggy) result on purpose.
      assert(sql.emptyDataFrame.dropDuplicates.count == 1) // unexpected
    } finally {
      // Release the local Spark resources even if an assertion fails.
      sc.stop()
    }
  }
}