Details
-
Type: Documentation
-
Status: Resolved
-
Priority: Minor
-
Resolution: Fixed
-
Affects Versions: 1.2.1, 1.3.0
-
Environment: Ubuntu 14.04 64-bit, spark-1.3.0-bin-hadoop2.4
Description
Here's the REPL output:
{code:java}
scala> uniqueIds.collect
res10: Array[String] = Array(4, 8, 21, 80, 20, 98, 42, 15, 48, 36, 90, 46, 55, 16, 31, 71, 9, 50, 28, 61, 68, 85, 12, 94, 38, 77, 2, 11, 10)
scala> val swr = uniqueIds.sample(true, 0.5)
swr: org.apache.spark.rdd.RDD[String] = PartitionwiseSampledRDD[22] at sample at <console>:27
scala> swr.count
res17: Long = 16
scala> val swr = uniqueIds.sample(true, 0.5)
swr: org.apache.spark.rdd.RDD[String] = PartitionwiseSampledRDD[23] at sample at <console>:27
scala> swr.count
res18: Long = 8
scala> val swr = uniqueIds.sample(true, 0.5)
swr: org.apache.spark.rdd.RDD[String] = PartitionwiseSampledRDD[24] at sample at <console>:27
scala> swr.count
res19: Long = 18
scala> val swr = uniqueIds.sample(true, 0.5)
swr: org.apache.spark.rdd.RDD[String] = PartitionwiseSampledRDD[25] at sample at <console>:27
scala> swr.count
res20: Long = 15
scala> val swr = uniqueIds.sample(true, 0.5)
swr: org.apache.spark.rdd.RDD[String] = PartitionwiseSampledRDD[26] at sample at <console>:27
scala> swr.count
res21: Long = 11
scala> val swr = uniqueIds.sample(true, 0.5)
swr: org.apache.spark.rdd.RDD[String] = PartitionwiseSampledRDD[27] at sample at <console>:27
scala> swr.count
res22: Long = 10
{code}