Uploaded image for project: 'Spark'
  1. Spark
  2. SPARK-6370

Improve documentation of RDD.sample() fraction's effect

    XML | Word | Printable | JSON

Details

    • Type: Documentation
    • Status: Resolved
    • Priority: Minor
    • Resolution: Fixed
    • Affects Version/s: 1.2.1, 1.3.0
    • Fix Version/s: 1.4.0
    • Component/s: Spark Core
    • Environment: Ubuntu 14.04 64-bit, spark-1.3.0-bin-hadoop2.4

    Description

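      Here's the REPL output. Note that the number of sampled elements varies from run to run even though the fraction stays fixed at 0.5: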

      {code:java}
      scala> uniqueIds.collect
      res10: Array[String] = Array(4, 8, 21, 80, 20, 98, 42, 15, 48, 36, 90, 46, 55, 16, 31, 71, 9, 50, 28, 61, 68, 85, 12, 94, 38, 77, 2, 11, 10)

      scala> val swr = uniqueIds.sample(true, 0.5)
      swr: org.apache.spark.rdd.RDD[String] = PartitionwiseSampledRDD[22] at sample at <console>:27

      scala> swr.count
      res17: Long = 16

      scala> val swr = uniqueIds.sample(true, 0.5)
      swr: org.apache.spark.rdd.RDD[String] = PartitionwiseSampledRDD[23] at sample at <console>:27

      scala> swr.count
      res18: Long = 8

      scala> val swr = uniqueIds.sample(true, 0.5)
      swr: org.apache.spark.rdd.RDD[String] = PartitionwiseSampledRDD[24] at sample at <console>:27

      scala> swr.count
      res19: Long = 18

      scala> val swr = uniqueIds.sample(true, 0.5)
      swr: org.apache.spark.rdd.RDD[String] = PartitionwiseSampledRDD[25] at sample at <console>:27

      scala> swr.count
      res20: Long = 15

      scala> val swr = uniqueIds.sample(true, 0.5)
      swr: org.apache.spark.rdd.RDD[String] = PartitionwiseSampledRDD[26] at sample at <console>:27

      scala> swr.count
      res21: Long = 11

      scala> val swr = uniqueIds.sample(true, 0.5)
      swr: org.apache.spark.rdd.RDD[String] = PartitionwiseSampledRDD[27] at sample at <console>:27

      scala> swr.count
      res22: Long = 10
      {code}

      Attachments

        Activity

          People

            Assignee: mbonaci Marko Bonaci
            Reporter: mbonaci Marko Bonaci
            Votes:
            0 Vote for this issue
            Watchers:
            4 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved: