Index: modules/facet/src/test/org/apache/lucene/facet/search/sampling/BaseSampleTestTopK.java
===================================================================
--- modules/facet/src/test/org/apache/lucene/facet/search/sampling/BaseSampleTestTopK.java	(revision 1180628)
+++ modules/facet/src/test/org/apache/lucene/facet/search/sampling/BaseSampleTestTopK.java	(working copy)
@@ -2,6 +2,7 @@
 
 import java.io.IOException;
 import java.util.List;
+import java.util.Random;
 
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
@@ -41,7 +42,7 @@
   protected static final int K = 2; 
   
   /** since there is a chance that this test would fail even if the code is correct, retry the sampling */
-  protected static final int RETRIES = 4; 
+  protected static final int RETRIES = 10;
   
   protected abstract FacetsAccumulator getSamplingAccumulator(Sampler sampler,
       TaxonomyReader taxoReader, IndexReader indexReader,
@@ -53,53 +54,57 @@
    * is performed. The results are compared to non-sampled ones.
    */
   public void testCountUsingSamping() throws Exception, IOException {
+    boolean useRandomSampler = random.nextBoolean();
     for (int partitionSize : partitionSizes) {
-      initIndex(partitionSize);
-      
-      // Get all of the documents and run the query, then do different
-      // facet counts and compare to control
-      Query q = new TermQuery(new Term(CONTENT_FIELD, BETA)); // 90% of the docs
-      ScoredDocIdCollector docCollector = ScoredDocIdCollector.create(searcher.maxDoc(), false);
-      
-      FacetSearchParams expectedSearchParams = searchParamsWithRequests(K, partitionSize); 
-      FacetsCollector fc = new FacetsCollector(expectedSearchParams, indexReader, taxoReader);
-      
-      searcher.search(q, MultiCollector.wrap(docCollector, fc));
-      
-      List<FacetResult> expectedResults = fc.getFacetResults();
-      
-      // complement with sampling!
-      final Sampler sampler = createSampler(docCollector.getScoredDocIDs());
-      
-      FacetSearchParams samplingSearchParams = searchParamsWithRequests(K, partitionSize); 
-
-      assertSampling(expectedResults, q, sampler, samplingSearchParams, false);
-      assertSampling(expectedResults, q, sampler, samplingSearchParams, true);
-
-      closeAll();
-    }
-  }
-  
-  private void assertSampling(List<FacetResult> expected, Query q, Sampler sampler, FacetSearchParams params, boolean complement) throws Exception {
-    // try several times in case of failure, because the test has a chance to fail 
-    // if the top K facets are not sufficiently common with the sample set
-    for (int n=RETRIES; n>0; n--) {
-      FacetsCollector samplingFC = samplingCollector(false, sampler, params);
-      
-      searcher.search(q, samplingFC);
-      List<FacetResult> sampledResults = samplingFC.getFacetResults();
-      
       try {
-        assertSameResults(expected, sampledResults);
-        break; // succeeded
-      } catch (Exception e) {
-        if (n<=1) { // otherwise try again
-          throw e; 
+        initIndex(partitionSize);
+        // Get all of the documents and run the query, then do different
+        // facet counts and compare to control
+        Query q = new TermQuery(new Term(CONTENT_FIELD, BETA)); // 90% of the docs
+        ScoredDocIdCollector docCollector = ScoredDocIdCollector.create(searcher.maxDoc(), false);
+        
+        FacetSearchParams expectedSearchParams = searchParamsWithRequests(K, partitionSize); 
+        FacetsCollector fc = new FacetsCollector(expectedSearchParams, indexReader, taxoReader);
+        
+        searcher.search(q, MultiCollector.wrap(docCollector, fc));
+        
+        List<FacetResult> expectedResults = fc.getFacetResults();
+        
+        FacetSearchParams samplingSearchParams = searchParamsWithRequests(K, partitionSize); 
+        
+        // try several times in case of failure, because the test has a chance to fail 
+        // if the top K facets are not sufficiently common with the sample set
+        for (int nTrial=0; nTrial<RETRIES; nTrial++) {
+          try {
+            // complement with sampling!
+            final Sampler sampler = createSampler(nTrial, docCollector.getScoredDocIDs(), useRandomSampler);
+            
+            
+            assertSampling(expectedResults, q, sampler, samplingSearchParams, false);
+            assertSampling(expectedResults, q, sampler, samplingSearchParams, true);
+            
+            break; // succeeded
+          } catch (AssertionError e) {
+            if (nTrial>=RETRIES-1) { // otherwise try again
+              throw e; 
+            }
+          }
         }
+      } finally { 
+        closeAll();
       }
     }
   }
   
+  /** Runs <code>q</code> through a sampling collector (built with the given <code>complement</code> flag) and asserts its facet results match <code>expected</code>. */
+  private void assertSampling(List<FacetResult> expected, Query q, Sampler sampler, FacetSearchParams params, boolean complement) throws Exception {
+    FacetsCollector samplingFC = samplingCollector(complement, sampler, params);
+    searcher.search(q, samplingFC);
+    List<FacetResult> sampledResults = samplingFC.getFacetResults();
+    
+    assertSameResults(expected, sampledResults);
+  }
+  
   private FacetsCollector samplingCollector(
       final boolean complement,
       final Sampler sampler,
@@ -117,14 +122,19 @@
     return samplingFC;
   }
   
-  private Sampler createSampler(ScoredDocIDs scoredDocIDs) {
+  private Sampler createSampler(int nTrial, ScoredDocIDs scoredDocIDs, boolean useRandomSampler) {
     SamplingParams samplingParams = new SamplingParams();
-    samplingParams.setSampleRatio(0.8);
-    samplingParams.setMinSampleSize(100);
-    samplingParams.setMaxSampleSize(10000);
+    
+    final double retryFactor = Math.pow(1.01, nTrial);
+    samplingParams.setSampleRatio(0.8 * retryFactor);
+    samplingParams.setMinSampleSize((int) (100 * retryFactor));
+    samplingParams.setMaxSampleSize((int) (10000 * retryFactor));
+    samplingParams.setOversampleFactor(5.0 * retryFactor);
+
     samplingParams.setSampingThreshold(11000); //force sampling 
-    samplingParams.setOversampleFactor(5.0);
-    Sampler sampler = new Sampler(samplingParams);
+    Sampler sampler = useRandomSampler ? 
+        new RandomSampler(samplingParams, new Random(random.nextLong())) :
+          new RepeatableSampler(samplingParams);
     assertTrue("must enable sampling for this test!",sampler.shouldSample(scoredDocIDs));
     return sampler;
   }
Index: modules/facet/src/java/org/apache/lucene/facet/search/AdaptiveFacetsAccumulator.java
===================================================================
--- modules/facet/src/java/org/apache/lucene/facet/search/AdaptiveFacetsAccumulator.java	(revision 1180628)
+++ modules/facet/src/java/org/apache/lucene/facet/search/AdaptiveFacetsAccumulator.java	(working copy)
@@ -2,12 +2,15 @@
 
 import java.io.IOException;
 import java.util.List;
+import java.util.Random;
 
 import org.apache.lucene.index.IndexReader;
 
 import org.apache.lucene.facet.search.params.FacetSearchParams;
 import org.apache.lucene.facet.search.results.FacetResult;
 import org.apache.lucene.facet.search.results.FacetResultNode;
+import org.apache.lucene.facet.search.sampling.RandomSampler;
+import org.apache.lucene.facet.search.sampling.RepeatableSampler;
 import org.apache.lucene.facet.search.sampling.Sampler;
 import org.apache.lucene.facet.search.sampling.SamplingAccumulator;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
@@ -44,7 +47,8 @@
  */
 public final class AdaptiveFacetsAccumulator extends StandardFacetsAccumulator {
   
-  private Sampler sampler = new Sampler();
+  private Sampler sampler = new RandomSampler();
+  // NOTE(review): RepeatableSampler is the non-random alternative; it must be constructed with explicit SamplingParams.
 
   /**
    * Create an {@link AdaptiveFacetsAccumulator} 
Index: modules/facet/src/java/org/apache/lucene/facet/search/sampling/Sampler.java
===================================================================
--- modules/facet/src/java/org/apache/lucene/facet/search/sampling/Sampler.java	(revision 1180628)
+++ modules/facet/src/java/org/apache/lucene/facet/search/sampling/Sampler.java	(working copy)
@@ -1,8 +1,6 @@
 package org.apache.lucene.facet.search.sampling;
 
 import java.io.IOException;
-import java.util.logging.Level;
-import java.util.logging.Logger;
 
 import org.apache.lucene.index.IndexReader;
 
@@ -15,8 +13,6 @@
 import org.apache.lucene.facet.search.results.FacetResultNode;
 import org.apache.lucene.facet.search.results.MutableFacetResultNode;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
-import org.apache.lucene.facet.util.RandomSample;
-import org.apache.lucene.facet.util.ScoredDocIdsUtils;
 
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -48,11 +44,9 @@
  * 
  * @lucene.experimental
  */
-public class Sampler {
+public abstract class Sampler {
 
-  private static final Logger logger = Logger.getLogger(Sampler.class.getName());
-
-  private final SamplingParams samplingParams;
+  protected final SamplingParams samplingParams;
   
   /**
    * Construct with {@link SamplingParams}
@@ -103,26 +97,20 @@
     sampleSetSize = Math.max(sampleSetSize, samplingParams.getMinSampleSize());
     sampleSetSize = Math.min(sampleSetSize, samplingParams.getMaxSampleSize());
 
-    int[] sampleSet = null;
-    try {
-      sampleSet = RandomSample.repeatableSample(docids, actualSize,
-          sampleSetSize);
-    } catch (IOException e) {
-      if (logger.isLoggable(Level.WARNING)) {
-        logger.log(Level.WARNING, "sampling failed: "+e.getMessage()+" - falling back to no sampling!", e);
-      }
-      return new SampleResult(docids, 1d);
-    }
-
-    ScoredDocIDs sampled = ScoredDocIdsUtils.createScoredDocIDsSubset(docids,
-        sampleSet);
-    if (logger.isLoggable(Level.FINEST)) {
-      logger.finest("******************** " + sampled.size());
-    }
-    return new SampleResult(sampled, sampled.size()/(double)docids.size());
+    return createSample(docids, actualSize, sampleSetSize);
   }
 
   /**
+   * Create and return a sample of the input set
+   * @param docids input set out of which a sample is to be created 
+   * @param actualSize original size of set, prior to sampling
+   * @param sampleSetSize required size of sample set
+   * @return sample of the input set in the required size
+   */
+  protected abstract SampleResult createSample(ScoredDocIDs docids, int actualSize,
+      int sampleSetSize) throws IOException;
+
+  /**
    * Get a fixer of sample facet accumulation results. Default implementation
    * returns a <code>TakmiSampleFixer</code> which is adequate only for
    * counting. For any other accumulator, provide a different fixer.
Index: modules/facet/src/java/org/apache/lucene/facet/search/sampling/RandomSampler.java
===================================================================
--- modules/facet/src/java/org/apache/lucene/facet/search/sampling/RandomSampler.java	(revision 0)
+++ modules/facet/src/java/org/apache/lucene/facet/search/sampling/RandomSampler.java	(revision 0)
@@ -0,0 +1,112 @@
+package org.apache.lucene.facet.search.sampling;
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.lucene.facet.search.ScoredDocIDs;
+import org.apache.lucene.facet.search.ScoredDocIDsIterator;
+import org.apache.lucene.facet.util.ScoredDocIdsUtils;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A {@link Sampler} that draws its sample in one forward pass over the input,
+ * advancing by a random step before each document it picks.
+ *
+ * @lucene.experimental
+ */
+public class RandomSampler extends Sampler {
+  
+  private final Random random;
+
+  /** Creates a sampler through the no-argument {@link Sampler} constructor, using a fresh {@link Random}. */
+  public RandomSampler() {
+    super();
+    this.random = new Random();
+  }
+
+  /**
+   * Creates a sampler with explicit parameters and source of randomness.
+   *
+   * @param params sampling parameters, handed to the {@link Sampler} constructor
+   * @param random generator used to choose the step between sampled documents
+   */
+  public RandomSampler(SamplingParams params, Random random) throws IllegalArgumentException {
+    super(params);
+    this.random = random;
+  }
+
+  /**
+   * Picks <code>sampleSetSize</code> documents in a single forward pass over
+   * <code>docids</code>. While enough documents are left, the iterator is
+   * advanced by a random step in <code>[1, maxStep]</code> and the document it
+   * lands on is picked; once a maximal step could leave too few documents, the
+   * rest of the sample is filled with consecutive documents.
+   *
+   * @param docids input set out of which a sample is to be created
+   * @param actualSize original size of set, prior to sampling
+   * @param sampleSetSize required size of sample set
+   * @return the sample, or the whole input set (ratio 1) when the requested
+   *         sample is not smaller than the input
+   * @throws IllegalArgumentException if <code>sampleSetSize</code> is not positive
+   * @throws IllegalStateException if <code>docids</code> runs out before the
+   *         sample is filled
+   */
+  @Override
+  protected SampleResult createSample(ScoredDocIDs docids, int actualSize, int sampleSetSize) throws IOException {
+    if (sampleSetSize <= 0) {
+      throw new IllegalArgumentException("sampleSetSize must be positive, got " + sampleSetSize);
+    }
+    if (sampleSetSize >= actualSize) {
+      // Nothing can be left out, so hand back the whole set unsampled.
+      return new SampleResult(docids, 1d);
+    }
+    final int[] sample = new int[sampleSetSize];
+    // Twice the average gap between picks; computed in long so that doubling
+    // a very large actualSize cannot overflow.
+    final int maxStep = (int) Math.min(Integer.MAX_VALUE, (2L * actualSize) / sampleSetSize);
+    int remaining = actualSize;
+    ScoredDocIDsIterator it = docids.iterator();
+    int i = 0;
+    // Take random steps only while even the largest possible step would leave
+    // enough documents to fill the remaining slots of the sample.
+    while (i < sample.length && remaining - maxStep >= sample.length - i - 1) {
+      int step = 1 + random.nextInt(maxStep);
+      for (int j = 0; j < step; j++) {
+        advance(it);
+        --remaining;
+      }
+      sample[i++] = it.getDocID();
+    }
+    // Fill whatever is still missing with consecutive documents.
+    while (i < sample.length) {
+      advance(it);
+      sample[i++] = it.getDocID();
+    }
+    ScoredDocIDs sampleRes = ScoredDocIdsUtils.createScoredDocIDsSubset(docids, sample);
+    return new SampleResult(sampleRes, sampleSetSize / (double) actualSize);
+  }
+
+  /** Moves the iterator one document forward; fails if it is already exhausted. */
+  private static void advance(ScoredDocIDsIterator it) throws IOException {
+    if (!it.next()) {
+      throw new IllegalStateException("document set ended before the sample was filled");
+    }
+  }
+  
+}
Index: modules/facet/src/java/org/apache/lucene/facet/search/sampling/RepeatableSampler.java
===================================================================
--- modules/facet/src/java/org/apache/lucene/facet/search/sampling/RepeatableSampler.java	(revision 1180573)
+++ modules/facet/src/java/org/apache/lucene/facet/search/sampling/RepeatableSampler.java	(working copy)
@@ -1,25 +1,15 @@
-package org.apache.lucene.facet.util;
+package org.apache.lucene.facet.search.sampling;
 
 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
-import org.apache.lucene.analysis.core.KeywordAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.LockObtainFailedException;
-import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.PriorityQueue;
-import org.apache.lucene.util.Version;
 
 import org.apache.lucene.facet.search.ScoredDocIDs;
 import org.apache.lucene.facet.search.ScoredDocIDsIterator;
+import org.apache.lucene.facet.util.ScoredDocIdsUtils;
 
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -40,13 +30,38 @@
 
 /**
  * Take random samples of large collections.
- * 
  * @lucene.experimental
  */
-public class RandomSample {
+public class RepeatableSampler extends Sampler {
 
-  private static final Logger logger = Logger.getLogger(RandomSample.class.getName());
+  private static final Logger logger = Logger.getLogger(RepeatableSampler.class.getName());
 
+  public RepeatableSampler(SamplingParams params) {
+    super(params);
+  }
+  
+  @Override
+  protected SampleResult createSample(ScoredDocIDs docids, int actualSize,
+      int sampleSetSize) throws IOException {
+    int[] sampleSet = null;
+    try {
+      sampleSet = repeatableSample(docids, actualSize,
+          sampleSetSize);
+    } catch (IOException e) {
+      if (logger.isLoggable(Level.WARNING)) {
+        logger.log(Level.WARNING, "sampling failed: "+e.getMessage()+" - falling back to no sampling!", e);
+      }
+      return new SampleResult(docids, 1d);
+    }
+
+    ScoredDocIDs sampled = ScoredDocIdsUtils.createScoredDocIDsSubset(docids,
+        sampleSet);
+    if (logger.isLoggable(Level.FINEST)) {
+      logger.finest("******************** " + sampled.size());
+    }
+    return new SampleResult(sampled, sampled.size()/(double)docids.size());
+  }
+  
   /**
    * Returns <code>sampleSize</code> values from the first <code>collectionSize</code>
    * locations of <code>collection</code>, chosen using
@@ -57,10 +72,10 @@
    * @return An array of values chosen from the collection.
    * @see Algorithm#TRAVERSAL
    */
-  public static int[] repeatableSample(ScoredDocIDs collection,
+  private static int[] repeatableSample(ScoredDocIDs collection,
       int collectionSize, int sampleSize)
   throws IOException {
-    return RandomSample.repeatableSample(collection, collectionSize,
+    return repeatableSample(collection, collectionSize,
         sampleSize, Algorithm.HASHING, Sorted.NO);
   }
 
@@ -75,7 +90,7 @@
    * Sorted.NO to return them in essentially random order.
    * @return An array of values chosen from the collection.
    */
-  public static int[] repeatableSample(ScoredDocIDs collection,
+  private static int[] repeatableSample(ScoredDocIDs collection,
       int collectionSize, int sampleSize,
       Algorithm algorithm, Sorted sorted)
   throws IOException {
@@ -91,16 +106,16 @@
     int[] sample = new int[sampleSize];
     long[] times = new long[4];
     if (algorithm == Algorithm.TRAVERSAL) {
-      RandomSample.sample1(collection, collectionSize, sample, times);
+      sample1(collection, collectionSize, sample, times);
     } else if (algorithm == Algorithm.HASHING) {
-      RandomSample.sample2(collection, collectionSize, sample, times);
+      sample2(collection, collectionSize, sample, times);
     } else {
       throw new IllegalArgumentException("Invalid algorithm selection");
     }
     if (sorted == Sorted.YES) {
       Arrays.sort(sample);
     }
-    if (RandomSample.returnTimings) {
+    if (returnTimings) {
       times[3] = System.currentTimeMillis();
       if (logger.isLoggable(Level.FINEST)) {
         logger.finest("Times: " + (times[1] - times[0]) + "ms, "
@@ -133,13 +148,13 @@
   private static void sample1(ScoredDocIDs collection, int collectionSize, int[] sample, long[] times) 
   throws IOException {
     ScoredDocIDsIterator it = collection.iterator();
-    if (RandomSample.returnTimings) {
+    if (returnTimings) {
       times[0] = System.currentTimeMillis();
     }
     int sampleSize = sample.length;
-    int prime = RandomSample.findGoodStepSize(collectionSize, sampleSize);
+    int prime = findGoodStepSize(collectionSize, sampleSize);
     int mod = prime % collectionSize;
-    if (RandomSample.returnTimings) {
+    if (returnTimings) {
       times[1] = System.currentTimeMillis();
     }
     int sampleCount = 0;
@@ -158,10 +173,10 @@
       }
       sample[sampleCount++] = it.getDocID();
     }
-    if (RandomSample.returnTimings) {
+    if (returnTimings) {
       times[2] = System.currentTimeMillis();
     }
-  } // end RandomSample.sample1()
+  }
 
   /**
    * Returns a value which will allow the caller to walk
@@ -187,10 +202,10 @@
       i = collectionSize / sampleSize;
     }
     do {
-      i = RandomSample.findNextPrimeAfter(i);
+      i = findNextPrimeAfter(i);
     } while (collectionSize % i == 0);
     return i;
-  } // end RandomSample.findGoodStepSize()
+  }
 
   /**
    * Returns the first prime number that is larger than <code>n</code>.
@@ -199,10 +214,10 @@
    */
   private static int findNextPrimeAfter(int n) {
     n += (n % 2 == 0) ? 1 : 2; // next odd
-    foundFactor: for (;; n += 2) {
+    foundFactor: for (;; n += 2) { // TODO: restructure to avoid the labeled continue
       int sri = (int) (Math.sqrt(n));
-      for (int primeIndex = 0; primeIndex < RandomSample.N_PRIMES; primeIndex++) {
-        int p = RandomSample.primes[primeIndex];
+      for (int primeIndex = 0; primeIndex < N_PRIMES; primeIndex++) {
+        int p = primes[primeIndex];
         if (p > sri) {
           return n;
         }
@@ -210,7 +225,7 @@
           continue foundFactor;
         }
       }
-      for (int p = RandomSample.primes[RandomSample.N_PRIMES - 1] + 2;; p += 2) {
+      for (int p = primes[N_PRIMES - 1] + 2;; p += 2) {
         if (p > sri) {
           return n;
         }
@@ -219,70 +234,17 @@
         }
       }
     }
-  } // end RandomSample.findNextPrimeAfter()
+  }
 
   /**
-   * Divides the values in <code>collection</code> into <code>numSubranges</code>
-   * subranges from <code>minValue</code> to <code>maxValue</code> and returns the
-   * number of values in each subrange. (For testing the flatness of distribution of
-   * a sample.)
-   * @param collection The collection of values to be counted.
-   * @param range The number of possible values.
-   * @param numSubranges How many intervals to divide the value range into.
-   */
-  private static int[] countsBySubrange(int[] collection, int range, int numSubranges) {
-    int[] counts = new int[numSubranges];
-    Arrays.fill(counts, 0);
-    int numInSubrange = range / numSubranges;
-    for (int j = 0; j < collection.length; j++) {
-      counts[collection[j] / numInSubrange]++;
-    }
-    return counts;
-  } // end RandomSample.countsBySubrange()
-
-  /**
-   * Factors <code>value</code> into primes.
-   */
-  public static int[] factor(long value) {
-    ArrayList<Integer> list = new ArrayList<Integer>();
-    while (value > 1 && value % 2 == 0) {
-      list.add(2);
-      value /= 2;
-    }
-    long sqrt = Math.round(Math.sqrt(value));
-    for (int pIndex = 0, lim = RandomSample.primes.length; pIndex < lim; pIndex++) {
-      int p = RandomSample.primes[pIndex];
-      if (p >= sqrt) {
-        break;
-      }
-      while (value % p == 0) {
-        list.add(p);
-        value /= p;
-        sqrt = Math.round(Math.sqrt(value));
-      }
-    }
-    if (list.size() == 0 || value > Integer.MAX_VALUE) {
-      throw new RuntimeException("Prime or too large to factor: "+value);
-    }
-    if ((int)value > 1) {
-      list.add((int)value);
-    }
-    int[] factors = new int[list.size()];
-    for (int j = 0; j < factors.length; j++) {
-      factors[j] = list.get(j).intValue();
-    }
-    return factors;
-  } // end RandomSample.factor()
-
-  /**
    * The first N_PRIMES primes, after 2.
    */
   private static final int N_PRIMES = 4000;
-  private static int[] primes = new int[RandomSample.N_PRIMES];
+  private static int[] primes = new int[N_PRIMES];
   static {
-    RandomSample.primes[0] = 3;
-    for (int count = 1; count < RandomSample.N_PRIMES; count++) {
-      primes[count] = RandomSample.findNextPrimeAfter(primes[count - 1]);
+    primes[0] = 3;
+    for (int count = 1; count < N_PRIMES; count++) {
+      primes[count] = findNextPrimeAfter(primes[count - 1]);
     }
   }
 
@@ -307,7 +269,7 @@
    */
   private static void sample2(ScoredDocIDs collection, int collectionSize, int[] sample, long[] times) 
   throws IOException {
-    if (RandomSample.returnTimings) {
+    if (returnTimings) {
       times[0] = System.currentTimeMillis();
     }
     int sampleSize = sample.length;
@@ -320,7 +282,7 @@
     while (it.next()) {
       pq.insertWithReuse((int)(it.getDocID() * PHI_32) & 0x7FFFFFFF);
     }
-    if (RandomSample.returnTimings) {
+    if (returnTimings) {
       times[1] = System.currentTimeMillis();
     }
     /*
@@ -330,10 +292,10 @@
     for (int si = 0; si < sampleSize; si++) {
       sample[si] = (int)(((IntPriorityQueue.MI)(heap[si+1])).value * PHI_32I) & 0x7FFFFFFF;
     }
-    if (RandomSample.returnTimings) {
+    if (returnTimings) {
       times[2] = System.currentTimeMillis();
     }
-  } // end RandomSample.sample2()
+  }
 
   /**
    * A bounded priority queue for Integers, to retain a specified number of
@@ -358,7 +320,7 @@
       }
       this.mi.value = intval;
       this.mi = (MI)this.insertWithOverflow(this.mi);
-    } // end IntPriorityQueue.insertWithReuse()
+    }
 
     /**
      * Returns the underlying data structure for faster access. Extracting elements
@@ -386,19 +348,19 @@
     private static class MI {
       MI() { }
       public int value;
-    } // end class RandomSample.IntPriorityQueue.MI
+    }
 
     /**
      * The mutable integer instance for reuse after first overflow.
      */
     private MI mi;
 
-  } // end class RandomSample.IntPriorityQueue
+  }
 
   /**
    * For specifying which sampling algorithm to use.
    */
-  public static class Algorithm {
+  private enum Algorithm {
 
     /**
      * Specifies a methodical traversal algorithm, which is guaranteed to span the collection
@@ -410,7 +372,7 @@
     // TODO (Facet): This one produces a bimodal distribution (very flat around
     // each peak!) for collection size 10M and sample sizes 10k and 10544.
     // Figure out why.
-    public static final Algorithm TRAVERSAL = new Algorithm("Traversal");
+    TRAVERSAL,
 
     /**
      * Specifies a Fibonacci-style hash algorithm (see Knuth, S&S), which generates a less
@@ -418,69 +380,25 @@
      * but requires a bounded priority queue the size of the sample, and creates an object
      * containing a sampled value and its hash, for every element in the full set. 
      */
-    public static final Algorithm HASHING = new Algorithm("Hashing");
+    HASHING
+  }
 
-    /**
-     * Constructs an instance of an algorithm.
-     * @param name An ID for printing.
-     */
-    private Algorithm(String name) {
-      this.name = name;
-    }
-
-    /**
-     * Prints this algorithm's name.
-     */
-    @Override
-    public String toString() {
-      return this.name;
-    }
-
-    /**
-     * The name of this algorithm, for printing.
-     */
-    private String name;
-
-  } // end class RandomSample.Algorithm
-
   /**
    * For specifying whether to sort the sample.
    */
-  public static class Sorted {
+  private enum Sorted {
 
     /**
-     * Specifies sorting the resulting sample before returning.
+     * Sort resulting sample before returning.
      */
-    public static final Sorted YES = new Sorted("sorted");
+    YES,
 
     /**
-     * Specifies not sorting the resulting sample. 
+     * Do not sort the resulting sample. 
      */
-    public static final Sorted NO = new Sorted("unsorted");
+    NO
+  }
 
-    /**
-     * Constructs an instance of a "sorted" selector.
-     * @param name An ID for printing.
-     */
-    private Sorted(String name) {
-      this.name = name;
-    }
-
-    /**
-     * Prints this selector's name.
-     */
-    @Override
-    public String toString() {
-      return this.name;
-    }
-
-    /**
-     * The name of this selector, for printing.
-     */
-    private String name;
-
-  } // end class RandomSample.Sorted
-
   /**
    * Magic number 1: prime closest to phi, in 32 bits.
    */
@@ -496,143 +414,4 @@
    */
   private static boolean returnTimings = false;
 
-  /**
-   * Self-test.
-   */
-  public static void main(String[] args) throws Exception {
-    RandomSample.returnTimings = true;
-    /*
-     * Create an array of sequential integers, from which samples will be taken.
-     */
-    final int COLLECTION_SIZE = 10 * 1000 * 1000;
-    ScoredDocIDs collection = createAllScoredDocs(COLLECTION_SIZE);
-
-    /*
-     * Factor PHI.
-     *
-        int[] factors = RandomSample.factor(PHI_32);
-        System.out.print("Factors of PHI_32: ");
-        for (int k : factors) {
-          System.out.print(k+", ");
-        }
-        System.out.println("");
-
-     * Verify inverse relationship of PHI & phi.
-     *
-        boolean inverseValid = true;
-        for (int j = 0; j < Integer.MAX_VALUE; j++) {
-          int k = (int)(j * PHI_32) & 0x7FFFFFFF;
-          int m = (int)(k * PHI_32I) & 0X7FFFFFFF;
-          if (j != m) {
-            System.out.println("Inverse not valid for "+j);
-            inverseValid = false;
-          }
-        }
-        System.out.println("Inverse valid? "+inverseValid);
-     */
-    /*
-     * Take samples of various sizes from the full set, verify no duplicates,
-     * check flatness.
-     */
-    int[] sampleSizes = {
-        10, 57, 100, 333, 1000, 2154, 10000
-    };
-    Algorithm[] algorithms = { Algorithm.HASHING, Algorithm.TRAVERSAL };
-    for (int sampleSize : sampleSizes) {
-      for (Algorithm algorithm : algorithms) {
-        System.out.println("Sample size " + sampleSize
-            + ", algorithm " + algorithm + "...");
-        /*
-         * Take the sample.
-         */
-        int[] sample = RandomSample.repeatableSample(
-            collection, COLLECTION_SIZE, sampleSize, algorithm, Sorted.YES);
-        /*
-         * Check for duplicates.
-         */
-        boolean noDups = true;
-        for (int j = 0; j < sampleSize - 1; j++) {
-          if (sample[j] == sample[j + 1]) {
-            System.out.println("Duplicate value "
-                + sample[j] + " at " + j + ", "
-                + (j + 1));
-            noDups = false;
-            break;
-          }
-        }
-        if (noDups) {
-          System.out.println("No duplicates.");
-        }
-        if (algorithm == Algorithm.HASHING) {
-          System.out.print("Hashed sample, up to 100 of "+sampleSize+": ");
-          int lim = Math.min(100, sampleSize);
-          for (int k = 0; k < lim; k++) {
-            System.out.print(sample[k]+", ");
-          }
-          System.out.println("");
-        }
-        /*
-         * Check flatness of distribution in sample.
-         */
-        final int N_INTERVALS = 100;
-        int[] counts = RandomSample.countsBySubrange(sample, COLLECTION_SIZE, N_INTERVALS);
-        int minCount = Integer.MAX_VALUE;
-        int maxCount = Integer.MIN_VALUE;
-        int avgCount = 0;
-        for (int j = 0; j < N_INTERVALS; j++) {
-          int count = counts[j];
-          if (count < minCount) {
-            minCount = count;
-          }
-          if (count > maxCount) {
-            maxCount = count;
-          }
-          avgCount += count;
-        }
-        avgCount /= N_INTERVALS;
-        System.out.println("Min, max, avg: "+minCount+", "+maxCount+", "+avgCount);
-
-        if (((double)minCount - avgCount)/avgCount < -0.05 && (minCount - avgCount) < -5) {
-          System.out.println("Not flat enough.");
-        } else if (((double)maxCount - avgCount)/avgCount > 0.05 && (maxCount - avgCount) > 5) {
-          System.out.println("Not flat enough.");
-        } else {
-          System.out.println("Flat enough.");
-        }
-        if (sampleSize == 10544 && algorithm == Algorithm.TRAVERSAL) {
-          System.out.print("Counts of interest: ");
-          for (int j = 0; j < N_INTERVALS; j++) {
-            System.out.print(counts[j]+", ");
-          }
-          System.out.println("");
-        }
-      }
-    }
-    System.out.println("Last prime is "
-        + RandomSample.primes[RandomSample.N_PRIMES - 1]);
-  }
-
-  private static ScoredDocIDs createAllScoredDocs(final int COLLECTION_SIZE)
-  throws CorruptIndexException, LockObtainFailedException, IOException {
-    ScoredDocIDs collection;
-
-    IndexReader reader = null;
-    Directory ramDir = new RAMDirectory();
-    try {
-      IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(Version.LUCENE_30, new KeywordAnalyzer()));
-      for (int i = 0; i < COLLECTION_SIZE; i++) {
-        writer.addDocument(new Document());
-      }
-      writer.commit();
-      writer.close();
-      reader = IndexReader.open(ramDir);
-      collection = ScoredDocIdsUtils.createAllDocsScoredDocIDs(reader);
-    } finally {
-      if (reader != null) {
-        reader.close();
-      }
-      ramDir.close();
-    }
-    return collection;
-  }
-} // end class RandomSample
+}
Index: modules/facet/src/java/org/apache/lucene/facet/util/RandomSample.java
===================================================================
--- modules/facet/src/java/org/apache/lucene/facet/util/RandomSample.java	(revision 1180628)
+++ modules/facet/src/java/org/apache/lucene/facet/util/RandomSample.java	(working copy)
@@ -1,638 +0,0 @@
-package org.apache.lucene.facet.util;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.apache.lucene.analysis.core.KeywordAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.LockObtainFailedException;
-import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.util.PriorityQueue;
-import org.apache.lucene.util.Version;
-
-import org.apache.lucene.facet.search.ScoredDocIDs;
-import org.apache.lucene.facet.search.ScoredDocIDsIterator;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Take random samples of large collections.
- * 
- * @lucene.experimental
- */
-public class RandomSample {
-
-  private static final Logger logger = Logger.getLogger(RandomSample.class.getName());
-
-  /**
-   * Returns <code>sampleSize</code> values from the first <code>collectionSize</code>
-   * locations of <code>collection</code>, chosen using
-   * the <code>TRAVERSAL</code> algorithm. The sample values are not sorted.
-   * @param collection The values from which a sample is wanted.
-   * @param collectionSize The number of values (from the first) from which to draw the sample.
-   * @param sampleSize The number of values to return.
-   * @return An array of values chosen from the collection.
-   * @see Algorithm#TRAVERSAL
-   */
-  public static int[] repeatableSample(ScoredDocIDs collection,
-      int collectionSize, int sampleSize)
-  throws IOException {
-    return RandomSample.repeatableSample(collection, collectionSize,
-        sampleSize, Algorithm.HASHING, Sorted.NO);
-  }
-
-  /**
-   * Returns <code>sampleSize</code> values from the first <code>collectionSize</code>
-   * locations of <code>collection</code>, chosen using <code>algorithm</code>.
-   * @param collection The values from which a sample is wanted.
-   * @param collectionSize The number of values (from the first) from which to draw the sample.
-   * @param sampleSize The number of values to return.
-   * @param algorithm Which algorithm to use.
-   * @param sorted Sorted.YES to sort the sample values in ascending order before returning;
-   * Sorted.NO to return them in essentially random order.
-   * @return An array of values chosen from the collection.
-   */
-  public static int[] repeatableSample(ScoredDocIDs collection,
-      int collectionSize, int sampleSize,
-      Algorithm algorithm, Sorted sorted)
-  throws IOException {
-    if (collection == null) {
-      throw new IOException("docIdSet is null");
-    }
-    if (sampleSize < 1) {
-      throw new IOException("sampleSize < 1 (" + sampleSize + ")");
-    }
-    if (collectionSize < sampleSize) {
-      throw new IOException("collectionSize (" + collectionSize + ") less than sampleSize (" + sampleSize + ")");
-    }
-    int[] sample = new int[sampleSize];
-    long[] times = new long[4];
-    if (algorithm == Algorithm.TRAVERSAL) {
-      RandomSample.sample1(collection, collectionSize, sample, times);
-    } else if (algorithm == Algorithm.HASHING) {
-      RandomSample.sample2(collection, collectionSize, sample, times);
-    } else {
-      throw new IllegalArgumentException("Invalid algorithm selection");
-    }
-    if (sorted == Sorted.YES) {
-      Arrays.sort(sample);
-    }
-    if (RandomSample.returnTimings) {
-      times[3] = System.currentTimeMillis();
-      if (logger.isLoggable(Level.FINEST)) {
-        logger.finest("Times: " + (times[1] - times[0]) + "ms, "
-            + (times[2] - times[1]) + "ms, " + (times[3] - times[2])+"ms");
-      }
-    }
-    return sample;
-  }
-
-  /**
-   * Returns <code>sample</code>.length values chosen from the first <code>collectionSize</code>
-   * locations of <code>collection</code>, using the TRAVERSAL algorithm. The sample is
-   * pseudorandom: no subset of the original collection
-   * is in principle more likely to occur than any other, but for a given collection
-   * and sample size, the same sample will always be returned. This algorithm walks the
-   * original collection in a methodical way that is guaranteed not to visit any location
-   * more than once, which makes sampling without replacement faster because removals don't
-   * have to be tracked, and the number of operations is proportional to the sample size,
-   * not the collection size.
-   * Times for performance measurement
-   * are returned in <code>times</code>, which must be an array of at least three longs, containing
-   * nanosecond event times. The first
-   * is set when the algorithm starts; the second, when the step size has been calculated;
-   * and the third when the sample has been taken.
-   * @param collection The set to be sampled.
-   * @param collectionSize The number of values to use (starting from first).
-   * @param sample The array in which to return the sample.
-   * @param times The times of three events, for measuring performance.
-   */
-  private static void sample1(ScoredDocIDs collection, int collectionSize, int[] sample, long[] times) 
-  throws IOException {
-    ScoredDocIDsIterator it = collection.iterator();
-    if (RandomSample.returnTimings) {
-      times[0] = System.currentTimeMillis();
-    }
-    int sampleSize = sample.length;
-    int prime = RandomSample.findGoodStepSize(collectionSize, sampleSize);
-    int mod = prime % collectionSize;
-    if (RandomSample.returnTimings) {
-      times[1] = System.currentTimeMillis();
-    }
-    int sampleCount = 0;
-    int index = 0;
-    for (; sampleCount < sampleSize;) {
-      if (index + mod < collectionSize) {
-        for (int i = 0; i < mod; i++, index++) {
-          it.next();
-        }
-      } else {
-        index = index + mod - collectionSize;
-        it = collection.iterator();
-        for (int i = 0; i < index; i++) {
-          it.next();
-        }
-      }
-      sample[sampleCount++] = it.getDocID();
-    }
-    if (RandomSample.returnTimings) {
-      times[2] = System.currentTimeMillis();
-    }
-  } // end RandomSample.sample1()
-
-  /**
-   * Returns a value which will allow the caller to walk
-   * a collection of <code>collectionSize</code> values, without repeating or missing
-   * any, and spanning the collection from beginning to end at least once with
-   * <code>sampleSize</code> visited locations. Choosing a value
-   * that is relatively prime to the collection size ensures that stepping by that size (modulo
-   * the collection size) will hit all locations without repeating, eliminating the need to
-   * track previously visited locations for a "without replacement" sample. Starting with the
-   * square root of the collection size ensures that either the first or second prime tried will
-   * work (they can't both divide the collection size). It also has the property that N steps of
-   * size N will span a collection of N**2 elements once. If the sample is bigger than N, it will
-   * wrap multiple times (without repeating). If the sample is smaller, a step size is chosen
-   * that will result in at least one spanning of the collection.
-   * 
-   * @param collectionSize The number of values in the collection to be sampled.
-   * @param sampleSize The number of values wanted in the sample.
-   * @return A good increment value for walking the collection.
-   */
-  private static int findGoodStepSize(int collectionSize, int sampleSize) {
-    int i = (int) Math.sqrt(collectionSize);
-    if (sampleSize < i) {
-      i = collectionSize / sampleSize;
-    }
-    do {
-      i = RandomSample.findNextPrimeAfter(i);
-    } while (collectionSize % i == 0);
-    return i;
-  } // end RandomSample.findGoodStepSize()
-
-  /**
-   * Returns the first prime number that is larger than <code>n</code>.
-   * @param n A number less than the prime to be returned.
-   * @return The smallest prime larger than <code>n</code>.
-   */
-  private static int findNextPrimeAfter(int n) {
-    n += (n % 2 == 0) ? 1 : 2; // next odd
-    foundFactor: for (;; n += 2) {
-      int sri = (int) (Math.sqrt(n));
-      for (int primeIndex = 0; primeIndex < RandomSample.N_PRIMES; primeIndex++) {
-        int p = RandomSample.primes[primeIndex];
-        if (p > sri) {
-          return n;
-        }
-        if (n % p == 0) {
-          continue foundFactor;
-        }
-      }
-      for (int p = RandomSample.primes[RandomSample.N_PRIMES - 1] + 2;; p += 2) {
-        if (p > sri) {
-          return n;
-        }
-        if (n % p == 0) {
-          continue foundFactor;
-        }
-      }
-    }
-  } // end RandomSample.findNextPrimeAfter()
-
-  /**
-   * Divides the values in <code>collection</code> into <code>numSubranges</code>
-   * subranges from <code>minValue</code> to <code>maxValue</code> and returns the
-   * number of values in each subrange. (For testing the flatness of distribution of
-   * a sample.)
-   * @param collection The collection of values to be counted.
-   * @param range The number of possible values.
-   * @param numSubranges How many intervals to divide the value range into.
-   */
-  private static int[] countsBySubrange(int[] collection, int range, int numSubranges) {
-    int[] counts = new int[numSubranges];
-    Arrays.fill(counts, 0);
-    int numInSubrange = range / numSubranges;
-    for (int j = 0; j < collection.length; j++) {
-      counts[collection[j] / numInSubrange]++;
-    }
-    return counts;
-  } // end RandomSample.countsBySubrange()
-
-  /**
-   * Factors <code>value</code> into primes.
-   */
-  public static int[] factor(long value) {
-    ArrayList<Integer> list = new ArrayList<Integer>();
-    while (value > 1 && value % 2 == 0) {
-      list.add(2);
-      value /= 2;
-    }
-    long sqrt = Math.round(Math.sqrt(value));
-    for (int pIndex = 0, lim = RandomSample.primes.length; pIndex < lim; pIndex++) {
-      int p = RandomSample.primes[pIndex];
-      if (p >= sqrt) {
-        break;
-      }
-      while (value % p == 0) {
-        list.add(p);
-        value /= p;
-        sqrt = Math.round(Math.sqrt(value));
-      }
-    }
-    if (list.size() == 0 || value > Integer.MAX_VALUE) {
-      throw new RuntimeException("Prime or too large to factor: "+value);
-    }
-    if ((int)value > 1) {
-      list.add((int)value);
-    }
-    int[] factors = new int[list.size()];
-    for (int j = 0; j < factors.length; j++) {
-      factors[j] = list.get(j).intValue();
-    }
-    return factors;
-  } // end RandomSample.factor()
-
-  /**
-   * The first N_PRIMES primes, after 2.
-   */
-  private static final int N_PRIMES = 4000;
-  private static int[] primes = new int[RandomSample.N_PRIMES];
-  static {
-    RandomSample.primes[0] = 3;
-    for (int count = 1; count < RandomSample.N_PRIMES; count++) {
-      primes[count] = RandomSample.findNextPrimeAfter(primes[count - 1]);
-    }
-  }
-
-  /**
-   * Returns <code>sample</code>.length values chosen from the first <code>collectionSize</code>
-   * locations of <code>collection</code>, using the HASHING algorithm. Performance measurements
-   * are returned in <code>times</code>, which must be an array of at least three longs. The first
-   * will be set when the algorithm starts; the second, when a hash key has been calculated and
-   * inserted into the priority queue for every element in the collection; and the third when the
-   * original elements associated with the keys remaining in the PQ have been stored in the sample
-   * array for return.
-   * <P>
-   * This algorithm slows as the sample size becomes a significant fraction of the collection
-   * size, because the PQ is as large as the sample set, and will not do early rejection of values
-   * below the minimum until it fills up, and a larger PQ contains more small values to be purged,
-   * resulting in less early rejection and more logN insertions.
-   * 
-   * @param collection The set to be sampled.
-   * @param collectionSize The number of values to use (starting from first).
-   * @param sample The array in which to return the sample.
-   * @param times The times of three events, for measuring performance.
-   */
-  private static void sample2(ScoredDocIDs collection, int collectionSize, int[] sample, long[] times) 
-  throws IOException {
-    if (RandomSample.returnTimings) {
-      times[0] = System.currentTimeMillis();
-    }
-    int sampleSize = sample.length;
-    IntPriorityQueue pq = new IntPriorityQueue(sampleSize);
-    /*
-     * Convert every value in the collection to a hashed "weight" value, and insert
-     * into a bounded PQ (retains only sampleSize highest weights).
-     */
-    ScoredDocIDsIterator it = collection.iterator();
-    while (it.next()) {
-      pq.insertWithReuse((int)(it.getDocID() * PHI_32) & 0x7FFFFFFF);
-    }
-    if (RandomSample.returnTimings) {
-      times[1] = System.currentTimeMillis();
-    }
-    /*
-     * Extract heap, convert weights back to original values, and return as integers.
-     */
-    Object[] heap = pq.getHeap();
-    for (int si = 0; si < sampleSize; si++) {
-      sample[si] = (int)(((IntPriorityQueue.MI)(heap[si+1])).value * PHI_32I) & 0x7FFFFFFF;
-    }
-    if (RandomSample.returnTimings) {
-      times[2] = System.currentTimeMillis();
-    }
-  } // end RandomSample.sample2()
-
-  /**
-   * A bounded priority queue for Integers, to retain a specified number of
-   * the highest-weighted values for return as a random sample.
-   */
-  private static class IntPriorityQueue extends PriorityQueue<Object> {
-
-    /**
-     * Creates a bounded PQ of size <code>size</code>.
-     * @param size The number of elements to retain.
-     */
-    public IntPriorityQueue(int size) {
-      super(size);
-    }
-
-    /**
-     * Inserts an integer with overflow and object reuse.
-     */
-    public void insertWithReuse(int intval) {
-      if (this.mi == null) {
-        this.mi = new MI();
-      }
-      this.mi.value = intval;
-      this.mi = (MI)this.insertWithOverflow(this.mi);
-    } // end IntPriorityQueue.insertWithReuse()
-
-    /**
-     * Returns the underlying data structure for faster access. Extracting elements
-     * one at a time would require N logN time, and since we want the elements sorted
-     * in ascending order by value (not weight), the array is useful as-is.
-     * @return The underlying heap array.
-     */
-    public Object[] getHeap() {
-      return getHeapArray();
-    }
-
-    /**
-     * Returns true if <code>o1<code>'s weight is less than that of <code>o2</code>, for
-     * ordering in the PQ.
-     * @return True if <code>o1</code> weighs less than <code>o2</code>.
-     */
-    @Override
-    public boolean lessThan(Object o1, Object o2) {
-      return ((MI)o1).value < ((MI)o2).value;
-    }
-
-    /**
-     * A mutable integer that lets queue objects be reused once they start overflowing.
-     */
-    private static class MI {
-      MI() { }
-      public int value;
-    } // end class RandomSample.IntPriorityQueue.MI
-
-    /**
-     * The mutable integer instance for reuse after first overflow.
-     */
-    private MI mi;
-
-  } // end class RandomSample.IntPriorityQueue
-
-  /**
-   * For specifying which sampling algorithm to use.
-   */
-  public static class Algorithm {
-
-    /**
-     * Specifies a methodical traversal algorithm, which is guaranteed to span the collection
-     * at least once, and never to return duplicates. Faster than the hashing algorithm and
-     * uses much less space, but the randomness of the sample may be affected by systematic
-     * variations in the collection. Requires only an array for the sample, and visits only
-     * the number of elements in the sample set, not the full set.
-     */
-    // TODO (Facet): This one produces a bimodal distribution (very flat around
-    // each peak!) for collection size 10M and sample sizes 10k and 10544.
-    // Figure out why.
-    public static final Algorithm TRAVERSAL = new Algorithm("Traversal");
-
-    /**
-     * Specifies a Fibonacci-style hash algorithm (see Knuth, S&S), which generates a less
-     * systematically distributed subset of the sampled collection than the traversal method,
-     * but requires a bounded priority queue the size of the sample, and creates an object
-     * containing a sampled value and its hash, for every element in the full set. 
-     */
-    public static final Algorithm HASHING = new Algorithm("Hashing");
-
-    /**
-     * Constructs an instance of an algorithm.
-     * @param name An ID for printing.
-     */
-    private Algorithm(String name) {
-      this.name = name;
-    }
-
-    /**
-     * Prints this algorithm's name.
-     */
-    @Override
-    public String toString() {
-      return this.name;
-    }
-
-    /**
-     * The name of this algorithm, for printing.
-     */
-    private String name;
-
-  } // end class RandomSample.Algorithm
-
-  /**
-   * For specifying whether to sort the sample.
-   */
-  public static class Sorted {
-
-    /**
-     * Specifies sorting the resulting sample before returning.
-     */
-    public static final Sorted YES = new Sorted("sorted");
-
-    /**
-     * Specifies not sorting the resulting sample. 
-     */
-    public static final Sorted NO = new Sorted("unsorted");
-
-    /**
-     * Constructs an instance of a "sorted" selector.
-     * @param name An ID for printing.
-     */
-    private Sorted(String name) {
-      this.name = name;
-    }
-
-    /**
-     * Prints this selector's name.
-     */
-    @Override
-    public String toString() {
-      return this.name;
-    }
-
-    /**
-     * The name of this selector, for printing.
-     */
-    private String name;
-
-  } // end class RandomSample.Sorted
-
-  /**
-   * Magic number 1: prime closest to phi, in 32 bits.
-   */
-  private static final long PHI_32 = 2654435769L;
-
-  /**
-   * Magic number 2: multiplicative inverse of PHI_32, modulo 2**32.
-   */
-  private static final long PHI_32I = 340573321L;
-
-  /**
-   * Switch to cause methods to return timings.
-   */
-  private static boolean returnTimings = false;
-
-  /**
-   * Self-test.
-   */
-  public static void main(String[] args) throws Exception {
-    RandomSample.returnTimings = true;
-    /*
-     * Create an array of sequential integers, from which samples will be taken.
-     */
-    final int COLLECTION_SIZE = 10 * 1000 * 1000;
-    ScoredDocIDs collection = createAllScoredDocs(COLLECTION_SIZE);
-
-    /*
-     * Factor PHI.
-     *
-        int[] factors = RandomSample.factor(PHI_32);
-        System.out.print("Factors of PHI_32: ");
-        for (int k : factors) {
-          System.out.print(k+", ");
-        }
-        System.out.println("");
-
-     * Verify inverse relationship of PHI & phi.
-     *
-        boolean inverseValid = true;
-        for (int j = 0; j < Integer.MAX_VALUE; j++) {
-          int k = (int)(j * PHI_32) & 0x7FFFFFFF;
-          int m = (int)(k * PHI_32I) & 0X7FFFFFFF;
-          if (j != m) {
-            System.out.println("Inverse not valid for "+j);
-            inverseValid = false;
-          }
-        }
-        System.out.println("Inverse valid? "+inverseValid);
-     */
-    /*
-     * Take samples of various sizes from the full set, verify no duplicates,
-     * check flatness.
-     */
-    int[] sampleSizes = {
-        10, 57, 100, 333, 1000, 2154, 10000
-    };
-    Algorithm[] algorithms = { Algorithm.HASHING, Algorithm.TRAVERSAL };
-    for (int sampleSize : sampleSizes) {
-      for (Algorithm algorithm : algorithms) {
-        System.out.println("Sample size " + sampleSize
-            + ", algorithm " + algorithm + "...");
-        /*
-         * Take the sample.
-         */
-        int[] sample = RandomSample.repeatableSample(
-            collection, COLLECTION_SIZE, sampleSize, algorithm, Sorted.YES);
-        /*
-         * Check for duplicates.
-         */
-        boolean noDups = true;
-        for (int j = 0; j < sampleSize - 1; j++) {
-          if (sample[j] == sample[j + 1]) {
-            System.out.println("Duplicate value "
-                + sample[j] + " at " + j + ", "
-                + (j + 1));
-            noDups = false;
-            break;
-          }
-        }
-        if (noDups) {
-          System.out.println("No duplicates.");
-        }
-        if (algorithm == Algorithm.HASHING) {
-          System.out.print("Hashed sample, up to 100 of "+sampleSize+": ");
-          int lim = Math.min(100, sampleSize);
-          for (int k = 0; k < lim; k++) {
-            System.out.print(sample[k]+", ");
-          }
-          System.out.println("");
-        }
-        /*
-         * Check flatness of distribution in sample.
-         */
-        final int N_INTERVALS = 100;
-        int[] counts = RandomSample.countsBySubrange(sample, COLLECTION_SIZE, N_INTERVALS);
-        int minCount = Integer.MAX_VALUE;
-        int maxCount = Integer.MIN_VALUE;
-        int avgCount = 0;
-        for (int j = 0; j < N_INTERVALS; j++) {
-          int count = counts[j];
-          if (count < minCount) {
-            minCount = count;
-          }
-          if (count > maxCount) {
-            maxCount = count;
-          }
-          avgCount += count;
-        }
-        avgCount /= N_INTERVALS;
-        System.out.println("Min, max, avg: "+minCount+", "+maxCount+", "+avgCount);
-
-        if (((double)minCount - avgCount)/avgCount < -0.05 && (minCount - avgCount) < -5) {
-          System.out.println("Not flat enough.");
-        } else if (((double)maxCount - avgCount)/avgCount > 0.05 && (maxCount - avgCount) > 5) {
-          System.out.println("Not flat enough.");
-        } else {
-          System.out.println("Flat enough.");
-        }
-        if (sampleSize == 10544 && algorithm == Algorithm.TRAVERSAL) {
-          System.out.print("Counts of interest: ");
-          for (int j = 0; j < N_INTERVALS; j++) {
-            System.out.print(counts[j]+", ");
-          }
-          System.out.println("");
-        }
-      }
-    }
-    System.out.println("Last prime is "
-        + RandomSample.primes[RandomSample.N_PRIMES - 1]);
-  }
-
-  private static ScoredDocIDs createAllScoredDocs(final int COLLECTION_SIZE)
-  throws CorruptIndexException, LockObtainFailedException, IOException {
-    ScoredDocIDs collection;
-
-    IndexReader reader = null;
-    Directory ramDir = new RAMDirectory();
-    try {
-      IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(Version.LUCENE_30, new KeywordAnalyzer()));
-      for (int i = 0; i < COLLECTION_SIZE; i++) {
-        writer.addDocument(new Document());
-      }
-      writer.commit();
-      writer.close();
-      reader = IndexReader.open(ramDir);
-      collection = ScoredDocIdsUtils.createAllDocsScoredDocIDs(reader);
-    } finally {
-      if (reader != null) {
-        reader.close();
-      }
-      ramDir.close();
-    }
-    return collection;
-  }
-} // end class RandomSample
