Index: src/java/org/apache/solr/search/SolrIndexSearcher.java
===================================================================
--- src/java/org/apache/solr/search/SolrIndexSearcher.java	(revision 794328)
+++ src/java/org/apache/solr/search/SolrIndexSearcher.java	Sat Jul 25 12:16:43 CEST 2009
@@ -1196,6 +1196,43 @@
   }
 
   /**
+   * Returns documents matching both <code>query</code> and the intersection
+   * of <code>filterList</code>, sorted by <code>lsort</code>.
+   * Also returns the complete set of documents
+   * matching <code>query</code> and the supplied filters
+   * (regardless of <code>offset</code> and <code>len</code>).
+   * <p>
+   * This method is cache aware and may retrieve filters from
+   * the cache or make an insertion into the cache as a result of this call.
+   * <p>
+   * FUTURE: The returned DocList may be retrieved from a cache.
+   * <p>
+   * The DocList and DocSet returned should <b>not</b> be modified.
+   *
+   * @param query    the main query
+   * @param filterList   may be null
+   * @param docSet   filter docSet, handed to the query command as its filter
+   * @param lsort    criteria by which to sort (if null, query relevance is used)
+   * @param offset   offset into the list of documents to return
+   * @param len      maximum number of documents to return
+   * @param flags    user supplied flags for the result set; GET_DOCSET is always added
+   * @return DocListAndSet meeting the specified criteria, should <b>not</b> be modified by the caller.
+   * @throws IOException propagated from getDocListC
+   */
+  public DocListAndSet getDocListAndSet(Query query, List<Query> filterList, DocSet docSet, Sort lsort, int offset, int len, int flags) throws IOException {
+    // Bundle the arguments into a QueryCommand. GET_DOCSET is OR-ed into a copy of
+    // the flags rather than written back into the parameter.
+
+    QueryCommand qc = new QueryCommand();
+    qc.setQuery(query).setFilterList(filterList).setFilter(docSet);
+    qc.setSort(lsort).setOffset(offset).setLen(len).setFlags(flags | GET_DOCSET);
+    QueryResult result = new QueryResult();
+    getDocListC(result,qc);
+
+    return result.getDocListAndSet();
+  }
+
+  /**
    * Returns documents matching both <code>query</code> and <code>filter</code>
    * and sorted by <code>sort</code>.  Also returns the compete set of documents
    * matching <code>query</code> and <code>filter</code> (regardless of <code>offset</code> and <code>len</code>).
Index: src/java/org/apache/solr/handler/component/CollapseComponent.java
===================================================================
--- src/java/org/apache/solr/handler/component/CollapseComponent.java	Thu Jun 18 17:52:22 CEST 2009
+++ src/java/org/apache/solr/handler/component/CollapseComponent.java	Thu Jun 18 17:52:22 CEST 2009
@@ -0,0 +1,130 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.handler.component;
+
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.params.CollapseParams;
+import org.apache.solr.common.params.CollapseParams.CollapseFacet;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.request.SolrQueryResponse;
+import org.apache.solr.search.CollapseFilter;
+import org.apache.solr.search.DocSet;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.search.DocListAndSet;
+
+
+import java.io.IOException;
+import java.net.URL;
+
+/**
+ * Collapse component is responsible for field collapsing with the {@link org.apache.solr.search.CollapseFilter},
+ * which does most of the work. Collapsing is activated by specifying <code>collapse.field</code> in the request
+ * as parameter.
+ * If this parameter is not specified it falls back to the {@link QueryComponent#process(ResponseBuilder)} method.
+ * <br/><br/>
+ * Unless the collapse filter reports the facet mode <code>after</code> (its default when <code>collapse.facet</code>
+ * is absent), the collapsed docSet in <code>DocListAndSet</code> is replaced with the uncollapsed docset, so that
+ * facet counts are based on the uncollapsed search result.
+ *
+ * @version $Id$
+ * @since solr 1.3
+ */
+public class CollapseComponent extends QueryComponent {
+
+
+  /**
+   * Runs the query with field collapsing, or plain query processing when no collapse field is given.
+   */
+  @Override
+  public void process(ResponseBuilder rb) throws IOException {
+    SolrQueryRequest req = rb.req;
+    SolrQueryResponse rsp = rb.rsp;
+    SolrIndexSearcher searcher = req.getSearcher();
+    SolrParams params = req.getParams();
+
+    // Without a collapse field this component behaves exactly like QueryComponent.
+    if (params.get(CollapseParams.COLLAPSE_FIELD) == null) {
+      super.process(rb);
+      return;
+    }
+
+    // A collapse field is present from here on, so the filter is always created and
+    // needs no further null checks.
+    CollapseFilter collapseFilter = new CollapseFilter(req.getCore().getSolrConfig(),
+            searcher,
+            rb.getQuery(),
+            rb.getFilters(),
+            rb.getSortSpec().getSort(),
+            params,
+            rb.getFieldFlags());
+    DocSet collapseFilterDocSet = collapseFilter.getDocSet();
+    boolean facetAfterCollapse = (collapseFilter.getCollapseFacet() == CollapseFacet.AFTER);
+
+    // The filter queries are only handed over when there is no collapsed docset to
+    // filter with; otherwise the collapsed docset is used as the filter.
+    DocListAndSet results = searcher.getDocListAndSet(rb.getQuery(),
+            collapseFilterDocSet == null ? rb.getFilters() : null,
+            collapseFilterDocSet,
+            rb.getSortSpec().getSort(),
+            rb.getSortSpec().getOffset(),
+            rb.getSortSpec().getCount(),
+            rb.getFieldFlags());
+
+    //for getting the facet count BEFORE the collapsing, we must
+    //get the doc. collection without filtering by the collapseFilterDocSet.
+    if (!facetAfterCollapse) {
+      results.docSet = collapseFilter.getUncollapsedDocSet();
+    }
+
+    rb.setResults(results);
+
+    // collapse information is computed for the documents
+    // in the returned doc list
+    rsp.add("collapse_counts", collapseFilter.getCollapseInfo(searcher, results.docList));
+
+    rsp.add("response", results.docList);
+  }
+
+
+  /////////////////////////////////////////////
+  ///  SolrInfoMBean
+  ////////////////////////////////////////////
+  @Override
+  public String getDescription() {
+    return "Field Collapsing";
+  }
+
+  @Override
+  public String getVersion() {
+    return "";
+  }
+
+  @Override
+  public String getSourceId() {
+    return "";
+  }
+
+  @Override
+  public String getSource() {
+    return "";
+  }
+
+  @Override
+  public URL[] getDocs() {
+    return null;
+  }
+}
Index: src/java/org/apache/solr/util/DocSetScoreCollector.java
===================================================================
--- src/java/org/apache/solr/util/DocSetScoreCollector.java	Sat Jul 25 12:22:33 CEST 2009
+++ src/java/org/apache/solr/util/DocSetScoreCollector.java	Sat Jul 25 12:22:33 CEST 2009
@@ -0,0 +1,103 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.util;
+
+import org.apache.solr.search.DocSet;
+import org.apache.solr.search.BitDocSet;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.util.OpenBitSet;
+import org.apache.lucene.index.IndexReader;
+
+import java.io.IOException;
+
+/**
+ * Collects matching documents in a bit set and records the score of every collected document.
+ */
+public class DocSetScoreCollector extends Collector {
+  private final float[] scores;
+  private final OpenBitSet bits;
+
+  private Scorer scorer = null;
+  private int docBase = 0;
+
+  /**
+   * Constructs a DocSetScoreCollector with the specified parameter.
+   *
+   * @param maxDoc The maximum size of all the documents in the index
+   */
+  public DocSetScoreCollector(int maxDoc) {
+    scores = new float[maxDoc];
+    bits = new OpenBitSet(maxDoc);
+  }
+
+  /**
+   * Collects the document and stores the score associated with it.
+   *
+   * @param doc The document id to collect, relative to the current reader
+   * @throws IOException propagated from the scorer
+   */
+  @Override
+  public void collect(int doc) throws IOException {
+    doc = doc + docBase;
+    bits.fastSet(doc);
+    scores[doc] = scorer.score();
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public boolean acceptsDocsOutOfOrder() {
+    return false;
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public void setNextReader(IndexReader reader, int docBase) throws IOException {
+    this.docBase = docBase;
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public void setScorer(Scorer scorer) throws IOException {
+    this.scorer = scorer;
+  }
+
+  /**
+   * Returns a docset of the collected documents.
+   *
+   * @return a docset of the collected documents
+   */
+  public DocSet getDocSet() {
+    return new BitDocSet(bits);
+  }
+
+  /**
+   * Returns the scores for the collected documents.
+   * The index of the score represents the lucene document identifier.
+   *
+   * @return the scores for the collected documents
+   */
+  public float[] getScores() {
+    return scores;
+  }
+}
Index: src/java/org/apache/solr/search/CollapseFilter.java
===================================================================
--- src/java/org/apache/solr/search/CollapseFilter.java	Sat Jul 25 14:34:54 CEST 2009
+++ src/java/org/apache/solr/search/CollapseFilter.java	Sat Jul 25 14:34:54 CEST 2009
@@ -0,0 +1,920 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.util.OpenBitSet;
+import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ExtendedFieldCache;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.FieldCache;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.FieldComparator;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.Explanation;
+import org.apache.solr.common.params.CollapseParams;
+import org.apache.solr.common.params.CollapseParams.CollapseFacet;
+import org.apache.solr.common.params.CollapseParams.CollapseType;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.SolrConfig;
+import org.apache.solr.schema.FieldType;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.util.DocSetScoreCollector;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Collections;
+
+/**
+ * Field collapsing is the process of removing documents that have the same value in a predefined field;
+ * only the most relevant (based on sorting / scoring) documents will stay in the result set. <br />
+ * <br />
+ * The collapse filter's main responsibilities are creating a docset that only contains the most relevant documents
+ * and creating statistics that tell how many documents were collapsed per unique field value. <br />
+ * <br />
+ * Currently there are two different manners of collapsing documents:
+ * <ul>
+ * <li> Only collapse documents that have the same field value and appear next to each other in the result set.
+ * Also known as adjacent field collapsing
+ * <li> Collapse all documents in the query result that have the same field value,
+ * irrespective of where the documents are located in the result set. Also known as normal field collapsing
+ * </ul>
+ *
+ * @author Emmanuel Keller keller.emmanuel@gmail.com
+ * @version $Id:$
+ * @since solr 1.3
+ */
+public class CollapseFilter {
+
+  /** Parameters *************************************************** */
+
+  /**
+   * Field to use to collapse results. Required parameter.
+   */
+  private String collapseField;
+
+  /**
+   * Type of collapsing to do -- collapse all hits or adjacent hits only.
+   * Parameter, NORMAL when not specified.
+   */
+  private CollapseType collapseType;
+
+  /**
+   * Facet before or after collapsing. Parameter, AFTER when not specified.
+   */
+  private CollapseFacet collapseFacet;
+
+  /**
+   * Number of documents with the same value for collapseField after which
+   * collapsing kicks in. Parameter, 1 when not specified.
+   */
+  private int collapseTreshold;
+
+  /**
+   * Maximum number of documents to process during field collapsing.
+   * Parameter, the searcher's maxDoc when not set to a positive value.
+   */
+  private int collapseMaxDocs;
+
+  /**
+   * Whether to include collapse count for each document id in the response. Parameter, true by default.
+   */
+  private boolean collapseInfoDoc;
+
+  /**
+   * Whether to include collapse count for each field value in the response. Parameter, true by default.
+   */
+  private boolean collapseInfoCount;
+
+  /** Collapse State *********************************************** */
+
+  /**
+   * Maps the lucene id of a key document to the number of documents that have been collapsed into it.
+   */
+  private Map<Integer, Integer> collapseCounts;
+
+  /**
+   * Maximum size for a HashDocSet, taken from the Solr config's hashDocSetMaxSize.
+   */
+  private int hashMaxSize;
+
+  /**
+   * Buffer for collecting documents. Gets turned into different types of
+   * DocSet depending on the number of documents we end up with.
+   */
+  private int[] docbuf;
+
+  /**
+   * Number of documents added so far, whether they went into docbuf or into docbufBitSet.
+   */
+  private int docbufSize = 0;
+
+  /**
+   * Maximum document id currently in docbuf. Only valid while docbufSize <
+   * hashMaxSize.
+   */
+  private int docbufMaxDoc = 0;
+
+  /**
+   * Bitset representation of docbuf. Gets created when docbufSize >=
+   * hashMaxSize.
+   */
+  private OpenBitSet docbufBitSet;
+
+  /**
+   * The result of executing the query and filter queries, without collapsing.
+   */
+  private final DocSet uncollapsedDocSet;
+
+
+  /**
+   * Debug Information ********************************************
+   */
+
+  private long timeCollapsing = 0; // ms spent in normalCollapse / adjacentCollapse
+  private long timeConvertToBitSet = 0; // ms spent converting docbuf into docbufBitSet
+  private long timeCreateDocSet = 0; // ms spent in getDocSet
+  private long timeCreateCollapseInfo = 0; // ms spent in getCollapseInfo
+  private long timeCreateUncollapedDocset = 0; // ms spent executing the uncollapsed search
+  private String debugDocSetInfo = "unknown"; // DocSet class name and size, set by getDocSet
+
+  /**
+   * Creates a CollapseFilter based on the specified parameters and immediately runs the collapsing.
+   *
+   * @param config   The Solr config, supplies hashDocSetMaxSize
+   * @param searcher The solr index searcher
+   * @param query    The main query
+   * @param filters  List containing filter queries
+   * @param sort     The lucene sort, null is treated as sorting on score
+   * @param params   The solr parameters
+   * @param flags    The flags parameter, only passed on when adjacent collapsing is used
+   * @throws IOException if index searcher related errors occur
+   */
+  public CollapseFilter(SolrConfig config,
+                        SolrIndexSearcher searcher,
+                        Query query,
+                        List<Query> filters,
+                        Sort sort,
+                        SolrParams params,
+                        int flags) throws IOException {
+    parseParameters(searcher, params);
+
+    // Allocate data structures
+    hashMaxSize = config.hashDocSetMaxSize;
+    docbuf = new int[hashMaxSize];
+    collapseCounts = new HashMap<Integer, Integer>();
+    // adjacent collapsing works on a doc list limited to collapseMaxDocs, normal collapsing on the full docset
+    if (collapseType == CollapseType.ADJACENT) {
+      long startTime = System.currentTimeMillis();
+      uncollapsedDocSet = searcher.getDocList(query, filters, sort, 0, collapseMaxDocs, flags);
+      timeCreateUncollapedDocset = System.currentTimeMillis() - startTime;
+      String[] values = FieldCache.DEFAULT.getStrings(searcher.getReader(), collapseField);
+      adjacentCollapse(uncollapsedDocSet, values);
+    } else {
+      DocumentComparator documentComparator;
+      if (containsSortOnScore(sort)) {
+        DocSetScoreCollector docSetCollector = new DocSetScoreCollector(searcher.maxDoc());
+        long startTime = System.currentTimeMillis();
+        searcher.search(createBooleanQuery(query, filters), null, docSetCollector);
+        timeCreateUncollapedDocset = System.currentTimeMillis() - startTime;
+        uncollapsedDocSet = docSetCollector.getDocSet();
+        // NOTE(review): reverse=true on a SCORE sort field -- confirm DocumentComparator treats this as best score first
+        if (sort == null) {
+          sort = new Sort(new SortField("score", SortField.SCORE, true));
+        }
+        documentComparator = new DocumentComparator(sort, searcher.getIndexReader().numDocs(), searcher.getIndexReader(), docSetCollector.getScores());
+      } else {
+        DocSetCollector docSetCollector = new DocSetCollector(0, searcher.maxDoc());
+        long startTime = System.currentTimeMillis();
+        searcher.search(createBooleanQuery(query, filters), null, docSetCollector);
+        timeCreateUncollapedDocset = System.currentTimeMillis() - startTime;
+        uncollapsedDocSet = docSetCollector.getDocSet();
+        documentComparator = new DocumentComparator(sort, searcher.getIndexReader().numDocs(), searcher.getIndexReader());
+      }
+      // NOTE(review): comparators get numDocs(), which excludes deleted docs; confirm it is not used to index by doc id
+      String[] values = FieldCache.DEFAULT.getStrings(searcher.getReader(), collapseField);
+      normalCollapse(uncollapsedDocSet, values, documentComparator);
+    }
+  }
+
+  /**
+   * Constructor for testing purposes: no search is executed and there is no uncollapsed docset.
+   *
+   * @param collapseMaxDocs  The maximum documents to collapse
+   * @param collapseTreshold Number of documents with the same value for collapseField after which collapsing kicks in
+   */
+  CollapseFilter(int collapseMaxDocs, int collapseTreshold) {
+    this.collapseTreshold = collapseTreshold;
+    this.collapseMaxDocs = collapseMaxDocs;
+    this.collapseCounts = new HashMap<Integer, Integer>();
+    this.uncollapsedDocSet = null;
+  }
+
+  /**
+   * @return the collapsed documents as a DocSet: bit set backed once the bit set exists, hash based otherwise.
+   */
+  public DocSet getDocSet() {
+    long begin = System.currentTimeMillis();
+    DocSet collapsed = (docbufBitSet == null) ? new HashDocSet(docbuf, 0, docbufSize) : new BitDocSet(docbufBitSet);
+    timeCreateDocSet = System.currentTimeMillis() - begin;
+    debugDocSetInfo = new StringBuilder(collapsed.getClass().getSimpleName()).append('(').append(docbufSize).append(')').toString();
+    return collapsed;
+  }
+
+  /**
+   * @param searcher The solr index searcher
+   * @param docs     The doclist containing the results to be displayed
+   * @return the collapse field name, the collapse counts for the documents in the docList and debug timings
+   * @throws IOException if an index searcher related problem occurs
+   */
+  public NamedList<Object> getCollapseInfo(SolrIndexSearcher searcher, DocList docs) throws IOException {
+    long begin = System.currentTimeMillis();
+
+    NamedList<Object> info = new NamedList<Object>();
+    info.add("field", collapseField);
+
+    IndexSchema schema = searcher.getSchema();
+    FieldType fieldType = schema.getField(collapseField).getType();
+    SchemaField keyField = schema.getUniqueKeyField();
+    String keyName = (keyField == null) ? null : keyField.getName();
+
+    if (collapseInfoDoc || collapseInfoCount) {
+      NamedList<Integer> perDocument = null;
+      NamedList<Integer> perValue = null;
+      String[] fieldValues = null;
+      IndexReader reader = null;
+      if (collapseInfoDoc) {
+        perDocument = new NamedList<Integer>();
+        info.add("doc", perDocument);
+        reader = searcher.getReader();
+      }
+
+      if (collapseInfoCount) {
+        perValue = new NamedList<Integer>();
+        info.add("count", perValue);
+        fieldValues = FieldCache.DEFAULT.getStrings(searcher.getReader(), collapseField);
+      }
+
+      for (DocIterator iterator = docs.iterator(); iterator.hasNext();) {
+        int docId = iterator.nextDoc();
+        Integer collapsed = collapseCounts.get(docId);
+        if (collapsed == null) {
+          continue; // no collapse count was recorded for this document
+        }
+        if (collapseInfoDoc && keyName != null) {
+          perDocument.add(reader.document(docId).get(keyName), collapsed);
+        }
+        if (collapseInfoCount) {
+          perValue.add(fieldType.indexedToReadable(fieldValues[docId]), collapsed);
+        }
+      }
+    }
+
+    timeCreateCollapseInfo = System.currentTimeMillis() - begin;
+    info.add("debug", getDebugInfo());
+    return info;
+  }
+
+  
+  // ================================================= Helpers =======================================================
+
+  /**
+   * Applies normal collapsing on the specified uncollapsedDocSet.
+   *
+   * @param uncollapsedDocset The uncollapsed docset
+   * @param values            The field values to collapse on, indexed by lucene document id
+   * @param comparator        The document comparator, used for determining the head of the collapsed document group
+   */
+  protected void normalCollapse(DocSet uncollapsedDocset, String[] values, DocumentComparator comparator) {
+    long startTime = System.currentTimeMillis();
+    int docCount = 0;
+    // One group per distinct collapse value; each group keeps track of how many
+    // documents with that value we have processed so far.
+    Map<String, CollapsedDocumentGroup> collapsedDocs = new HashMap<String, CollapsedDocumentGroup>();
+
+    for (DocIterator i = uncollapsedDocset.iterator(); i.hasNext();) {
+      int currentId = i.nextDoc();
+      String currentValue = values[currentId];
+
+      // Look up the group for this collapsing value; a null value, if the field
+      // cache yields one, is a valid HashMap key and maps to one shared group
+      CollapsedDocumentGroup collapseDoc = collapsedDocs.get(currentValue);
+      if (collapseDoc == null) {
+        // new collapsing value => create a new record for it
+        collapseDoc = new CollapsedDocumentGroup(0, 0, comparator, collapseTreshold);
+        collapsedDocs.put(currentValue, collapseDoc);
+      }
+      collapseDoc.priorityQueue.insertWithOverflow(currentId);
+
+      // check if we have reached the collapse threshold, if so start counting collapsed documents
+      if (++collapseDoc.totalCount > collapseTreshold) {
+        collapseDoc.collapsedDocuments++;
+      }
+
+      // Stop after collapseMaxDocs documents
+      if (++docCount >= collapseMaxDocs) {
+        break;
+      }
+    }
+
+    // every document left in a group's queue goes into the internal buffer; the collapsed count
+    // is recorded for the queue's top() document, which must be read before pop() drains the queue
+    for (CollapsedDocumentGroup collapseDoc : collapsedDocs.values()) {
+      if (collapseDoc.collapsedDocuments > 0) {
+        collapseCounts.put((Integer) collapseDoc.priorityQueue.top(), collapseDoc.collapsedDocuments);
+      }
+      Integer doc;
+      while ((doc = (Integer) collapseDoc.priorityQueue.pop()) != null) {
+        addDoc(doc);
+      }
+    }
+
+    timeCollapsing = System.currentTimeMillis() - startTime;
+  }
+
+  /**
+   * Applies adjacent collapsing on the specified uncollapsedDocSet. Documents whose collapse value is null are never collapsed.
+   *
+   * @param uncollapsedDocset The uncollapsed docset
+   * @param values            The field values to collapse on, indexed by lucene document id
+   */
+  protected void adjacentCollapse(DocSet uncollapsedDocset, String[] values) {
+    int docCount = 0; // how many documents we have processed
+    int repeatCount = 0; // how many times we have seen the same value in a row
+    int collapseCount = 0; // how many documents we have collapsed in this run
+    int collapseId = -1; // the document we're collapsing into, -1 until the first document is seen
+    String collapseValue = null; // field value of the current run, may be null
+
+    long startTime = System.currentTimeMillis();
+    for (DocIterator i = uncollapsedDocset.iterator(); i.hasNext();) {
+      int currentId = i.nextDoc();
+      String currentValue = values[currentId];
+
+      // Initializing: detected via collapseId, because a null collapseValue may also
+      // belong to a document that has no value for the collapse field.
+      if (collapseId == -1) {
+        repeatCount = 0;
+        collapseCount = 0;
+        collapseId = currentId;
+        collapseValue = currentValue;
+
+        // Collapse the document if the field value is the same and
+        // we have a run of at least collapseThreshold documents.
+      } else if (collapseValue != null && collapseValue.equals(currentValue)) {
+        if (++repeatCount >= collapseTreshold) {
+          collapseCount++;
+        } else {
+          addDoc(currentId);
+        }
+      } else {
+        addDoc(collapseId);
+        if (collapseCount > 0) {
+          collapseCounts.put(collapseId, collapseCount);
+        }
+
+        repeatCount = 0;
+        collapseCount = 0;
+        collapseId = currentId;
+        collapseValue = currentValue;
+      }
+
+      // Stop after collapseMaxDocs documents
+      if (++docCount >= collapseMaxDocs) {
+        break;
+      }
+    }
+
+    if (collapseId != -1) {
+      addDoc(collapseId);
+    }
+
+    if (collapseCount > 0) {
+      collapseCounts.put(collapseId, collapseCount);
+    }
+
+    timeCollapsing = System.currentTimeMillis() - startTime;
+  }
+
+  /**
+   * Stores a document id in the internal buffer: in the int array while it has room, in the bit set afterwards.
+   *
+   * @param doc The lucene identifier of the document to add
+   */
+  protected void addDoc(int doc) {
+    boolean arrayHasRoom = docbufSize < hashMaxSize;
+
+    if (!arrayHasRoom) {
+      // No room (left) in the array: documents go into a bit set from now on.
+      // On first use the bit set is created and filled with the buffered ids.
+      if (docbufBitSet == null) {
+        long begin = System.currentTimeMillis();
+        docbufBitSet = new OpenBitSet(docbufMaxDoc + 1);
+        for (int pos = 0; pos < docbufSize; pos++) {
+          docbufBitSet.fastSet(docbuf[pos]);
+        }
+        timeConvertToBitSet = System.currentTimeMillis() - begin;
+      }
+      docbufBitSet.set(doc);
+    } else {
+      // Buffer the id and remember the largest one seen so far; that value
+      // determines the initial size of the bit set if one is needed later.
+      docbuf[docbufSize] = doc;
+      docbufMaxDoc = Math.max(docbufMaxDoc, doc);
+    }
+
+    // counts every added document, whichever representation received it
+    docbufSize++;
+  }
+
+  /**
+   * Combines the main query and all filter queries into one boolean query in which every clause is required.
+   * @param mainQuery     The user query
+   * @param filterQueries The filter queries, may be null
+   * @return a boolean query that contains the specified mainQuery and filter queries with a must clause
+   */
+  protected BooleanQuery createBooleanQuery(Query mainQuery, List<Query> filterQueries) {
+    BooleanQuery combined = new BooleanQuery();
+    combined.add(mainQuery, BooleanClause.Occur.MUST);
+    if (filterQueries == null) {
+      return combined;
+    }
+    for (Query required : filterQueries) {
+      combined.add(required, BooleanClause.Occur.MUST);
+    }
+    return combined;
+  }
+
+  /**
+   * Reads all field collapsing settings from the request parameters and applies the defaults.
+   *
+   * @param searcher Solr index searcher, its maxDoc is the fallback for the maximum number of documents
+   * @param params   The specified params, containing solr specific parameters
+   * @throws IOException when SolrIndexSearcher problems occur
+   */
+  protected void parseParameters(SolrIndexSearcher searcher, SolrParams params) throws IOException {
+    collapseField = params.required().get(CollapseParams.COLLAPSE_FIELD);
+
+    String typeName = params.get(CollapseParams.COLLAPSE_TYPE);
+    collapseType = (typeName == null) ? CollapseType.NORMAL : CollapseType.get(typeName);
+
+    String facetName = params.get(CollapseParams.COLLAPSE_FACET);
+    collapseFacet = (facetName == null) ? CollapseFacet.AFTER : CollapseFacet.get(facetName);
+
+    // COLLAPSE_THRESHOLD wins; COLLAPSE_MAX is only consulted when it is absent
+    Integer threshold = params.getInt(CollapseParams.COLLAPSE_THRESHOLD);
+    if (threshold == null) {
+      threshold = params.getInt(CollapseParams.COLLAPSE_MAX);
+    }
+    collapseTreshold = (threshold == null) ? 1 : threshold;
+
+    int maxDocs = params.getInt(CollapseParams.COLLAPSE_MAXDOCS, 0);
+    collapseMaxDocs = (maxDocs > 0) ? maxDocs : searcher.maxDoc();
+
+    collapseInfoDoc = params.getBool(CollapseParams.COLLAPSE_INFO_DOC, true);
+    collapseInfoCount = params.getBool(CollapseParams.COLLAPSE_INFO_COUNT, true);
+  }
+
+  /**
+   * @return the docset type and the time (ms) spent in every step of the collapsing process, plus the total.
+   */
+  protected NamedList getDebugInfo() {
+    NamedList<Object> timings = new NamedList<Object>();
+    String collapsingLabel = String.format("Collapsing %s time(ms)", collapseType);
+    timings.add("Docset type", debugDocSetInfo);
+    timings.add("Total collapsing time(ms)", timeCreateUncollapedDocset + timeCollapsing + timeCreateCollapseInfo + timeConvertToBitSet + timeCreateDocSet);
+    timings.add("Create uncollapsed docset(ms)", timeCreateUncollapedDocset);
+    timings.add(collapsingLabel, timeCollapsing);
+    timings.add("Creating collapseinfo time(ms)", timeCreateCollapseInfo);
+    timings.add("Convert to bitset time(ms)", timeConvertToBitSet);
+    timings.add("Create collapsed docset time(ms)", timeCreateDocSet);
+    return timings;
+  }
+
+  /**
+   * Tells whether sorting involves the score, either explicitly through a sort field
+   * of type score or implicitly because no sort was specified at all.
+   *
+   * @param sort The sort, may be null
+   * @return <code>true</code> if the sort is null or contains a sortfield that sorts on score, otherwise <code>false</code>
+   */
+  protected boolean containsSortOnScore(Sort sort) {
+    // no explicit sort means default sorting, which is sorting on score desc
+    boolean onScore = (sort == null);
+    if (!onScore) {
+      SortField[] fields = sort.getSort();
+      for (int idx = 0; idx < fields.length && !onScore; idx++) {
+        onScore = (fields[idx].getType() == SortField.SCORE);
+      }
+    }
+    return onScore;
+  }
+
+
+  // =========================================== Getters / Setters ===================================================
+
+  /**
+   * @return facet before or after collapsing
+   */
+  public CollapseFacet getCollapseFacet() {
+    return collapseFacet;
+  }
+
+  /**
+   * @return field to use to collapse results
+   */
+  public String getCollapseField() {
+    return collapseField;
+  }
+
+  /**
+   * @return whether to include collapse count for each field value in the response
+   */
+  public boolean isCollapseInfoCount() {
+    return collapseInfoCount;
+  }
+
+  /**
+   * @return whether to unclude collapse counts for each document in the response
+   */
+  public boolean isCollapseInfoDoc() {
+    return collapseInfoDoc;
+  }
+
+  /**
+   * @return maximum number of documents to process during field collapsing
+   */
+  public int getCollapseMaxDocs() {
+    return collapseMaxDocs;
+  }
+
+  /**
+   * @return the number of documents with the same value for collapseField after which collapsing kicks in
+   */
+  public int getCollapseTreshold() {
+    return collapseTreshold;
+  }
+
+  /**
+   * @return the type of collapsing to do: all documents or adjacent documents
+   */
+  public CollapseType getCollapseType() {
+    return collapseType;
+  }
+
+  /**
+   * @return the result of executing the query and filter queries, without field collapsing applied
+   */
+  public DocSet getUncollapsedDocSet() {
+    return uncollapsedDocSet;
+  }
+
+  /**
+   * @return unmodifiable view of the collapse counts: per key document, the number of documents collapsed into it
+   */
+  public Map<Integer, Integer> getCollapseCounts() {
+    return Collections.unmodifiableMap(collapseCounts);
+  }
+
+
+  // ============================================ Inner Classes ======================================================
+
+  /**
+   * A <code>PriorityQueue</code> that maintains its order with a <code>DocumentComparator</code>.
+   */
+  public static class DocumentPriorityQueue extends PriorityQueue {
+
+    private final DocumentComparator comparator;
+
+    /**
+     * Constructs a <code>DocumentPriorityQueue</code>.
+     *
+     * @param comparator The <code>DocumentComparator</code> used for maintaining order in the queue
+     * @param max        The maximum number of document identifiers in the queue (is equal to collapse threshold parameter)
+     */
+    public DocumentPriorityQueue(DocumentComparator comparator, int max) {
+      this.comparator = comparator;
+      initialize(max);
+    }
+
+    /** {@inheritDoc} */
+    protected boolean lessThan(Object a, Object b) {
+      final Integer first = (Integer) a;
+      final Integer second = (Integer) b;
+      return comparator.compare(first, second) < 0;
+    }
+  }
+
+  /**
+   * Compares two documents with each other using the sort fields of a <code>Sort</code>.
+   */
+  public static class DocumentComparator {
+
+    private final FieldComparator[] fieldComparators;
+    private final boolean[] descending;
+    private Scorer scorer;
+
+    /**
+     * Constructs a <code>DocumentComparator</code> without predefined scores; equivalent to calling
+     * {@link #DocumentComparator(Sort, int, IndexReader, float[])} with <code>null</code> scores.
+     *
+     * @param sort         The sort used for the creation of the FieldComparators.
+     * @param numberOfHits The number of results in the pre-field-collapsed resultset
+     * @param reader       The index reader, used for reading field values (in the FieldComparators)
+     */
+    public DocumentComparator(Sort sort, int numberOfHits, IndexReader reader) {
+      this(sort, numberOfHits, reader, null);
+    }
+
+    /**
+     * Constructs a <code>DocumentComparator</code> by initializing the
+     * {@link org.apache.lucene.search.FieldComparator}s and determining the sort orders.
+     *
+     * @param sort         The sort used for the creation of the FieldComparators.
+     * @param numberOfHits The number of results in the pre-field-collapsed resultset
+     * @param reader       The index reader, used for reading field values (in the FieldComparators)
+     * @param scores       The scores used for comparing the documents, looked up by lucene identifier; may be null
+     */
+    public DocumentComparator(Sort sort, int numberOfHits, IndexReader reader, float[] scores) {
+      fieldComparators = new FieldComparator[sort.getSort().length];
+      descending = new boolean[sort.getSort().length];
+      initializeFieldComparators(sort, numberOfHits, reader, scores);
+    }
+
+    /**
+     * Compares doc1 and doc2 with each other using the initialized <code>FieldComparators</code>.
+     * If all <code>FieldComparators</code> compare the documents as equal, then the document with the lowest
+     * lucene identifier is classified as most relevant. An IOException is rethrown as a RuntimeException.
+     *
+     * @param doc1 The lucene identifier of the first document
+     * @param doc2 The lucene identifier of the second document
+     * @return a negative value if doc1 is less relevant, or equally relevant but with a higher lucene id than doc2,
+     *         0 if both documents are identical,
+     *         a positive value if doc1 is more relevant, or equally relevant but with a lower lucene id than doc2
+     */
+    public int compare(int doc1, int doc2) {
+      int result;
+      for (int i = 0; i < fieldComparators.length; i++) {
+        FieldComparator fieldComparator = fieldComparators[i];
+        try {
+          scorer.skipTo(doc2); // position the scorer first, so a comparator that reads the scorer sees doc2
+          fieldComparator.copy(doc2, doc2); // the lucene identifier is used as the slot
+          scorer.skipTo(doc1);
+          fieldComparator.copy(doc1, doc1);
+        } catch (IOException e) {
+          throw new RuntimeException(e);
+        }
+        result = fieldComparator.compare(doc1, doc2);
+        result = descending[i] ? result : -result; // kept for reversed sort fields, negated otherwise
+
+        if (result != 0) {
+          return result;
+        }
+      }
+
+      // field comparators identified the field(s) values as equal
+      // Document with lowest identifier has higher precedence
+      if (doc1 < doc2) {
+        return 1;
+      } else if (doc1 > doc2) {
+        return -1;
+      }
+
+      // can only happen when comparing the exact same document (with the same lucene id)
+      return 0;
+    }
+
+    /** Creates the scorer and one FieldComparator per sort field, and records the reverse flag of each sort field. */
+    private void initializeFieldComparators(Sort sort, int numberOfHits, IndexReader indexReader, float[] scores) {
+      try {
+        final SortField[] sortFields = sort.getSort();
+        scorer = new PredefinedScorer(scores == null ? new float[0] : scores);
+        // Fast path, only valid when the single sort field really sorts on score; otherwise the documents
+        // would silently be ordered on score instead of on the requested field.
+        if (sortFields.length == 1 && scores != null && sortFields[0].getType() == SortField.SCORE) {
+          fieldComparators[0] = new FloatValueFieldComparator(scores);
+          descending[0] = sortFields[0].getReverse();
+          return;
+        }
+        for (int i = 0; i < sortFields.length; i++) {
+          SortField sortField = sortFields[i];
+          fieldComparators[i] = sortField.getComparator(numberOfHits, i, sortField.getReverse());
+          descending[i] = sortField.getReverse();
+          fieldComparators[i].setNextReader(indexReader, 0, numberOfHits);
+          fieldComparators[i].setScorer(scorer);
+        }
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+  }
+
+  /**
+   * Represents a collapse group that collects statistics for a certain fieldvalue group.
+   * <p/>
+   * Keeps track of the following statistics during the collapsing on fieldvalue:
+   * <ul>
+   * <li>how many documents were collapsed
+   * <li>what the most relevant head documents are in this group (these will not get collapsed)
+   * <li>how many have been processed
+   * </ul>
+   */
+  private static class CollapsedDocumentGroup {
+
+    int collapsedDocuments;
+    int totalCount;
+    final DocumentPriorityQueue priorityQueue;
+
+    /**
+     * Constructs a <code>CollapsedDocumentGroup</code>. Keeps track of the most relevant documents
+     * in this group. These documents will stay in the resultset and do not get collapsed, no more than the
+     * specified collapsThreshold will be kept inside this <code>CollapsedDocumentGroup</code>.
+     *
+     * @param totalCount         the total amount of documents processed in the collapsing process
+     * @param collapsedDocuments the amount of documents collapsed under this group
+     * @param comparator         The document comparator used inside the priority queue
+     * @param collapsThreshold   The threshold to start collapsing from; used as the size of the priority queue
+     */
+    private CollapsedDocumentGroup(int totalCount, int collapsedDocuments, DocumentComparator comparator, int collapsThreshold) {
+      this.priorityQueue = new DocumentPriorityQueue(comparator, collapsThreshold);
+      this.collapsedDocuments = collapsedDocuments;
+      this.totalCount = totalCount;
+    }
+  }
+
+  /**
+   * A scorer that returns scores from a predefined array of scores; the array position doubles as document id.
+   */
+  private static final class PredefinedScorer extends Scorer {
+
+    private final float[] scores;
+    private int index;
+
+    private PredefinedScorer(float[] scores) {
+      super(null);
+      this.scores = scores;
+    }
+
+    /**
+     * Returns the predefined score at the current position.
+     */
+    public float score() throws IOException {
+      return scores[index];
+    }
+
+    /**
+     * Unsupported because this scorer contains predefined calculated scores, that cannot be explained.
+     *
+     * @param doc The document identifier
+     * @return never returns normally
+     * @throws UnsupportedOperationException always
+     */
+    public Explanation explain(int doc) throws IOException {
+      throw new UnsupportedOperationException("Unsupported method");
+    }
+
+    /**
+     * Returns the current position, which is used as the document identifier.
+     */
+    public int doc() {
+      return index;
+    }
+
+    /**
+     * Advances to the next position; returns <code>false</code> without moving when no further score exists.
+     */
+    public boolean next() throws IOException {
+      if (index + 1 < scores.length) { // never step past the last score, score() would fail there
+        index++;
+        return true;
+      }
+      return false;
+    }
+
+    /**
+     * Moves to <code>target</code>; returns <code>false</code> without moving when target is not below the number of scores.
+     */
+    public boolean skipTo(int target) throws IOException {
+      if (target < scores.length) {
+        index = target;
+        return true;
+      }
+      return false;
+    }
+
+  }
+
+  /**
+   * Field comparator for floats. This implementation is a little bit faster (around 30 ms) than
+   * {@link FieldComparator.FloatComparator} because it does not copy floats in {@link #copy(int, int)}. This is
+   * not necessary in the field collapsing case, because the arguments passed to the {@link #compare(int, int)}
+   * method are document ids and the values array is sorted on document order.
+   */
+  private static class FloatValueFieldComparator extends FieldComparator {
+
+    private final float[] values;
+
+    /**
+     * @param values the values to compare, looked up by document id
+     */
+    private FloatValueFieldComparator(float[] values) {
+      this.values = values;
+    }
+
+    /**
+     * Compares the value for doc1 with the value for doc2.
+     *
+     * @param doc1 first document to compare
+     * @param doc2 second document to compare
+     * @return 1 if the value of doc1 is greater than the value of doc2,
+     *         -1 if the value of doc1 is less than the value of doc2
+     *         and 0 otherwise
+     */
+    public int compare(int doc1, int doc2) {
+      final float first = values[doc1];
+      final float second = values[doc2];
+      // a NaN on either side fails both tests, so such a pair compares as equal
+      if (first < second) {
+        return -1;
+      }
+      return first > second ? 1 : 0;
+    }
+
+    /**
+     * Unsupported, is not necessary for field collapsing.
+     *
+     * @param doc The document identifier
+     * @throws UnsupportedOperationException always
+     */
+    public void setBottom(int doc) {
+      throw new UnsupportedOperationException("Method not implemented");
+    }
+
+    /**
+     * Unsupported, is not necessary for field collapsing.
+     *
+     * @param doc The document identifier
+     * @return never returns normally
+     * @throws UnsupportedOperationException always
+     */
+    public int compareBottom(int doc) throws IOException {
+      throw new UnsupportedOperationException("Method not implemented");
+    }
+
+    /**
+     * {@inheritDoc}
+     * <br /><br />
+     * Does nothing; only implemented because the superclass requires it.
+     */
+    public void copy(int slot, int doc) throws IOException {
+    }
+
+    /**
+     * {@inheritDoc}
+     * <br /><br />
+     * Does nothing; only implemented because the superclass requires it.
+     */
+    public void setNextReader(IndexReader reader, int docBase, int numSlotsFull) throws IOException {
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    public int sortType() {
+      return SortField.FLOAT;
+    }
+
+    /**
+     * Unsupported, is not necessary for field collapsing.
+     *
+     * @param doc The document identifier
+     * @throws UnsupportedOperationException always
+     */
+    public Comparable value(int doc) {
+      throw new UnsupportedOperationException("Method not implemented");
+    }
+
+  }
+
+}
Index: src/common/org/apache/solr/common/params/CollapseParams.java
===================================================================
--- src/common/org/apache/solr/common/params/CollapseParams.java	Wed Jun 17 20:25:55 CEST 2009
+++ src/common/org/apache/solr/common/params/CollapseParams.java	Wed Jun 17 20:25:55 CEST 2009
@@ -0,0 +1,81 @@
+package org.apache.solr.common.params;
+
+import org.apache.solr.common.SolrException;
+
+public interface CollapseParams {
+  
+  /**
+   * The field to collapse results on.
+   */
+  public static final String COLLAPSE_FIELD = "collapse.field";
+  
+  /**
+   * Type of collapsing to perform: "normal" or "adjacent".
+   */
+  public static final String COLLAPSE_TYPE = "collapse.type";
+  
+  public enum CollapseType {
+    NORMAL, ADJACENT;
+    
+    public String toString() {
+      return super.toString().toLowerCase();
+    }
+    
+    public static CollapseType get(String label) {
+      try {
+        return valueOf(label.toUpperCase());
+      } catch (IllegalArgumentException e) {
+        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, label
+            + " is not a valid type of field collapsing", e);
+      }
+    }
+  }
+  
+  /**
+   * Apply faceting before or after collapsing.
+   */
+  public static final String COLLAPSE_FACET = "collapse.facet";
+  
+  public enum CollapseFacet {
+    BEFORE, AFTER;
+    
+    public String toString() {
+      return super.toString().toLowerCase();
+    }
+    
+    public static CollapseFacet get(String label) {
+      try {
+        return valueOf(label.toUpperCase());
+      } catch (IllegalArgumentException e) {
+        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, label
+            + " is not a valid faceting mode for field collapsing", e);
+      }
+    }
+  }
+  
+  /**
+   * The number of documents with the same value for collapse.field after which
+   * collapsing kicks in.
+   */
+  public static final String COLLAPSE_THRESHOLD = "collapse.threshold";
+  
+  /**
+   * @deprecated Deprecated in favour of collapse.threshold.
+   */
+  public static final String COLLAPSE_MAX = "collapse.max";
+  
+  /**
+   * Maximum number of documents to process during field collapsing.
+   */
+  public static final String COLLAPSE_MAXDOCS = "collapse.maxdocs";
+  
+  /**
+   * Return collapse count for each document? Defaults to true.
+   */
+  public static final String COLLAPSE_INFO_DOC = "collapse.info.doc";
+  
+  /**
+   * Return collapse count for each field value? Defaults to true.
+   */
+  public static final String COLLAPSE_INFO_COUNT = "collapse.info.count";
+}
Index: src/test/org/apache/solr/search/CollapseFilterTest.java
===================================================================
--- src/test/org/apache/solr/search/CollapseFilterTest.java	Wed Jun 17 20:25:55 CEST 2009
+++ src/test/org/apache/solr/search/CollapseFilterTest.java	Wed Jun 17 20:25:55 CEST 2009
@@ -0,0 +1,191 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search;
+
+import junit.framework.TestCase;
+
+import java.util.Map;
+
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+
+/**
+ * Unit tests for the normal and adjacent collapsing of {@link CollapseFilter}.
+ *
+ * @author Martijn van Groningen martijn.v.groningen@gmail.com
+ */
+public class CollapseFilterTest extends TestCase {
+
+  private CollapseFilter collapseFilter;
+
+  public void testNormalCollapse_collapseThresholdOne() { // expects only documents 0, 3 and 4 to remain
+    collapseFilter = new CollapseFilter(Integer.MAX_VALUE, 1); // presumably (max docs, collapse threshold) - confirm against CollapseFilter
+    float[] scores = new float[]{1.0f, 2.0f, 0.1f, 1.5f, 2.5f, 0.1f, 0.1f}; // looked up by document id in the comparator
+    Sort sort = new Sort(new SortField("", SortField.SCORE, true));
+    CollapseFilter.DocumentComparator comparator = new CollapseFilter.DocumentComparator(sort, scores.length, null, scores); // no index reader is passed
+    String[] values = new String[]{"a", "b", "a", "c", "b", "a", "b"}; // presumably the collapse field value per document id - confirm against normalCollapse
+    int[] docIds = new int[]{1, 2, 0, 3, 4, 5, 6};
+    DocSet uncollapsedDocset = new HashDocSet(docIds, 0, 7);
+    collapseFilter.normalCollapse(uncollapsedDocset, values, comparator);
+
+    Map<Integer, Integer> collapseCounts = collapseFilter.getCollapseCounts();
+    assertEquals(2, collapseCounts.size()); // two key documents have collapsed documents
+    assertEquals(2, (int) collapseCounts.get(0)); // two documents collapsed into document 0
+    assertEquals(2, (int) collapseCounts.get(4)); // two documents collapsed into document 4
+
+    DocSet docHeadsDocSet = collapseFilter.getDocSet();
+    assertEquals(3, docHeadsDocSet.size());
+
+    boolean zeroFound = false;
+    boolean oneFound = false;
+    boolean twoFound = false;
+    boolean threeFound = false;
+    boolean fourFound = false;
+    boolean fiveFound = false;
+    boolean sixFound = false;
+    for (DocIterator i = docHeadsDocSet.iterator(); i.hasNext();) { // record which of the documents 0..6 are in the resulting docset
+      int docId = i.nextDoc();
+      if (docId == 0) {
+        zeroFound = true;
+      } else if (docId == 1) {
+        oneFound = true;
+      } else if (docId == 2) {
+        twoFound = true;
+      } else if (docId == 3) {
+        threeFound = true;
+      } else if (docId == 4) {
+        fourFound = true;
+      } else if (docId == 5) {
+        fiveFound = true;
+      } else if (docId == 6) {
+        sixFound = true;
+      }
+    }
+
+    assertTrue(zeroFound);
+    assertFalse(oneFound);
+    assertFalse(twoFound);
+    assertTrue(threeFound);
+    assertTrue(fourFound);
+    assertFalse(fiveFound);
+    assertFalse(sixFound);
+  }
+
+  public void testNormalCollapse_collapseThresholdThree() { // expects every document except document 5 to remain
+    collapseFilter = new CollapseFilter(Integer.MAX_VALUE, 3); // presumably (max docs, collapse threshold) - confirm against CollapseFilter
+    float[] scores = new float[]{1.0f, 2.0f, 0.1f, 1.5f, 2.5f, 0.1f, 0.1f}; // looked up by document id in the comparator
+    Sort sort = new Sort(new SortField("", SortField.SCORE, true));
+    CollapseFilter.DocumentComparator comparator = new CollapseFilter.DocumentComparator(sort, scores.length, null, scores); // no index reader is passed
+    String[] values = new String[]{"a", "a", "a", "c", "b", "a", "b"}; // presumably the collapse field value per document id - confirm against normalCollapse
+    int[] docIds = new int[]{1, 2, 0, 3, 4, 5, 6};
+    DocSet uncollapsedDocset = new HashDocSet(docIds, 0, 7);
+    collapseFilter.normalCollapse(uncollapsedDocset, values, comparator);
+
+    Map<Integer, Integer> collapseCounts = collapseFilter.getCollapseCounts();
+    assertEquals(1, collapseCounts.size()); // only one key document has collapsed documents
+    assertEquals(1, (int) collapseCounts.get(2)); // one document collapsed into document 2
+
+    DocSet docHeadsDocSet = collapseFilter.getDocSet();
+    assertEquals(6, docHeadsDocSet.size());
+
+    boolean zeroFound = false;
+    boolean oneFound = false;
+    boolean twoFound = false;
+    boolean threeFound = false;
+    boolean fourFound = false;
+    boolean fiveFound = false;
+    boolean sixFound = false;
+    for (DocIterator i = docHeadsDocSet.iterator(); i.hasNext();) { // record which of the documents 0..6 are in the resulting docset
+      int docId = i.nextDoc();
+      if (docId == 0) {
+        zeroFound = true;
+      } else if (docId == 1) {
+        oneFound = true;
+      } else if (docId == 2) {
+        twoFound = true;
+      } else if (docId == 3) {
+        threeFound = true;
+      } else if (docId == 4) {
+        fourFound = true;
+      } else if (docId == 5) {
+        fiveFound = true;
+      } else if (docId == 6) {
+        sixFound = true;
+      }
+    }
+
+    assertTrue(zeroFound);
+    assertTrue(oneFound);
+    assertTrue(twoFound);
+    assertTrue(threeFound);
+    assertTrue(fourFound);
+    assertFalse(fiveFound);
+    assertTrue(sixFound);
+  }
+
+  public void testAdjacentCollapse() { // expects documents 2 and 6 to be collapsed
+    collapseFilter = new CollapseFilter(Integer.MAX_VALUE, 1); // presumably (max docs, collapse threshold) - confirm against CollapseFilter
+    // adjacent collapsing only works with an ordered docset
+    String[] values = new String[]{"z", "a", "a", "b", "c", "b", "b"}; // presumably the collapse field value per document id - confirm against adjacentCollapse
+    int[] docIds = new int[]{0, 1, 2, 3, 4, 5, 6};
+    DocSet uncollapsedDocset = new HashDocSet(docIds, 0, 7);
+    collapseFilter.adjacentCollapse(uncollapsedDocset, values);
+
+    Map<Integer, Integer> collapseCounts = collapseFilter.getCollapseCounts();
+    assertEquals(2, collapseCounts.size()); // two key documents have collapsed documents
+    assertEquals(1, (int) collapseCounts.get(1)); // one document collapsed into document 1
+    assertEquals(1, (int) collapseCounts.get(5)); // one document collapsed into document 5
+
+    DocSet docHeadsDocSet = collapseFilter.getDocSet();
+    assertEquals(5, docHeadsDocSet.size());
+
+    boolean zeroFound = false;
+    boolean oneFound = false;
+    boolean twoFound = false;
+    boolean threeFound = false;
+    boolean fourFound = false;
+    boolean fiveFound = false;
+    boolean sixFound = false;
+    for (DocIterator i = docHeadsDocSet.iterator(); i.hasNext();) { // record which of the documents 0..6 are in the resulting docset
+      int docId = i.nextDoc();
+      if (docId == 0) {
+        zeroFound = true;
+      } else if (docId == 1) {
+        oneFound = true;
+      } else if (docId == 2) {
+        twoFound = true;
+      } else if (docId == 3) {
+        threeFound = true;
+      } else if (docId == 4) {
+        fourFound = true;
+      } else if (docId == 5) {
+        fiveFound = true;
+      } else if (docId == 6) {
+        sixFound = true;
+      }
+    }
+
+    assertTrue(zeroFound);
+    assertTrue(oneFound);
+    assertFalse(twoFound);
+    assertTrue(threeFound);
+    assertTrue(fourFound);
+    assertTrue(fiveFound);
+    assertFalse(sixFound);
+  }
+
+}
