Index: lucene/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java	(revision 1165820)
+++ lucene/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java	(working copy)
@@ -189,7 +189,7 @@
         new OrdTermState(), 0, stats.getDocFreq(), stats.getTotalTermFreq());
     
     for (SimilarityBase sim : sims) {
-      BasicStats realStats = sim.computeStats(new SpoofIndexSearcher(stats),
+      BasicStats realStats = (BasicStats) sim.computeStats(new SpoofIndexSearcher(stats),
           "spoof", stats.getTotalBoost(), tc);
       float score = sim.score(realStats, freq, docLen);
       float explScore = sim.explain(
@@ -290,7 +290,8 @@
     BasicStats stats = createStats();
     stats.setTotalTermFreq(stats.getNumberOfFieldTokens());
     unitTestCore(stats, DOC_LEN, DOC_LEN);
-    // nocommit docLen > avglength
+    stats.setAvgFieldLength(DOC_LEN + 10);
+    unitTestCore(stats, DOC_LEN, DOC_LEN);
   }
 
   /**
@@ -447,32 +448,32 @@
   }
   
   /** Correctness test for the GL1 DFR model. */
-  @Ignore("nocommit")
   public void testGL1() throws IOException {
     SimilarityBase sim = new DFRSimilarity(
         new BasicModelG(), new AfterEffectL(), new NormalizationH1());
-    correctnessTestCore(sim, 1.22733118352f);
+    correctnessTestCore(sim, 1.6463143825531006f);
   }
   
   /** Correctness test for the BEB1 DFR model. */
-  @Ignore("nocommit")
   public void testBEB1() throws IOException {
     SimilarityBase sim = new DFRSimilarity(
         new BasicModelBE(), new AfterEffectB(), new NormalizationH1());
     float tfn = FREQ * AVG_FIELD_LENGTH / DOC_LEN;  // 8.75
     float b = (TOTAL_TERM_FREQ + 1) / (DOC_FREQ * (tfn + 1));  // 0.728205128205
-    float n1 = NUMBER_OF_DOCUMENTS + 1 + TOTAL_TERM_FREQ - 1;        // 170
-    float m1 = NUMBER_OF_DOCUMENTS + 1 + TOTAL_TERM_FREQ - tfn - 2;  // 160.25
-    float n2 = TOTAL_TERM_FREQ;                                      // 70
-    float m2 = TOTAL_TERM_FREQ - tfn;                                // 61.25
-    float be = (float)(-SimilarityBase.log2(NUMBER_OF_DOCUMENTS + 1 - 1) -
-               SimilarityBase.log2(Math.E) +                   // -8.08655123066
+    float f = TOTAL_TERM_FREQ + tfn;
+    float n = f + NUMBER_OF_DOCUMENTS;
+    float n1 = n + f - 1;        // 256.5
+    float m1 = n + f - tfn - 2;  // 246.75
+    float n2 = f;                                      // 78.75
+    float m2 = f - tfn;                                // 70.0
+    float be = (float)(-SimilarityBase.log2(n - 1) -
+               SimilarityBase.log2(Math.E) +                   // -8.916400790508378
                ((m1 + 0.5f) * SimilarityBase.log2(n1 / m1) +
-                (n1 - m1) * SimilarityBase.log2(n1)) -         // 85.9391317425
+                (n1 - m1) * SimilarityBase.log2(n1)) -         // 91.85089272283668
                ((m2 + 0.5f) * SimilarityBase.log2(n2 / m2) +
-                (n2 - m2) * SimilarityBase.log2(n2)));         // 65.5270599612
-               // 12.3255205506
-    float gold = b * be;                                       // 8.97550727277
+                (n2 - m2) * SimilarityBase.log2(n2)));         // 67.09778276257171
+               // 15.836709
+    float gold = b * be;                                       // 11.532373
     correctnessTestCore(sim, gold);
   }
 
@@ -527,7 +528,7 @@
         searcher.getIndexReader().getTopReaderContext(),
         new OrdTermState(), 0, stats.getDocFreq(), stats.getTotalTermFreq());
     
-    BasicStats realStats = sim.computeStats(
+    BasicStats realStats = (BasicStats) sim.computeStats(
         searcher, "spoof", stats.getTotalBoost(), tc);
     float score = sim.score(realStats, FREQ, DOC_LEN);
     assertEquals(
Index: lucene/src/java/org/apache/lucene/search/similarities/BasicModelG.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/similarities/BasicModelG.java	(revision 1164294)
+++ lucene/src/java/org/apache/lucene/search/similarities/BasicModelG.java	(working copy)
@@ -20,7 +20,9 @@
 import static org.apache.lucene.search.similarities.SimilarityBase.log2;
 
 /**
- * Geometric as limiting form of the Bose-Einstein model.
+ * Geometric as limiting form of the Bose-Einstein model.  The formula used in Lucene differs
+ * slightly from the one in the original paper: {@code F} is increased by {@code tfn}
+ * and {@code N} is increased by {@code F}.
  * @lucene.experimental
  */
 public class BasicModelG extends BasicModel {
Index: lucene/src/java/org/apache/lucene/search/similarities/LMSimilarity.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/similarities/LMSimilarity.java	(revision 1163308)
+++ lucene/src/java/org/apache/lucene/search/similarities/LMSimilarity.java	(working copy)
@@ -52,17 +52,20 @@
     this(new DefaultCollectionModel());
   }
   
+  @Override
+  protected BasicStats newStats(float queryBoost) {
+    return new LMStats(queryBoost);
+  }
+
   /**
    * Computes the collection probability of the current term in addition to the
    * usual statistics.
    */
   @Override
-  public BasicStats computeStats(IndexSearcher searcher, String fieldName,
-      float queryBoost, TermContext... termContexts) throws IOException {
-    LMStats stats = new LMStats(queryBoost);
-    fillBasicStats(stats, searcher, fieldName, termContexts);
-    stats.setCollectionProbability(collectionModel.computeProbability(stats));
-    return stats;
+  protected void fillBasicStats(BasicStats stats, IndexSearcher searcher, String fieldName, TermContext termContext) throws IOException {
+    super.fillBasicStats(stats, searcher, fieldName, termContext);
+    LMStats lmStats = (LMStats) stats;
+    lmStats.setCollectionProbability(collectionModel.computeProbability(stats));
   }
 
   @Override
Index: lucene/src/java/org/apache/lucene/search/similarities/SimilarityBase.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/similarities/SimilarityBase.java	(revision 1164294)
+++ lucene/src/java/org/apache/lucene/search/similarities/SimilarityBase.java	(working copy)
@@ -38,6 +38,11 @@
  * inasmuch as SimilarityBase already provides a basic explanation of the score
  * and the term frequency. However, implementers of a subclass are encouraged to
  * include as much detail about the scoring method as possible.
+ * <p>
+ * Note: multi-word queries such as phrase queries are scored differently from
+ * Lucene's default ranking algorithm: whereas the default "fakes" an IDF value
+ * for the phrase as a whole (since the phrase's real IDF is not known), this
+ * class instead scores a phrase as the summation of its individual term scores.
  * @lucene.experimental
  */
 public abstract class SimilarityBase extends Similarity {
@@ -65,33 +70,31 @@
     return discountOverlaps;
   }
   
-  /**
-   * Calls {@link #fillBasicStats(BasicStats, IndexSearcher, String, TermContext...)}.
-   * Subclasses that override this method may invoke {@code fillStats} with any
-   * subclass of {@code BasicStats}.
-   */
   @Override
-  public BasicStats computeStats(IndexSearcher searcher, String fieldName,
+  public final Stats computeStats(IndexSearcher searcher, String fieldName,
       float queryBoost, TermContext... termContexts) throws IOException {
-    BasicStats stats = new BasicStats(queryBoost);
-    fillBasicStats(stats, searcher, fieldName, termContexts);
-    return stats;
+    BasicStats stats[] = new BasicStats[termContexts.length];
+    for (int i = 0; i < termContexts.length; i++) {
+      stats[i] = newStats(queryBoost);
+      fillBasicStats(stats[i], searcher, fieldName, termContexts[i]);
+    }
+    return stats.length == 1 ? stats[0] : new MultiSimilarity.MultiStats(stats);
   }
   
-  /** Fills all member fields defined in {@code BasicStats} in {@code stats}. */
-  protected final void fillBasicStats(BasicStats stats, IndexSearcher searcher,
-      String fieldName, TermContext... termContexts) throws IOException {
+  /** Factory method to return a custom stats object */
+  protected BasicStats newStats(float queryBoost) {
+    return new BasicStats(queryBoost);
+  }
+  
+  /** Fills all member fields defined in {@code BasicStats} in {@code stats}. 
+   *  Subclasses can override this method to fill additional stats. */
+  protected void fillBasicStats(BasicStats stats, IndexSearcher searcher,
+      String fieldName, TermContext termContext) throws IOException {
     IndexReader reader = searcher.getIndexReader();
     int numberOfDocuments = reader.maxDoc();
     
-    // nocommit Take the minimum of term frequencies for phrases. This is not
-    // correct though, we'll need something like a scorePhrase(MultiStats ...)
-    int docFreq = Integer.MAX_VALUE;
-    long totalTermFreq = Integer.MAX_VALUE;
-    for (final TermContext context : termContexts) {
-      docFreq = Math.min(docFreq, context.docFreq());
-      totalTermFreq = Math.min(totalTermFreq, context.totalTermFreq());
-    }
+    int docFreq = termContext.docFreq();
+    long totalTermFreq = termContext.totalTermFreq();
 
     // codec does not supply totalTermFreq: substitute docFreq
     if (totalTermFreq == -1) {
@@ -121,6 +124,7 @@
       }
     }
  
+    // TODO: add sumDocFreq for field (numberOfFieldPostings)
     stats.setNumberOfDocuments(numberOfDocuments);
     stats.setNumberOfFieldTokens(numberOfFieldTokens);
     stats.setAvgFieldLength(avgFieldLength);
@@ -185,15 +189,39 @@
   @Override
   public ExactDocScorer exactDocScorer(Stats stats, String fieldName,
       AtomicReaderContext context) throws IOException {
-    return new BasicExactDocScorer((BasicStats) stats,
-                                  context.reader.norms(fieldName));
+    byte norms[] = context.reader.norms(fieldName);
+    
+    if (stats instanceof MultiSimilarity.MultiStats) {
+      // a multi-term query (e.g. a phrase): return the summation,
+      // scoring almost as if it were a boolean query
+      Stats subStats[] = ((MultiSimilarity.MultiStats) stats).subStats;
+      ExactDocScorer subScorers[] = new ExactDocScorer[subStats.length];
+      for (int i = 0; i < subScorers.length; i++) {
+        subScorers[i] = new BasicExactDocScorer((BasicStats)subStats[i], norms);
+      }
+      return new MultiSimilarity.MultiExactDocScorer(subScorers);
+    } else {
+      return new BasicExactDocScorer((BasicStats) stats, norms);
+    }
   }
   
   @Override
   public SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName,
       AtomicReaderContext context) throws IOException {
-    return new BasicSloppyDocScorer((BasicStats) stats,
-                                   context.reader.norms(fieldName));
+    byte norms[] = context.reader.norms(fieldName);
+    
+    if (stats instanceof MultiSimilarity.MultiStats) {
+      // a multi-term query (e.g. a phrase): return the summation,
+      // scoring almost as if it were a boolean query
+      Stats subStats[] = ((MultiSimilarity.MultiStats) stats).subStats;
+      SloppyDocScorer subScorers[] = new SloppyDocScorer[subStats.length];
+      for (int i = 0; i < subScorers.length; i++) {
+        subScorers[i] = new BasicSloppyDocScorer((BasicStats)subStats[i], norms);
+      }
+      return new MultiSimilarity.MultiSloppyDocScorer(subScorers);
+    } else {
+      return new BasicSloppyDocScorer((BasicStats) stats, norms);
+    }
   }
   
   /**
@@ -201,7 +229,7 @@
    * and preferably the values of parameters (if any) as well.
    */
   @Override
-  public abstract String toString();  // nocommit: to Similarity?
+  public abstract String toString();
 
   // ------------------------------ Norm handling ------------------------------
   
Index: lucene/src/java/org/apache/lucene/search/similarities/NormalizationH3.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/similarities/NormalizationH3.java	(revision 1165820)
+++ lucene/src/java/org/apache/lucene/search/similarities/NormalizationH3.java	(working copy)
@@ -33,7 +33,6 @@
 
   @Override
   public float tfn(BasicStats stats, float tf, float len) {
-    System.out.println(stats.getTotalTermFreq() + " / " + stats.getNumberOfFieldTokens());
     return (tf + mu * (stats.getTotalTermFreq() / (float)stats.getNumberOfFieldTokens())) / (len + mu) * mu;
   }
 
Index: lucene/src/java/org/apache/lucene/search/similarities/Similarity.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/similarities/Similarity.java	(revision 1163308)
+++ lucene/src/java/org/apache/lucene/search/similarities/Similarity.java	(working copy)
@@ -146,7 +146,7 @@
    * <p>
    * Term frequencies are integers (the term or phrase's tf)
    */
-  public abstract class ExactDocScorer {
+  public static abstract class ExactDocScorer {
     /**
      * Score a single document
      * @param doc document id
@@ -175,7 +175,7 @@
    * <p>
    * Term frequencies are floating point values.
    */
-  public abstract class SloppyDocScorer {
+  public static abstract class SloppyDocScorer {
     /**
      * Score a single document
      * @param doc document id
Index: lucene/src/java/org/apache/lucene/search/similarities/MultiSimilarity.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/similarities/MultiSimilarity.java	(revision 0)
+++ lucene/src/java/org/apache/lucene/search/similarities/MultiSimilarity.java	(revision 0)
@@ -0,0 +1,159 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.FieldInvertState;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.TermContext;
+
+/**
+ * Implements the CombSUM method for combining evidence from multiple
+ * similarity values described in: Joseph A. Shaw, Edward A. Fox.
+ * Combination of Multiple Searches. In Text REtrieval Conference (1993), pp. 243-252
+ * @lucene.experimental
+ */
+public class MultiSimilarity extends Similarity {
+  protected final Similarity sims[]; // wrapped similarities; the array is stored as passed in, not copied
+  
+  public MultiSimilarity(Similarity sims[]) { // sims must be non-empty: computeNorm reads sims[0]
+    this.sims = sims;
+  }
+  
+  @Override
+  public byte computeNorm(FieldInvertState state) {
+    return sims[0].computeNorm(state); // only the first similarity determines the norm
+  }
+
+  @Override
+  public Stats computeStats(IndexSearcher searcher, String fieldName, float queryBoost, TermContext... termContexts) throws IOException {
+    Stats subStats[] = new Stats[sims.length]; // one Stats per wrapped similarity, in the same order as sims
+    for (int i = 0; i < subStats.length; i++) {
+      subStats[i] = sims[i].computeStats(searcher, fieldName, queryBoost, termContexts);
+    }
+    return new MultiStats(subStats);
+  }
+
+  @Override
+  public ExactDocScorer exactDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException {
+    ExactDocScorer subScorers[] = new ExactDocScorer[sims.length];
+    for (int i = 0; i < subScorers.length; i++) {
+      // stats is cast to MultiStats unchecked; sub-stats i is handed to similarity i
+      subScorers[i] = sims[i].exactDocScorer(((MultiStats)stats).subStats[i], fieldName, context);
+    }
+    return new MultiExactDocScorer(subScorers);
+  }
+
+  @Override
+  public SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException {
+    SloppyDocScorer subScorers[] = new SloppyDocScorer[sims.length];
+    for (int i = 0; i < subScorers.length; i++) {
+      // stats is cast to MultiStats unchecked; sub-stats i is handed to similarity i
+      subScorers[i] = sims[i].sloppyDocScorer(((MultiStats)stats).subStats[i], fieldName, context);
+    }
+    return new MultiSloppyDocScorer(subScorers);
+  }
+  
+  public static class MultiExactDocScorer extends ExactDocScorer { // scores a document as the sum of its sub-scorers
+    private final ExactDocScorer subScorers[];
+    
+    MultiExactDocScorer(ExactDocScorer subScorers[]) { // package-private constructor
+      this.subScorers = subScorers;
+    }
+    
+    @Override
+    public float score(int doc, int freq) {
+      float sum = 0.0f;
+      for (ExactDocScorer subScorer : subScorers) {
+        sum += subScorer.score(doc, freq); // every sub-scorer receives the same doc and freq
+      }
+      return sum;
+    }
+
+    @Override
+    public Explanation explain(int doc, Explanation freq) {
+      Explanation expl = new Explanation(score(doc, (int)freq.getValue()), "sum of:"); // the cast truncates freq's float value to an int
+      for (ExactDocScorer subScorer : subScorers) {
+        expl.addDetail(subScorer.explain(doc, freq));
+      }
+      return expl;
+    }
+  }
+  
+  public static class MultiSloppyDocScorer extends SloppyDocScorer { // scores a document as the sum of its sub-scorers
+    private final SloppyDocScorer subScorers[];
+    
+    MultiSloppyDocScorer(SloppyDocScorer subScorers[]) { // package-private constructor
+      this.subScorers = subScorers;
+    }
+    
+    @Override
+    public float score(int doc, float freq) {
+      float sum = 0.0f;
+      for (SloppyDocScorer subScorer : subScorers) {
+        sum += subScorer.score(doc, freq); // every sub-scorer receives the same doc and freq
+      }
+      return sum;
+    }
+
+    @Override
+    public Explanation explain(int doc, Explanation freq) {
+      Explanation expl = new Explanation(score(doc, freq.getValue()), "sum of:");
+      for (SloppyDocScorer subScorer : subScorers) {
+        expl.addDetail(subScorer.explain(doc, freq));
+      }
+      return expl;
+    }
+
+    @Override
+    public float computeSlopFactor(int distance) {
+      return subScorers[0].computeSlopFactor(distance); // not summed: taken from the first sub-scorer only
+    }
+
+    @Override
+    public float computePayloadFactor(int doc, int start, int end, BytesRef payload) {
+      return subScorers[0].computePayloadFactor(doc, start, end, payload); // not summed: taken from the first sub-scorer only
+    }
+  }
+
+  public static class MultiStats extends Stats { // composite Stats that holds one Stats per sub-scorer
+    final Stats subStats[]; // package-private; read directly by the scorer factories
+    
+    MultiStats(Stats subStats[]) { // package-private constructor
+      this.subStats = subStats;
+    }
+    
+    @Override
+    public float getValueForNormalization() {
+      float sum = 0.0f;
+      for (Stats stat : subStats) {
+        sum += stat.getValueForNormalization();
+      }
+      return sum / subStats.length; // arithmetic mean of the sub-values; NaN if subStats is empty (0.0f / 0)
+    }
+
+    @Override
+    public void normalize(float queryNorm, float topLevelBoost) {
+      for (Stats stat : subStats) {
+        stat.normalize(queryNorm, topLevelBoost); // the same queryNorm and topLevelBoost go to every sub-stats
+      }
+    }
+  }
+}

Property changes on: lucene/src/java/org/apache/lucene/search/similarities/MultiSimilarity.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: lucene/src/java/org/apache/lucene/search/similarities/BasicModelD.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/similarities/BasicModelD.java	(revision 1164294)
+++ lucene/src/java/org/apache/lucene/search/similarities/BasicModelD.java	(working copy)
@@ -25,6 +25,10 @@
  * original paper: to avoid underflow for small values of {@code N} and
  * {@code F}, {@code N} is increased by {@code 1} and
  * {@code F} is always increased by {@code tfn}.
+ * <p>
+ * WARNING: for terms that do not meet the expected random distribution
+ * (e.g. stopwords), this model may give poor performance, such as
+ * abnormally high scores for low tf values.
  * @lucene.experimental
  */
 public class BasicModelD extends BasicModel {
@@ -33,8 +37,6 @@
     // we have to ensure phi is always < 1 for tiny TTF values, otherwise nphi can go negative,
     // resulting in NaN. cleanest way is to unconditionally always add tfn to totalTermFreq
     // to create a 'normalized' F.
-    // nocommit: we need a better fix here when F >= N: using lambda = F / (N + F) still 
-    // suffers with problems if you use AfterEffectB, but DL2 seems ok (http://dl.acm.org/citation.cfm?id=1672962
     double F = stats.getTotalTermFreq() + tfn;
     double phi = (double)tfn / F;
     double nphi = 1 - phi;
Index: lucene/src/java/org/apache/lucene/search/similarities/BasicModelBE.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/similarities/BasicModelBE.java	(revision 1164294)
+++ lucene/src/java/org/apache/lucene/search/similarities/BasicModelBE.java	(working copy)
@@ -21,9 +21,8 @@
 
 /**
  * Limiting form of the Bose-Einstein model. The formula used in Lucene differs
- * slightly from the one in the original paper: to avoid underflow for small
- * values of {@code N} and {@code F}, {@code N} is increased by {@code 1} and
- * {@code F} is increased by {@code tfn}. 
+ * slightly from the one in the original paper: {@code F} is increased by {@code tfn}
+ * and {@code N} is increased by {@code F}.
  * @lucene.experimental
  */
 public class BasicModelBE extends BasicModel {
Index: lucene/src/java/org/apache/lucene/search/similarities/BasicModelP.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/similarities/BasicModelP.java	(revision 1164294)
+++ lucene/src/java/org/apache/lucene/search/similarities/BasicModelP.java	(working copy)
@@ -22,6 +22,10 @@
 /**
  * Implements the Poisson approximation for the binomial model for DFR.
+ * <p>
+ * WARNING: for terms that do not meet the expected random distribution
+ * (e.g. stopwords), this model may give poor performance, such as
+ * abnormally high scores for low tf values.
  * @lucene.experimental
  */
 public class BasicModelP extends BasicModel {
   /** {@code log2(Math.E)}, precomputed. */
@@ -29,8 +33,6 @@
   
   @Override
   public final float score(BasicStats stats, float tfn) {
-    // nocommit: we need a better fix here when F >= N: using lambda = F / (N + F) still 
-    // suffers with problems if you use AfterEffectB, but PL2 seems ok (http://dl.acm.org/citation.cfm?id=1672962)
     float lambda = (float)stats.getTotalTermFreq() / stats.getNumberOfDocuments();
     return (float)(tfn * log2(tfn / lambda)
         + (lambda + 1 / (12 * tfn) - tfn) * LOG2_E
