Index: lucene/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java	(revision 1160714)
+++ lucene/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java	(working copy)
@@ -69,37 +69,29 @@
   /** The tolerance range for float equality. */
   private static float FLOAT_EPSILON = 1e-5f;
   /** The DFR basic models to test. */
-  private static BasicModel[] BASIC_MODELS;
+  static BasicModel[] BASIC_MODELS = {
+    new BasicModelBE(), new BasicModelD(), new BasicModelG(),
+    new BasicModelIF(), new BasicModelIn(), new BasicModelIne(),
+    new BasicModelP()
+  };
   /** The DFR aftereffects to test. */
-  private static AfterEffect[] AFTER_EFFECTS;
+  static AfterEffect[] AFTER_EFFECTS = {
+    new AfterEffectB(), new AfterEffectL(), new AfterEffect.NoAfterEffect()
+  };
   /** The DFR normalizations to test. */
-  private static Normalization[] NORMALIZATIONS;
+  static Normalization[] NORMALIZATIONS = {
+    new NormalizationH1(), new NormalizationH2(),
+    new Normalization.NoNormalization()
+  };
   /** The distributions for IB. */
-  private static Distribution[] DISTRIBUTIONS;
+  static Distribution[] DISTRIBUTIONS = {
+    new DistributionLL(), new DistributionSPL()
+  };
   /** Lambdas for IB. */
-  private static Lambda[] LAMBDAS;
+  static Lambda[] LAMBDAS = {
+    new LambdaDF(), new LambdaTTF()
+  };
   
-  static {
-    BASIC_MODELS = new BasicModel[] {
-        new BasicModelBE(), new BasicModelD(), new BasicModelG(),
-        new BasicModelIF(), new BasicModelIn(), new BasicModelIne(),
-        new BasicModelP()
-    };
-    AFTER_EFFECTS = new AfterEffect[] {
-        new AfterEffectB(), new AfterEffectL(), new AfterEffect.NoAfterEffect()
-    };
-    NORMALIZATIONS = new Normalization[] {
-        new NormalizationH1(), new NormalizationH2(),
-        new Normalization.NoNormalization()
-    };
-    DISTRIBUTIONS = new Distribution[] {
-        new DistributionLL(), new DistributionSPL()
-    };
-    LAMBDAS = new Lambda[] {
-        new LambdaDF(), new LambdaTTF()
-    };
-  }
-  
   private IndexSearcher searcher;
   private Directory dir;
   private IndexReader reader;
Index: lucene/src/test/org/apache/lucene/search/similarities/TestSimilarity2.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/similarities/TestSimilarity2.java	(revision 0)
+++ lucene/src/test/org/apache/lucene/search/similarities/TestSimilarity2.java	(revision 0)
@@ -0,0 +1,153 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+
+/**
+ * Tests against all the similarities we have
+ */
+public class TestSimilarity2 extends LuceneTestCase {
+  List<SimilarityProvider> simProviders;
+  
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    simProviders = new ArrayList<SimilarityProvider>();
+    simProviders.add(new BasicSimilarityProvider(new DefaultSimilarity()));
+    simProviders.add(new BasicSimilarityProvider(new BM25Similarity()));
+    // TODO: not great that we duplicate all of this with TestSimilarityBase
+    for (BasicModel basicModel : TestSimilarityBase.BASIC_MODELS) {
+      for (AfterEffect afterEffect : TestSimilarityBase.AFTER_EFFECTS) {
+        for (Normalization normalization : TestSimilarityBase.NORMALIZATIONS) {
+          simProviders.add(new BasicSimilarityProvider(new DFRSimilarity(basicModel, afterEffect, normalization)));
+        }
+      }
+    }
+    for (Distribution distribution : TestSimilarityBase.DISTRIBUTIONS) {
+      for (Lambda lambda : TestSimilarityBase.LAMBDAS) {
+        for (Normalization normalization : TestSimilarityBase.NORMALIZATIONS) {
+          simProviders.add(new BasicSimilarityProvider(new IBSimilarity(distribution, lambda, normalization)));
+        }
+      }
+    }
+    simProviders.add(new BasicSimilarityProvider(new LMDirichletSimilarity()));
+    simProviders.add(new BasicSimilarityProvider(new LMJelinekMercerSimilarity(0.1f)));
+    simProviders.add(new BasicSimilarityProvider(new LMJelinekMercerSimilarity(0.7f)));
+  }
+  
+  /** Because of stupid things like querynorm, it's possible we computeStats on a field that doesn't exist at all.
+   *  Test this against a totally empty index, to make sure the sims handle it.
+   */
+  public void testEmptyIndex() throws Exception {
+    Directory dir = newDirectory();
+    RandomIndexWriter iw = new RandomIndexWriter(random, dir);
+    IndexReader ir = iw.getReader();
+    iw.close();
+    IndexSearcher is = newSearcher(ir);
+    
+    for (SimilarityProvider simProvider : simProviders) {
+      is.setSimilarityProvider(simProvider);
+      assertEquals(0, is.search(new TermQuery(new Term("foo", "bar")), 10).totalHits);
+    }
+    is.close();
+    ir.close();
+    dir.close();
+  }
+  
+  /** Similar to the empty-index test, but ORs the query on the missing field with a query on a real field. */
+  public void testEmptyField() throws Exception {
+    Directory dir = newDirectory();
+    RandomIndexWriter iw = new RandomIndexWriter(random, dir);
+    Document doc = new Document();
+    doc.add(newField("foo", "bar", Field.Index.ANALYZED));
+    iw.addDocument(doc);
+    IndexReader ir = iw.getReader();
+    iw.close();
+    IndexSearcher is = newSearcher(ir);
+    
+    for (SimilarityProvider simProvider : simProviders) {
+      is.setSimilarityProvider(simProvider);
+      BooleanQuery query = new BooleanQuery(true);
+      query.add(new TermQuery(new Term("foo", "bar")), BooleanClause.Occur.SHOULD);
+      query.add(new TermQuery(new Term("bar", "baz")), BooleanClause.Occur.SHOULD);
+      assertEquals(1, is.search(query, 10).totalHits);
+    }
+    is.close();
+    ir.close();
+    dir.close();
+  }
+  
+  /** Similar to the above, except the field exists and we also query it with a term that doesn't exist. */
+  public void testEmptyTerm() throws Exception {
+    Directory dir = newDirectory();
+    RandomIndexWriter iw = new RandomIndexWriter(random, dir);
+    Document doc = new Document();
+    doc.add(newField("foo", "bar", Field.Index.ANALYZED));
+    iw.addDocument(doc);
+    IndexReader ir = iw.getReader();
+    iw.close();
+    IndexSearcher is = newSearcher(ir);
+    
+    for (SimilarityProvider simProvider : simProviders) {
+      is.setSimilarityProvider(simProvider);
+      BooleanQuery query = new BooleanQuery(true);
+      query.add(new TermQuery(new Term("foo", "bar")), BooleanClause.Occur.SHOULD);
+      query.add(new TermQuery(new Term("foo", "baz")), BooleanClause.Occur.SHOULD);
+      assertEquals(1, is.search(query, 10).totalHits);
+    }
+    is.close();
+    ir.close();
+    dir.close();
+  }
+  
+  /** make sure we can retrieve when norms are disabled */
+  public void testNoNorms() throws Exception {
+    Directory dir = newDirectory();
+    RandomIndexWriter iw = new RandomIndexWriter(random, dir);
+    Document doc = new Document();
+    doc.add(newField("foo", "bar", Field.Index.ANALYZED_NO_NORMS));
+    iw.addDocument(doc);
+    IndexReader ir = iw.getReader();
+    iw.close();
+    IndexSearcher is = newSearcher(ir);
+    
+    for (SimilarityProvider simProvider : simProviders) {
+      is.setSimilarityProvider(simProvider);
+      BooleanQuery query = new BooleanQuery(true);
+      query.add(new TermQuery(new Term("foo", "bar")), BooleanClause.Occur.SHOULD);
+      assertEquals(1, is.search(query, 10).totalHits);
+    }
+    is.close();
+    ir.close();
+    dir.close();
+  }
+}

Property changes on: lucene/src/test/org/apache/lucene/search/similarities/TestSimilarity2.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: lucene/src/java/org/apache/lucene/search/similarities/SimilarityBase.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/similarities/SimilarityBase.java	(revision 1160714)
+++ lucene/src/java/org/apache/lucene/search/similarities/SimilarityBase.java	(working copy)
@@ -23,6 +23,7 @@
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.Terms;
 import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.util.BytesRef;
@@ -82,9 +83,6 @@
       String fieldName, TermContext... termContexts) throws IOException {
     IndexReader reader = searcher.getIndexReader();
     int numberOfDocuments = reader.maxDoc();
-    long numberOfFieldTokens = MultiFields.getTerms(searcher.getIndexReader(),
-        fieldName).getSumTotalTermFreq();
-    float avgFieldLength = (float)numberOfFieldTokens / numberOfDocuments;
     
     // nocommit Take the minimum of term frequencies for phrases. This is not
     // correct though, we'll need something like a scorePhrase(MultiStats ...)
@@ -94,18 +92,35 @@
       docFreq = Math.min(docFreq, context.docFreq());
       totalTermFreq = Math.min(totalTermFreq, context.totalTermFreq());
     }
-    
-    // We have to provide something if codec doesnt supply these measures,
-    // or if someone omitted frequencies for the field... negative values cause
-    // NaN/Inf for some scorers.
-    if (numberOfFieldTokens == -1) {
-      numberOfFieldTokens = docFreq;
-      avgFieldLength = 1;
-    }
+
+    // codec does not supply totalTermFreq: substitute docFreq
     if (totalTermFreq == -1) {
       totalTermFreq = docFreq;
     }
+
+    final long numberOfFieldTokens;
+    final float avgFieldLength;
     
+    Terms terms = MultiFields.getTerms(searcher.getIndexReader(), fieldName);
+    if (terms == null) {
+      // field does not exist: use 0 tokens and an average length of 1
+      numberOfFieldTokens = 0;
+      avgFieldLength = 1;
+    } else {
+      long sumTotalTermFreq = terms.getSumTotalTermFreq();
+
+      // We have to provide something if the codec doesn't supply these measures,
+      // or if someone omitted frequencies for the field... negative values cause
+      // NaN/Inf for some scorers.
+      if (sumTotalTermFreq == -1) {
+        numberOfFieldTokens = docFreq;
+        avgFieldLength = 1;
+      } else {
+        numberOfFieldTokens = sumTotalTermFreq;
+        avgFieldLength = (float)numberOfFieldTokens / numberOfDocuments;
+      }
+    }
+ 
     stats.setNumberOfDocuments(numberOfDocuments);
     stats.setNumberOfFieldTokens(numberOfFieldTokens);
     stats.setAvgFieldLength(avgFieldLength);
