Index: lucene/core/src/test/org/apache/lucene/search/similarities/TestBM25Similarity.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/search/similarities/TestBM25Similarity.java	(revision 0)
+++ lucene/core/src/test/org/apache/lucene/search/similarities/TestBM25Similarity.java	(revision 0)
@@ -0,0 +1,237 @@
+package org.apache.lucene.search.similarities;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//import java.util.*;
+import java.util.*;
+import java.io.IOException;
+
+
+//import java.lang.String;
+//import java.lang.StringBuilder;
+
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.StoredDocument;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.CollectionStatistics;
+import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TermStatistics;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
+
+/**
+ * Tests for {@link BM25Similarity}, including its optional delta (lower-bounding) parameter.
+ *
+ * <p>For the integration tests, a small (9-document) collection is indexed. The
+ * tests verify that for a specific query, all relevant documents are returned
+ * in the correct order. First, BM25 is run with its default parameters to ensure that a
+ * long document containing two query terms is ranked below short documents containing
+ * only one of the query terms (this is the problem that the delta parameter fixes).
+ * Then the delta parameter is added to correct BM25, and the long document is checked to
+ * ensure that it is ranked above all the shorter documents that contain only one of the
+ * query terms.</p>
+ *
+ * <p>The collection consists of two poems by the English poet
+ * <a href="http://en.wikipedia.org/wiki/William_blake">William Blake</a>,
+ * plus a generated document that is approximately 6 times the average document length.</p>
+ */
+public class TestBM25Similarity extends LuceneTestCase {
+  private static final String FIELD_BODY = "body"; // text field that the test queries search
+  private static final String FIELD_ID = "id"; // field holding each document's array position
+  /** The tolerance range for float equality. */
+  private static final float FLOAT_EPSILON = 1e-5f;
+ 
+  private IndexSearcher searcher;
+  private Directory dir;
+  private IndexReader reader;
+  
+    
+  /** Indexes every string from getDocs(); each document keeps its array position in FIELD_ID
+   *  (stored, not indexed) and its text in FIELD_BODY. */
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    dir = newDirectory();
+    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
+    // The id field type is the same for every document, so build it once outside the loop.
+    FieldType storedOnly = new FieldType(TextField.TYPE_STORED);
+    storedOnly.setIndexed(false);
+    String[] docs = getDocs();
+    for (int i = 0; i < docs.length; i++) {
+      Document d = new Document();
+      d.add(newField(FIELD_ID, Integer.toString(i), storedOnly));
+      d.add(newTextField(FIELD_BODY, docs[i], Field.Store.YES));
+      writer.addDocument(d);
+    }
+    reader = writer.getReader();
+    searcher = newSearcher(reader);
+    writer.close();
+  }
+  
+ 
+ 
+  // ---------------------------- Correctness tests ----------------------------
+  /*
+   * TODO: add correctness (unit-level) tests here;
+   * see TestSimilarityBase for ideas.
+   */
+
+  
+  // ---------------------------- Integration tests ----------------------------
+
+  /** The "collection" for the integration tests. */
+  
+  // Very long string for testing. The regular documents are between 20 and 30 words in length, so this one is about 6 times avgdl, or 180 words in length.
+      
+  /** Returns s followed by 60 repetitions of the filler phrase (about 180 extra words). */
+  private String getLongDoc(String s) {
+    final int n = 60;
+    final String filler = " hello there world ";
+    StringBuilder sb = new StringBuilder(s.length() + filler.length() * n); // exact final length
+    sb.append(s);
+    for (int i = 0; i < n; i++) {
+      sb.append(filler);
+    }
+    return sb.toString();
+  }
+  /**
+   * Returns the nine test documents: eight short verses followed by one long generated document.
+   */
+  private String[] getDocs() {
+    String[] verses = {
+        "Tiger, tiger burning bright   In the forest of the night   What immortal hand or eye   Could frame thy fearful symmetry ?",
+        "In what distant depths or skies   Burnt the fire of thine eyes ?   On what wings dare he aspire ?   What the hands the seize the fire ?",
+        "And what shoulder and what art   Could twist the sinews of thy heart ?   And when thy heart began to beat What dread hand ? And what dread feet ?",
+        "What the hammer? What the chain ?   In what furnace was thy brain ?   What the anvil ? And what dread grasp   Dare its deadly terrors clasp ?",
+        "And when the stars threw down their spears   And water'd heaven with their tear   Did he smile his work to see ?   Did he, who made the lamb, made thee ?",
+        "Tiger, tiger burning bright   In the forest of the night   What immortal hand or eye   Dare frame thy fearful symmetry ?",
+        "Cruelty has a human heart   And jealousy a human face   Terror the human form divine   And Secrecy the human dress .",
+        "The human dress is forg'd iron   The human form a fiery forge   The human face a furnace seal'd   The human heart its fiery gorge ."
+    };
+    // The long document goes last, so its array position (and therefore its id) is 8.
+    String[] allDocs = Arrays.copyOf(verses, verses.length + 1);
+    allDocs[verses.length] = getLongDoc("heart warming");
+    return allDocs;
+  }
+   
+   
+  /**
+   * Verifies that a term query for "heart", scored with default BM25, matches exactly
+   * four documents.
+   */
+  public void testHeartList() throws IOException {
+    Similarity bm25 = new BM25Similarity();
+    searcher.setSimilarity(bm25);
+
+    Term heart = new Term(FIELD_BODY, "heart");
+    TopDocs hits = searcher.search(new TermQuery(heart), 1000);
+    assertEquals("Failed: " + bm25.toString(), 4, hits.totalHits);
+  }
+  /**
+   * Checks that, for every hit of a "heart" term query scored with BM25 (k1 = 1.2, b = 0.75,
+   * delta = 0.5), the value reported by explain() equals the hit's score within
+   * FLOAT_EPSILON, and that the query matches four documents.
+   * <p>
+   * TODO: add tests for cases
+   * 1) no norms
+   * 2) delta = 0
+   */
+  public void testExplanations() throws IOException {
+    Query q = new TermQuery(new Term(FIELD_BODY, "heart"));
+    float k1 = 1.2f;
+    float b = 0.75f;
+    float d = 0.5f;
+    Similarity sim = new BM25Similarity(k1, b, d);
+    searcher.setSimilarity(sim);
+    TopDocs topDocs = searcher.search(q, 10);
+
+    // Only the score and its explanation are compared, so the stored document
+    // is not loaded for each hit.
+    for (ScoreDoc match : topDocs.scoreDocs) {
+      Explanation explanation = searcher.explain(q, match.doc);
+      assertEquals("score() and explain() return different values: "
+          + sim.toString(), match.score, explanation.getValue(), FLOAT_EPSILON);
+    }
+
+    assertEquals("Failed: " + sim.toString(), 4, topDocs.totalHits);
+  }
+  
+  
+  /**
+   * With default BM25 (delta = 0), the long document containing both "heart" and "warming"
+   * (document 8) must be ranked below the short documents that contain only "heart":
+   * document 2 is expected first and document 8 fourth.
+   */
+  public void testHeartWarmingRanking() throws IOException {
+    Term heart = new Term(FIELD_BODY, "heart");
+    Term warming = new Term(FIELD_BODY, "warming");
+    BooleanQuery heartOrWarming = new BooleanQuery();
+    heartOrWarming.add(new TermQuery(heart), BooleanClause.Occur.SHOULD);
+    heartOrWarming.add(new TermQuery(warming), BooleanClause.Occur.SHOULD);
+    Similarity bm25 = new BM25Similarity();
+    searcher.setSimilarity(bm25);
+    ScoreDoc[] ranked = searcher.search(heartOrWarming, 1000).scoreDocs;
+    // Ranks are zero-based: index 0 is the best hit, index 3 the fourth.
+    assertEquals("Failed: " + bm25.toString(), "2", reader.document(ranked[0].doc).get(FIELD_ID));
+    assertEquals("Failed: " + bm25.toString(), "8", reader.document(ranked[3].doc).get(FIELD_ID));
+  }
+  
+  /**
+   * With delta set to 0.5, the long document containing both "heart" and "warming"
+   * (document 8) must be ranked first, above the three short documents that contain
+   * only "heart".
+   */
+  public void testHeartWarmingRankingWithDelta() throws IOException {
+    final float k1 = 1.2f;
+    final float b = 0.75f;
+    final float delta = 0.5f;
+    Similarity bm25WithDelta = new BM25Similarity(k1, b, delta);
+    searcher.setSimilarity(bm25WithDelta);
+
+    Term heart = new Term(FIELD_BODY, "heart");
+    Term warming = new Term(FIELD_BODY, "warming");
+    BooleanQuery heartOrWarming = new BooleanQuery();
+    heartOrWarming.add(new TermQuery(heart), BooleanClause.Occur.SHOULD);
+    heartOrWarming.add(new TermQuery(warming), BooleanClause.Occur.SHOULD);
+
+    // Only the best-ranked hit matters for this check.
+    ScoreDoc top = searcher.search(heartOrWarming, 1000).scoreDocs[0];
+    assertEquals("Failed: " + bm25WithDelta.toString(), "8", reader.document(top.doc).get(FIELD_ID));
+  }
+   
+  @Override
+  public void tearDown() throws Exception {
+    reader.close(); // close the reader obtained in setUp() first
+    dir.close(); // then the directory created in setUp()
+    super.tearDown(); // base-class teardown runs last
+  }
+}
+  
Index: lucene/core/src/java/org/apache/lucene/search/similarities/BM25Similarity.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/similarities/BM25Similarity.java	(revision 1513555)
+++ lucene/core/src/java/org/apache/lucene/search/similarities/BM25Similarity.java	(working copy)
@@ -38,30 +38,55 @@
 public class BM25Similarity extends Similarity {
   private final float k1;
   private final float b;
-  // TODO: should we add a delta like sifaka.cs.uiuc.edu/~ylv2/pub/sigir11-bm25l.pdf ?
-
-  /**
+  private final float delta;
+  
+  /**
    * BM25 with the supplied parameter values.
    * @param k1 Controls non-linear term frequency normalization (saturation).
    * @param b Controls to what degree document length normalizes tf values.
+   * @param delta Lower-bounds tf normalization to prevent long documents from being over-penalized.
+   *        See sifaka.cs.uiuc.edu/~ylv2/pub/sigir11-bm25l.pdf for background on delta.
    */
-  public BM25Similarity(float k1, float b) {
+  public BM25Similarity(float k1, float b, float delta) {
     this.k1 = k1;
     this.b  = b;
+    this.delta = delta;
   }
   
+  /**
+   * Traditional BM25 with the supplied parameter values; delta is fixed at 0.
+   * See sifaka.cs.uiuc.edu/~ylv2/pub/lowerbound/LBTF.htm re:
+   * "setting delta to 0 degenerates retrieval model to its traditional version"
+   *
+   * @param k1 Controls non-linear term frequency normalization (saturation).
+   * @param b Controls to what degree document length normalizes tf values.
+   * @see #BM25Similarity(float, float, float)
+   */
+  public BM25Similarity(float k1, float b) {
+    // Delegate so the fields are assigned in exactly one place (the three-argument constructor).
+    this(k1, b, 0f);
+  }
+  
   /** BM25 with these default values:
    * <ul>
    *   <li>{@code k1 = 1.2},
    *   <li>{@code b = 0.75}.</li>
+   *   <li>{@code delta = 0}.</li>
    * </ul>
    */
   public BM25Similarity() {
     this.k1 = 1.2f;
     this.b  = 0.75f;
+    this.delta= 0f;
   }
   
-  /** Implemented as <code>log(1 + (numDocs - docFreq + 0.5)/(docFreq + 0.5))</code>. */
+  /** Implemented as <code>log(1 + (numDocs - docFreq + 0.5)/(docFreq + 0.5))</code>.
+   *  Modified from the original BM25 by the addition of 1, which prevents terms with
+   *  docFreq &gt; 50% of numDocs from getting a negative weight.
+   *  See Dolamic and Savoy (2009), "When Stopword Lists Make the Difference":
+   *  http://members.unine.ch/jacques.savoy/Papers/SavoyStopList.pdf
+   */
+   
   protected float idf(long docFreq, long numDocs) {
     return (float) Math.log(1 + (numDocs - docFreq + 0.5D)/(docFreq + 0.5D));
   }
@@ -206,7 +231,7 @@
     // compute freq-independent part of bm25 equation across all norm values
     float cache[] = new float[256];
     for (int i = 0; i < cache.length; i++) {
-      cache[i] = k1 * ((1 - b) + b * decodeNormValue((byte)i) / avgdl);
+      cache[i] = ((1 - b) + b * decodeNormValue((byte)i) / avgdl);
     }
     return new BM25Stats(collectionStats.field(), idf, queryBoost, avgdl, cache);
   }
@@ -233,8 +258,9 @@
     @Override
     public float score(int doc, float freq) {
       // if there are no norms, we act as if b=0
-      float norm = norms == null ? k1 : cache[(byte)norms.get(doc) & 0xFF];
-      return weightValue * freq / (freq + norm);
+      float norm = norms == null ? 1 : cache[(byte)norms.get(doc) & 0xFF]; // cached (1 - b) + b * dl / avgdl
+      float tfn=freq/norm; // term frequency after length normalization
+      return weightValue * (tfn + delta) / (tfn + delta +k1 ); // delta = 0 gives traditional BM25
     }
     
     @Override
@@ -267,7 +293,7 @@
     private float weight;
     /** field name, for pulling norms */
     private final String field;
-    /** precomputed norm[256] with k1 * ((1 - b) + b * dl / avgdl) */
+    /** precomputed norm[256] with (1 - b) + b * dl / avgdl */
     private final float cache[];
 
     BM25Stats(String field, Explanation idf, float queryBoost, float avgdl, float cache[]) {
@@ -309,13 +335,31 @@
     tfNormExpl.addDetail(new Explanation(k1, "parameter k1"));
     if (norms == null) {
       tfNormExpl.addDetail(new Explanation(0, "parameter b (norms omitted for field)"));
-      tfNormExpl.setValue((freq.getValue() * (k1 + 1)) / (freq.getValue() + k1));
+      if (delta != 0){
+        tfNormExpl.setValue( ((freq.getValue() +delta) * (k1 + 1))/(freq.getValue() +delta +k1));
+      }else{
+        tfNormExpl.setValue((freq.getValue() * (k1 + 1)) / (freq.getValue() + k1));
+      }
     } else {
       float doclen = decodeNormValue((byte)norms.get(doc));
       tfNormExpl.addDetail(new Explanation(b, "parameter b"));
+      
+      if (delta != 0){
+          tfNormExpl.addDetail(new Explanation(delta, "parameter delta"));
+      }
+      
       tfNormExpl.addDetail(new Explanation(stats.avgdl, "avgFieldLength"));
       tfNormExpl.addDetail(new Explanation(doclen, "fieldLength"));
-      tfNormExpl.setValue((freq.getValue() * (k1 + 1)) / (freq.getValue() + k1 * (1 - b + b * doclen/stats.avgdl)));
+      
+      if (delta != 0){
+        float tfn = freq.getValue()/(1 - b + b * doclen/stats.avgdl);
+       // tfNormExpl.addDetail(new Explanation(tfn,"term frequency normalized (and lower-bounded)"));
+        tfNormExpl.setValue( ((tfn +delta) * (k1 + 1))/(tfn +delta +k1));
+      }
+      else{
+        
+        tfNormExpl.setValue((freq.getValue() * (k1 + 1)) / (freq.getValue() + k1 * (1 - b + b * doclen/stats.avgdl)));
+      }
     }
     result.addDetail(tfNormExpl);
     result.setValue(boostExpl.getValue() * stats.idf.getValue() * tfNormExpl.getValue());
@@ -324,7 +368,7 @@
 
   @Override
   public String toString() {
-    return "BM25(k1=" + k1 + ",b=" + b + ")";
+    return "BM25(k1=" + k1 + ",b=" + b + ",delta=" +delta + ")";
   }
   
   /** 
@@ -342,4 +386,11 @@
   public float getB() {
     return b;
   }
+  /**
+   * Returns the <code>delta</code> parameter
+   * @see #BM25Similarity(float, float, float)
+   */
+  public float getDelta() {
+    return delta;
+  }
 }
