Index: src/java/org/apache/lucene/search/Hits.java =================================================================== --- src/java/org/apache/lucene/search/Hits.java (revision 630625) +++ src/java/org/apache/lucene/search/Hits.java (working copy) @@ -53,6 +53,9 @@ private int numDocs = 0; // number cached private int maxDocs = 200; // max to cache + private boolean normalizeScores = true; // normalize scores by default + private float scoreNorm = 1.0f; + private int nDeletions; // # deleted docs in the index. private int lengthAtStart; // this is the number apps usually count on (although deletions can bring it down). private int nDeletedHits = 0; // # of already collected hits that were meanwhile deleted. @@ -77,6 +80,46 @@ getMoreDocs(50); // retrieve 100 initially lengthAtStart = length; } + + private float initScoreNorm() { + if (hitDocs.isEmpty()) { + return 1.0f; + } else { + final float firstScore = ((HitDoc) hitDocs.get(0)).score; + if (firstScore > 1.0f) { + return 1.0f / firstScore; + } else { + return 1.0f; + } + } + } + + /** + * Checks whether scores should be normalized to a maximum of 1.0. + * Turned on by default. + * + * @return true if scores should be normalized to a maximum of + * 1.0. + * @see #setNormalizeScores(boolean) + */ + public boolean isNormalizeScores() { + return normalizeScores; + } + + /** + * Turns score normalization on/off. + * + * If turned on (true), the maximum score is 1.0. + * If turned off (false), raw scores from {@link Searcher}s + * are unaltered. + * + * @param normalizeScores + * true if scores should be normalized to a maximum + * of 1.0. + */ + public void setNormalizeScores(boolean normalizeScores) { + this.normalizeScores = normalizeScores; + } // count # deletions, return -1 if unknown. 
private int countDeletions(Searcher s) throws IOException { @@ -101,11 +144,12 @@ length = topDocs.totalHits; ScoreDoc[] scoreDocs = topDocs.scoreDocs; - - float scoreNorm = 1.0f; - if (length > 0 && topDocs.getMaxScore() > 1.0f) { - scoreNorm = 1.0f / topDocs.getMaxScore(); + if(hitDocs.isEmpty() && length != 0) { + final float maxScore = topDocs.getMaxScore(); + if(maxScore > 1.0f) { + scoreNorm = 1.0f / maxScore; + } } int start = hitDocs.size() - nDeletedHits; @@ -133,8 +177,7 @@ int end = scoreDocs.length < length ? scoreDocs.length : length; length += nDeletedHits; for (int i = start; i < end; i++) { - hitDocs.addElement(new HitDoc(scoreDocs[i].score * scoreNorm, - scoreDocs[i].doc)); + hitDocs.addElement(new HitDoc(scoreDocs[i].score, scoreDocs[i].doc)); } nDeletions = nDels2; @@ -172,7 +215,11 @@ /** Returns the score for the nth document in this set. */ public final float score(int n) throws IOException { - return hitDoc(n).score; + if(normalizeScores) { + return hitDoc(n).score * scoreNorm; + } else { + return hitDoc(n).score; + } } /** Returns the id for the nth document in this set. 
Index: src/test/org/apache/lucene/search/TestHitsScoreNormalization.java =================================================================== --- src/test/org/apache/lucene/search/TestHitsScoreNormalization.java (revision 0) +++ src/test/org/apache/lucene/search/TestHitsScoreNormalization.java (revision 0) @@ -0,0 +1,114 @@ +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.Arrays; + +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.IndexWriter.MaxFieldLength; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.LuceneTestCase; + +/** + * Tests {@link Hits}' score normalization behavior (checks if + * {@link Hits#setNormalizeScores(boolean)} works as expected). 
+ * + * @author Christian Kohlschuetter + */ +public class TestHitsScoreNormalization extends LuceneTestCase { + + private RAMDirectory dir; + private IndexSearcher is; + private Query queryB = new TermQuery(new Term("main", "B")); + + protected void setUp() throws Exception { + super.setUp(); + + dir = new RAMDirectory(); + IndexWriter iw = new IndexWriter(dir, new WhitespaceAnalyzer(), + MaxFieldLength.UNLIMITED); + Document doc; + + doc = new Document(); + doc.add(new Field("main", "A B C", Store.NO, Index.TOKENIZED)); + iw.addDocument(doc); + + doc = new Document(); + doc.add(new Field("main", "B B B", Store.NO, Index.TOKENIZED)); + iw.addDocument(doc); + + doc = new Document(); + doc.add(new Field("main", "B", Store.NO, Index.TOKENIZED)); + iw.addDocument(doc); + + // add enough documents such that we get raw scores > 1 + // 5 (= 3+2) is OK with DefaultSimilarity + for (int i = 0; i < 2; i++) { + doc = new Document(); + doc.add(new Field("main", "A", Store.NO, Index.TOKENIZED)); + iw.addDocument(doc); + } + + iw.close(); + + is = new IndexSearcher(dir); + } + + protected void tearDown() throws Exception { + super.tearDown(); + is.close(); + dir.close(); + } + + private float[] getScores(final Query q, final boolean raw) + throws IOException { + Hits hits = is.search(q); + if (raw) { + hits.setNormalizeScores(false); + } + final int len = hits.length(); + final float[] scores = new float[len]; + for (int i = 0; i < len; i++) { + scores[i] = hits.score(i); + } + return scores; + } + + // Use this implementation once we move to Java 5.0 + // private void assertFloatArrayEquals(float[] expected, float[] actual) { + // assertEquals(Arrays.toString(expected), Arrays.toString(actual)); + // } + + private void assertFloatArrayEquals(float[] expected, float[] actual) { + assertEquals(getString(expected), getString(actual)); + } + + private String getString(float[] f) { + StringBuffer sb = new StringBuffer(); + sb.append('['); + for 
(int i = 0; i < f.length; i++) { + if (i != 0) { + sb.append(','); + } + sb.append(f[i]); + } + sb.append(']'); + return sb.toString(); + } + + public void testNormalizedScores() throws IOException { + final float[] actualScores = getScores(queryB, false); + assertFloatArrayEquals(new float[] { 1.0f, 0.8660253f, 0.5f }, actualScores); + } + + public void testRawScores() throws IOException { + final float[] actualScores = getScores(queryB, true); + assertFloatArrayEquals(new float[] { 1.2231436f, 1.0592734f, 0.6115718f }, + actualScores); + } +}