Index: src/java/org/apache/lucene/search/Hits.java
===================================================================
--- src/java/org/apache/lucene/search/Hits.java (revision 630625)
+++ src/java/org/apache/lucene/search/Hits.java (working copy)
@@ -53,6 +53,9 @@
private int numDocs = 0; // number cached
private int maxDocs = 200; // max to cache
+ private boolean normalizeScores = true; // normalize scores by default
+ private float scoreNorm = 1.0f;
+
private int nDeletions; // # deleted docs in the index.
private int lengthAtStart; // this is the number apps usually count on (although deletions can bring it down).
private int nDeletedHits = 0; // # of already collected hits that were meanwhile deleted.
@@ -77,6 +80,46 @@
getMoreDocs(50); // retrieve 100 initially
lengthAtStart = length;
}
+
+  private float initScoreNorm() {
+    // NOTE(review): not referenced anywhere in this patch -- confirm a caller exists.
+    if (hitDocs.isEmpty()) {
+      return 1.0f;
+    }
+    // Shrink scores only when the top-ranked cached hit exceeds 1.0.
+    final float topScore = ((HitDoc) hitDocs.get(0)).score;
+    if (topScore > 1.0f) {
+      return 1.0f / topScore;
+    }
+    return 1.0f;
+  }
+
+  /**
+   * Reports whether score normalization is enabled for this
+   * result set. Enabled by default.
+   *
+   * @return true when scores are rescaled so that the highest
+   *         possible score is 1.0.
+   * @see #setNormalizeScores(boolean)
+   */
+  public boolean isNormalizeScores() {
+    return this.normalizeScores;
+  }
+
+  /**
+   * Enables or disables score normalization for this result set.
+   *
+   * When enabled (the default), scores are rescaled so that the
+   * highest possible score is 1.0. When disabled, the raw scores
+   * produced by the {@link Searcher} are returned unaltered.
+   *
+   * @param normalizeScores
+   *          true to rescale scores to a maximum of 1.0, false to
+   *          report raw scores.
+   */
+  public void setNormalizeScores(boolean normalizeScores) {
+    this.normalizeScores = normalizeScores;
+  }
// count # deletions, return -1 if unknown.
private int countDeletions(Searcher s) throws IOException {
@@ -101,11 +144,12 @@
length = topDocs.totalHits;
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
-
- float scoreNorm = 1.0f;
- if (length > 0 && topDocs.getMaxScore() > 1.0f) {
- scoreNorm = 1.0f / topDocs.getMaxScore();
+ if(hitDocs.isEmpty() && length != 0) {
+ final float maxScore = topDocs.getMaxScore();
+ if(maxScore > 1.0f) {
+ scoreNorm = 1.0f / maxScore;
+ }
}
int start = hitDocs.size() - nDeletedHits;
@@ -133,8 +177,7 @@
int end = scoreDocs.length < length ? scoreDocs.length : length;
length += nDeletedHits;
for (int i = start; i < end; i++) {
- hitDocs.addElement(new HitDoc(scoreDocs[i].score * scoreNorm,
- scoreDocs[i].doc));
+ hitDocs.addElement(new HitDoc(scoreDocs[i].score, scoreDocs[i].doc));
}
nDeletions = nDels2;
@@ -172,7 +215,11 @@
/** Returns the score for the nth document in this set. */
public final float score(int n) throws IOException {
- return hitDoc(n).score;
+    final float raw = hitDoc(n).score;
+    if (!normalizeScores) {
+      return raw;
+    }
+    return raw * scoreNorm;
}
/** Returns the id for the nth document in this set.
Index: src/test/org/apache/lucene/search/TestHitsScoreNormalization.java
===================================================================
--- src/test/org/apache/lucene/search/TestHitsScoreNormalization.java (revision 0)
+++ src/test/org/apache/lucene/search/TestHitsScoreNormalization.java (revision 0)
@@ -0,0 +1,120 @@
+package org.apache.lucene.search;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.IndexWriter.MaxFieldLength;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.LuceneTestCase;
+
+/**
+ * Tests {@link Hits}' score normalization behavior (checks if
+ * {@link Hits#setNormalizeScores(boolean)} works as expected).
+ *
+ * @author Christian Kohlschuetter
+ */
+public class TestHitsScoreNormalization extends LuceneTestCase {
+
+  private RAMDirectory dir;
+  private IndexSearcher is;
+  private Query queryB = new TermQuery(new Term("main", "B"));
+
+  protected void setUp() throws Exception {
+    super.setUp();
+
+    dir = new RAMDirectory();
+    IndexWriter iw = new IndexWriter(dir, new WhitespaceAnalyzer(),
+        MaxFieldLength.UNLIMITED);
+    Document doc;
+
+    doc = new Document();
+    doc.add(new Field("main", "A B C", Store.NO, Index.TOKENIZED));
+    iw.addDocument(doc);
+
+    doc = new Document();
+    doc.add(new Field("main", "B B B", Store.NO, Index.TOKENIZED));
+    iw.addDocument(doc);
+
+    doc = new Document();
+    doc.add(new Field("main", "B", Store.NO, Index.TOKENIZED));
+    iw.addDocument(doc);
+
+    // add enough documents such that we get raw scores > 1
+    // 5 (= 3+2) is OK with DefaultSimilarity
+    for (int i = 0; i < 2; i++) {
+      doc = new Document();
+      doc.add(new Field("main", "A", Store.NO, Index.TOKENIZED));
+      iw.addDocument(doc);
+    }
+
+    iw.close();
+
+    is = new IndexSearcher(dir);
+  }
+
+  protected void tearDown() throws Exception {
+    // close resources first: the superclass tearDown checks leftover state
+    is.close();
+    dir.close();
+    super.tearDown();
+  }
+
+  /**
+   * Collects the scores of all hits for the given query.
+   *
+   * @param q the query to run
+   * @param raw if true, disable normalization to obtain raw scores
+   */
+  private float[] getScores(final Query q, final boolean raw)
+      throws IOException {
+    Hits hits = is.search(q);
+    if (raw) {
+      hits.setNormalizeScores(false);
+    }
+    final int len = hits.length();
+    final float[] scores = new float[len];
+    for (int i = 0; i < len; i++) {
+      scores[i] = hits.score(i);
+    }
+    return scores;
+  }
+
+  // Use this implementation once we move to Java 5.0
+  // private void assertFloatArrayEquals(float[] expected, float[] actual) {
+  //   assertEquals(Arrays.toString(expected), Arrays.toString(actual));
+  // }
+
+  private void assertFloatArrayEquals(float[] expected, float[] actual) {
+    assertEquals(getString(expected), getString(actual));
+  }
+
+  private String getString(float[] f) {
+    StringBuffer sb = new StringBuffer();
+    sb.append('[');
+    for (int i = 0; i < f.length; i++) {
+      if (i != 0) {
+        sb.append(',');
+      }
+      sb.append(f[i]);
+    }
+    sb.append(']');
+    return sb.toString();
+  }
+
+  public void testNormalizedScores() throws IOException {
+    final float[] actualScores = getScores(queryB, false);
+    assertFloatArrayEquals(new float[] { 1.0f, 0.8660253f, 0.5f }, actualScores);
+  }
+
+  public void testRawScores() throws IOException {
+    final float[] actualScores = getScores(queryB, true);
+    assertFloatArrayEquals(new float[] { 1.2231436f, 1.0592734f, 0.6115718f },
+        actualScores);
+  }
+}