Index: src/test/org/apache/lucene/search/TestSimilarity.java =================================================================== --- src/test/org/apache/lucene/search/TestSimilarity.java (revision 645637) +++ src/test/org/apache/lucene/search/TestSimilarity.java (working copy) @@ -17,18 +17,16 @@ * limitations under the License. */ +import org.apache.lucene.analysis.SimpleAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.LuceneTestCase; import java.util.Collection; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.store.RAMDirectory; -import org.apache.lucene.analysis.SimpleAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; - /** Similarity unit test. 
* * @@ -38,29 +36,49 @@ public TestSimilarity(String name) { super(name); } - + public static class SimpleSimilarity extends Similarity { - public float lengthNorm(String field, int numTerms) { return 1.0f; } - public float queryNorm(float sumOfSquaredWeights) { return 1.0f; } - public float tf(float freq) { return freq; } - public float sloppyFreq(int distance) { return 2.0f; } - public float idf(Collection terms, Searcher searcher) { return 1.0f; } - public float idf(int docFreq, int numDocs) { return 1.0f; } - public float coord(int overlap, int maxOverlap) { return 1.0f; } + public float lengthNorm(String field, int numTerms) { + return 1.0f; + } + + public float queryNorm(float sumOfSquaredWeights) { + return 1.0f; + } + + public float tf(float freq) { + return freq; + } + + public float sloppyFreq(int distance) { + return 2.0f; + } + + public float idf(Collection terms, Searcher searcher) { + return 1.0f; + } + + public float idf(int docFreq, int numDocs) { + return 1.0f; + } + + public float coord(int overlap, int maxOverlap) { + return 1.0f; + } } public void testSimilarity() throws Exception { RAMDirectory store = new RAMDirectory(); - IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true, - IndexWriter.MaxFieldLength.LIMITED); + IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true, + IndexWriter.MaxFieldLength.LIMITED); writer.setSimilarity(new SimpleSimilarity()); - + Document d1 = new Document(); d1.add(new Field("field", "a c", Field.Store.YES, Field.Index.TOKENIZED)); Document d2 = new Document(); d2.add(new Field("field", "a b c", Field.Store.YES, Field.Index.TOKENIZED)); - + writer.addDocument(d1); writer.addDocument(d2); writer.optimize(); @@ -74,48 +92,80 @@ Term c = new Term("field", "c"); searcher.search - (new TermQuery(b), - new HitCollector() { - public final void collect(int doc, float score) { - assertTrue(score == 1.0f); - } - }); + (new TermQuery(b), + new HitCollector() { + public final void collect(int 
doc, float score) { + assertTrue(score == 1.0f); + } + }); BooleanQuery bq = new BooleanQuery(); bq.add(new TermQuery(a), BooleanClause.Occur.SHOULD); bq.add(new TermQuery(b), BooleanClause.Occur.SHOULD); //System.out.println(bq.toString("field")); searcher.search - (bq, - new HitCollector() { - public final void collect(int doc, float score) { - //System.out.println("Doc=" + doc + " score=" + score); - assertTrue(score == (float)doc+1); - } - }); + (bq, + new HitCollector() { + public final void collect(int doc, float score) { + //System.out.println("Doc=" + doc + " score=" + score); + assertTrue(score == (float) doc + 1); + } + }); PhraseQuery pq = new PhraseQuery(); pq.add(a); pq.add(c); //System.out.println(pq.toString("field")); searcher.search - (pq, - new HitCollector() { - public final void collect(int doc, float score) { - //System.out.println("Doc=" + doc + " score=" + score); - assertTrue(score == 1.0f); - } - }); + (pq, + new HitCollector() { + public final void collect(int doc, float score) { + //System.out.println("Doc=" + doc + " score=" + score); + assertTrue(score == 1.0f); + } + }); pq.setSlop(2); //System.out.println(pq.toString("field")); searcher.search - (pq, - new HitCollector() { - public final void collect(int doc, float score) { - //System.out.println("Doc=" + doc + " score=" + score); - assertTrue(score == 2.0f); - } - }); + (pq, + new HitCollector() { + public final void collect(int doc, float score) { + //System.out.println("Doc=" + doc + " score=" + score); + assertTrue(score == 2.0f); + } + }); } + + public void testNormCodec() { + + Similarity.NormCodec normCodec; + + normCodec = new Similarity.DefaultNormCodec(); + assertEquals(10f, normCodec.decodeNorm(normCodec.encodeNorm(10f))); + assertEquals(10f, normCodec.decodeNorm(normCodec.encodeNorm(11f))); + assertEquals(12f, normCodec.decodeNorm(normCodec.encodeNorm(12f))); + assertEquals(12f, normCodec.decodeNorm(normCodec.encodeNorm(13f))); + assertEquals(14f, 
normCodec.decodeNorm(normCodec.encodeNorm(14f))); + + + new Similarity.SimpleNormCodec(new float[255]); + new Similarity.SimpleNormCodec(new float[256]); + + try { + new Similarity.SimpleNormCodec(new float[257]); + fail("Supposed to throw ArrayIndexOutOfBoundsException"); + } catch (ArrayIndexOutOfBoundsException e) { + // expected: a 257-entry table cannot be addressed by a single norm byte + } + + normCodec = new Similarity.SimpleNormCodec(new float[]{9f, 10f, 11, 12f, 13f}); + assertEquals(9f, normCodec.decodeNorm(normCodec.encodeNorm(9f))); + assertEquals(10f, normCodec.decodeNorm(normCodec.encodeNorm(10f))); + assertEquals(11f, normCodec.decodeNorm(normCodec.encodeNorm(11f))); + assertEquals(12f, normCodec.decodeNorm(normCodec.encodeNorm(12f))); + assertEquals(13f, normCodec.decodeNorm(normCodec.encodeNorm(13f))); + + + } } Index: src/java/org/apache/lucene/search/Similarity.java =================================================================== --- src/java/org/apache/lucene/search/Similarity.java (revision 645637) +++ src/java/org/apache/lucene/search/Similarity.java (working copy) @@ -22,8 +22,11 @@ import java.io.IOException; import java.io.Serializable; +import java.io.Writer; import java.util.Collection; import java.util.Iterator; +import java.util.Arrays; +import java.text.DecimalFormat; /** Expert: Scoring API. *

Subclasses implement search scoring. @@ -311,26 +314,30 @@ return Similarity.defaultImpl; } - /** Cache of decoded bytes. */ - private static final float[] NORM_TABLE = new float[256]; + private static NormCodec normCodec = new DefaultNormCodec(); - static { - for (int i = 0; i < 256; i++) - NORM_TABLE[i] = SmallFloat.byte315ToFloat((byte)i); + + public static NormCodec getNormCodec() { + return normCodec; } + public static void setNormCodec(NormCodec normCodec) { + Similarity.normCodec = normCodec; + } + /** Decodes a normalization factor stored in an index. * @see #encodeNorm(float) */ public static float decodeNorm(byte b) { - return NORM_TABLE[b & 0xFF]; // & 0xFF maps negative bytes to positive above 127 + return normCodec.decodeNorm(b); } /** Returns a table for decoding normalization bytes. + * @deprecated access norm decoder using {@link Similarity#getNormCodec()} * @see #encodeNorm(float) */ public static float[] getNormDecoder() { - return NORM_TABLE; + return normCodec.getNormsTable(); } /** Computes the normalization value for a field given the total number of @@ -385,7 +392,7 @@ * @see org.apache.lucene.util.SmallFloat */ public static byte encodeNorm(float f) { - return SmallFloat.floatToByte315(f); + return normCodec.encodeNorm(f); } @@ -523,4 +530,89 @@ //Do nothing return 1; } + + + public static interface NormCodec { + + /** Encodes a normalization factor for storage in an index. + * @see org.apache.lucene.document.Field#setBoost(float) + */ + public abstract byte encodeNorm(float f); + + /** Decodes a normalization factor stored in an index. + * @see #encodeNorm(float) + */ + public abstract float decodeNorm(byte b); + + /** + * @deprecated 2.3.1 backwards compatibility + * @throws RuntimeException if this codec has no backwards compatible norms table + * @see org.apache.lucene.search.Similarity#getNormDecoder() + */ + public abstract float[] getNormsTable(); + + } + + public static class DefaultNormCodec implements NormCodec { + + /** Cache of decoded bytes. 
*/ + private float[] normsTable = new float[256]; + + public DefaultNormCodec() { + for (int i = 0; i < 256; i++) { + normsTable[i] = SmallFloat.byte315ToFloat((byte)i); + } + } + + public byte encodeNorm(float f) { + return SmallFloat.floatToByte315(f); + } + + public float decodeNorm(byte b) { + return normsTable[b & 0xFF]; // & 0xFF maps negative bytes to positive above 127 + } + + + public float[] getNormsTable() { + return normsTable; + } + } + + public static class SimpleNormCodec implements NormCodec { + + private float[] normsTable; + + public SimpleNormCodec(float[] normsTable) { + setNormsTable(normsTable); + } + + public float decodeNorm(byte b) { + return normsTable[b & 0xFF]; // & 0xFF maps negative bytes to positive above 127 + } + + public byte encodeNorm(float f) { + int pos = Arrays.binarySearch(normsTable, f); + if (pos < 0) { + pos *= -1; + pos--; + } + return (byte) Math.min(pos, normsTable.length - 1); // clamp: norms above the largest table entry use the last slot + } + + + public float[] getNormsTable() { + throw new RuntimeException("Not supported"); + } + + public void setNormsTable(float[] normsTable) { + if (normsTable == null) { + throw new NullPointerException("parameter normsTable is null"); + } else if (normsTable.length > 256) { + throw new ArrayIndexOutOfBoundsException("parameter normsTable must not exceed length 256."); + } + this.normsTable = normsTable; + } + } + + } Index: src/java/org/apache/lucene/search/TermScorer.java =================================================================== --- src/java/org/apache/lucene/search/TermScorer.java (revision 645637) +++ src/java/org/apache/lucene/search/TermScorer.java (working copy) @@ -63,7 +63,6 @@ protected boolean score(HitCollector c, int end) throws IOException { Similarity similarity = getSimilarity(); // cache sim in local - float[] normDecoder = Similarity.getNormDecoder(); while (doc < end) { // for docs in window int f = freqs[pointer]; float score = // compute tf(f)*weight @@ -71,7 +70,7 @@ ?
scoreCache[f] // cache hit : similarity.tf(f)*weightValue; // cache miss - score *= normDecoder[norms[doc] & 0xFF]; // normalize for field + score *= Similarity.decodeNorm(norms[doc]); // normalize for field c.collect(doc, score); // collect score