Index: lucene/queries/src/test/org/apache/lucene/queries/function/TestLongNormValueSource.java =================================================================== --- lucene/queries/src/test/org/apache/lucene/queries/function/TestLongNormValueSource.java (revision 0) +++ lucene/queries/src/test/org/apache/lucene/queries/function/TestLongNormValueSource.java (working copy) @@ -0,0 +1,251 @@ +package org.apache.lucene.queries.function; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.util.Arrays; +import java.util.List; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.FieldInvertState; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.queries.function.valuesource.NormValueSource; +import org.apache.lucene.search.CheckHits; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.search.similarities.TFIDFSimilarity; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; +import org.junit.AfterClass; +import org.junit.BeforeClass; + +public class TestLongNormValueSource extends LuceneTestCase { + + static Directory dir; + static IndexReader reader; + static IndexSearcher searcher; + private static Similarity sim = new PreciseDefaultSimilarity(); + + @BeforeClass + public static void beforeClass() throws Exception { + dir = newDirectory(); + IndexWriterConfig iwConfig = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); + iwConfig.setMergePolicy(newLogMergePolicy()); + iwConfig.setSimilarity(sim); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConfig); + + Document doc = new Document(); + doc.add(new TextField("text", "this is a test test test", Field.Store.NO)); + iw.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("text", "second test", 
Field.Store.NO)); + iw.addDocument(doc); + + reader = iw.getReader(); + searcher = newSearcher(reader); + iw.close(); + } + + @AfterClass + public static void afterClass() throws Exception { + searcher = null; + reader.close(); + reader = null; + dir.close(); + dir = null; + } + + public void testNorm() throws Exception { + Similarity saved = searcher.getSimilarity(); + try { + // no norm field (so agnostic to indexed similarity) + searcher.setSimilarity(sim); + assertHits(new FunctionQuery( + new NormValueSource("text")), + new float[] { 0f, 0f }); + } finally { + searcher.setSimilarity(saved); + } + } + + void assertHits(Query q, float scores[]) throws Exception { + ScoreDoc expected[] = new ScoreDoc[scores.length]; + int expectedDocs[] = new int[scores.length]; + for (int i = 0; i < expected.length; i++) { + expectedDocs[i] = i; + expected[i] = new ScoreDoc(i, scores[i]); + } + TopDocs docs = searcher.search(q, 2, new Sort(new SortField("id", SortField.Type.STRING))); + + /* + for (int i=0;ioverlap / maxOverlap. */ + @Override + public float coord(int overlap, int maxOverlap) { + return overlap / (float)maxOverlap; + } + + /** Implemented as 1/sqrt(sumOfSquaredWeights). */ + @Override + public float queryNorm(float sumOfSquaredWeights) { + return (float)(1.0 / Math.sqrt(sumOfSquaredWeights)); +// return 1f; + } + + /** + * Encodes a normalization factor for storage in an index. + *

+ * Unlike a single-byte encoding such as the one provided by SmallFloat (a + * three-bit mantissa and a five-bit exponent, giving about one significant + * decimal digit), this implementation does not compress the value. It + * returns the raw IEEE 754 bit pattern produced by Float.floatToIntBits, + * widened to a long, so decodeNormValue(long) recovers the same float value. + * The norm must therefore be passed to decodeNormValue(long) as the full + * long; narrowing it to a byte first discards most of the encoded bits. + * + * @see org.apache.lucene.document.Field#setBoost(float) + * @see org.apache.lucene.util.SmallFloat + */ + @Override + public final long encodeNormValue(float f) { + return Float.floatToIntBits(f); + } + + /** + * Decodes the norm value by reinterpreting its low 32 bits as a float. + * + * @see #encodeNormValue(float) + */ + @Override + public final float decodeNormValue(long norm) { + return Float.intBitsToFloat((int)norm); + } + + /** Implemented as + * state.getBoost()*lengthNorm(numTerms), where + * numTerms is {@link org.apache.lucene.index.FieldInvertState#getLength()} if {@link + * #setDiscountOverlaps} is false, else it's {@link + * org.apache.lucene.index.FieldInvertState#getLength()} - {@link + * org.apache.lucene.index.FieldInvertState#getNumOverlap()}. + * + * @lucene.experimental */ + @Override + public float lengthNorm(FieldInvertState state) { + final int numTerms; + if (discountOverlaps) + numTerms = state.getLength() - state.getNumOverlap(); + else + numTerms = state.getLength(); + return state.getBoost() * ((float) (1.0 / Math.sqrt(numTerms))); + } + + /** Implemented as sqrt(freq). */ + @Override + public float tf(float freq) { + return (float)Math.sqrt(freq); + } + + /** Implemented as 1 / (distance + 1). 
*/ + @Override + public float sloppyFreq(int distance) { + return 1.0f / (distance + 1); + } + + /** The default implementation returns 1 */ + @Override + public float scorePayload(int doc, int start, int end, BytesRef payload) { + return 1; + } + + /** Implemented as log(numDocs/(docFreq+1)) + 1. */ + @Override + public float idf(long docFreq, long numDocs) { + return (float)(Math.log(numDocs/(double)(docFreq+1)) + 1.0); + } + + /** + * True if overlap tokens (tokens with a position increment of zero) are + * discounted from the document's length. + */ + protected boolean discountOverlaps = true; + + /** Determines whether overlap tokens (Tokens with + * 0 position increment) are ignored when computing + * norm. By default this is true, meaning overlap + * tokens do not count when computing norms. + * + * @lucene.experimental + * + * @see #computeNorm + */ + public void setDiscountOverlaps(boolean v) { + discountOverlaps = v; + } + + /** + * Returns true if overlap tokens are discounted from the document's length. 
+ * @see #setDiscountOverlaps + */ + public boolean getDiscountOverlaps() { + return discountOverlaps; + } + + @Override + public String toString() { + return "DefaultSimilarity"; + } +} Property changes on: lucene/queries/src/test/org/apache/lucene/queries/function/TestLongNormValueSource.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/NormValueSource.java =================================================================== --- lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/NormValueSource.java (revision 1562396) +++ lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/NormValueSource.java (working copy) @@ -71,7 +71,7 @@ return new FloatDocValues(this) { @Override public float floatVal(int doc) { - return similarity.decodeNormValue((byte)norms.get(doc)); + return similarity.decodeNormValue(norms.get(doc)); } }; } Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1562396) +++ lucene/CHANGES.txt (working copy) @@ -211,6 +211,10 @@ return any groups when the joined query required more than one rewrite step (Peng Cheng via Mike McCandless) +* LUCENE-5398: NormValueSource was incorrectly casting the long value + to byte, before calling Similarity.decodeNormValue. (Peng Cheng via + Mike McCandless) + API Changes * LUCENE-5339: The facet module was simplified/reworked to make the