Index: org/apache/lucene/search/spans/SpanScorer.java
===================================================================
--- org/apache/lucene/search/spans/SpanScorer.java (revision 382121)
+++ org/apache/lucene/search/spans/SpanScorer.java (working copy)
@@ -18,6 +18,7 @@
import java.io.IOException;
+import org.apache.lucene.index.NormFactors;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Explanation;
@@ -27,7 +28,7 @@
class SpanScorer extends Scorer {
private Spans spans;
private Weight weight;
- private byte[] norms;
+ private NormFactors normFactors;
private float value;
private boolean firstTime = true;
@@ -36,15 +37,20 @@
private int doc;
private float freq;
- SpanScorer(Spans spans, Weight weight, Similarity similarity, byte[] norms)
- throws IOException {
+ SpanScorer(Spans spans, Weight weight, Similarity similarity, NormFactors normFactors) {
super(similarity);
this.spans = spans;
- this.norms = norms;
+ this.normFactors = normFactors;
this.weight = weight;
this.value = weight.getValue();
}
+ /** @deprecated */
+ SpanScorer(Spans spans, Weight weight, Similarity similarity, byte[] norms)
+ throws IOException {
+ this(spans, weight, similarity, NormFactors.newInstance(norms));
+ }
+
public boolean next() throws IOException {
if (firstTime) {
more = spans.next();
@@ -69,7 +75,7 @@
public float score() throws IOException {
float raw = getSimilarity().tf(freq) * value; // raw score
- return raw * Similarity.decodeNorm(norms[doc]); // normalize
+ return raw * normFactors.getFactor(doc); // normalize
}
public boolean skipTo(int target) throws IOException {
Index: org/apache/lucene/search/spans/SpanWeight.java
===================================================================
--- org/apache/lucene/search/spans/SpanWeight.java (revision 382121)
+++ org/apache/lucene/search/spans/SpanWeight.java (working copy)
@@ -22,6 +22,7 @@
import java.util.Collection;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.NormFactors;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
@@ -67,7 +68,7 @@
public Scorer scorer(IndexReader reader) throws IOException {
return new SpanScorer(query.getSpans(reader), this,
similarity,
- reader.norms(query.getField()));
+ reader.getNormFactors(query.getField()));
}
public Explanation explain(IndexReader reader, int doc)
@@ -121,9 +122,9 @@
fieldExpl.addDetail(idfExpl);
Explanation fieldNormExpl = new Explanation();
- byte[] fieldNorms = reader.norms(field);
+ NormFactors fieldNorms = reader.getNormFactors(field);
float fieldNorm =
- fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f;
+ fieldNorms!=null ? fieldNorms.getFactor(doc) : 0.0f;
fieldNormExpl.setValue(fieldNorm);
fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
fieldExpl.addDetail(fieldNormExpl);
Index: org/apache/lucene/search/SloppyPhraseScorer.java
===================================================================
--- org/apache/lucene/search/SloppyPhraseScorer.java (revision 382121)
+++ org/apache/lucene/search/SloppyPhraseScorer.java (working copy)
@@ -16,14 +16,22 @@
* limitations under the License.
*/
-import org.apache.lucene.index.TermPositions;
-
import java.io.IOException;
+import org.apache.lucene.index.NormFactors;
+import org.apache.lucene.index.TermPositions;
+
final class SloppyPhraseScorer extends PhraseScorer {
private int slop;
SloppyPhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity,
+ int slop, NormFactors normFactors) {
+ super(weight, tps, positions, similarity, normFactors);
+ this.slop = slop;
+ }
+
+ /** @deprecated */
+ SloppyPhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity,
int slop, byte[] norms) {
super(weight, tps, positions, similarity, norms);
this.slop = slop;
Index: org/apache/lucene/search/MultiPhraseQuery.java
===================================================================
--- org/apache/lucene/search/MultiPhraseQuery.java (revision 382121)
+++ org/apache/lucene/search/MultiPhraseQuery.java (working copy)
@@ -23,6 +23,7 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultipleTermPositions;
+import org.apache.lucene.index.NormFactors;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermPositions;
import org.apache.lucene.search.Query;
@@ -164,10 +165,10 @@
if (slop == 0)
return new ExactPhraseScorer(this, tps, getPositions(), similarity,
- reader.norms(field));
+ reader.getNormFactors(field));
else
return new SloppyPhraseScorer(this, tps, getPositions(), similarity,
- slop, reader.norms(field));
+ slop, reader.getNormFactors(field));
}
public Explanation explain(IndexReader reader, int doc)
@@ -206,9 +207,9 @@
fieldExpl.addDetail(idfExpl);
Explanation fieldNormExpl = new Explanation();
- byte[] fieldNorms = reader.norms(field);
+ NormFactors fieldNorms = reader.getNormFactors(field);
float fieldNorm =
- fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f;
+ fieldNorms!=null ? fieldNorms.getFactor(doc) : 0.0f;
fieldNormExpl.setValue(fieldNorm);
fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
fieldExpl.addDetail(fieldNormExpl);
Index: org/apache/lucene/search/PhraseScorer.java
===================================================================
--- org/apache/lucene/search/PhraseScorer.java (revision 382121)
+++ org/apache/lucene/search/PhraseScorer.java (working copy)
@@ -22,7 +22,10 @@
abstract class PhraseScorer extends Scorer {
private Weight weight;
+
+ /** @deprecated */
protected byte[] norms;
+ private NormFactors normFactors;
protected float value;
private boolean firstTime = true;
@@ -34,8 +37,20 @@
PhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity,
+ NormFactors normFactors) {
+ this(weight, tps, positions, similarity, normFactors, null);
+ }
+
+ /** @deprecated */
+ PhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity,
byte[] norms) {
+ this(weight, tps, positions, similarity, NormFactors.newInstance(norms), norms);
+ }
+
+ private PhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity,
+ NormFactors normFactors, byte[] norms) {
super(similarity);
+ this.normFactors = normFactors;
this.norms = norms;
this.weight = weight;
this.value = weight.getValue();
@@ -89,7 +104,7 @@
public float score() throws IOException {
//System.out.println("scoring " + first.doc);
float raw = getSimilarity().tf(freq) * value; // raw score
- return raw * Similarity.decodeNorm(norms[first.doc]); // normalize
+ return raw * getNormFactors().getFactor(first.doc); // normalize
}
public boolean skipTo(int target) throws IOException {
@@ -149,6 +164,13 @@
return tfExplanation;
}
+ /**
+ * @return the NormFactors for the field associated with this phrase.
+ */
+ public NormFactors getNormFactors() {
+ return this.normFactors;
+ }
+
public String toString() { return "scorer(" + weight + ")"; }
}
Index: org/apache/lucene/search/PhraseQuery.java
===================================================================
--- org/apache/lucene/search/PhraseQuery.java (revision 382121)
+++ org/apache/lucene/search/PhraseQuery.java (working copy)
@@ -20,6 +20,7 @@
import java.util.Set;
import java.util.Vector;
+import org.apache.lucene.index.NormFactors;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermPositions;
import org.apache.lucene.index.IndexReader;
@@ -147,11 +148,11 @@
if (slop == 0) // optimize exact case
return new ExactPhraseScorer(this, tps, getPositions(), similarity,
- reader.norms(field));
+ reader.getNormFactors(field));
else
return
new SloppyPhraseScorer(this, tps, getPositions(), similarity, slop,
- reader.norms(field));
+ reader.getNormFactors(field));
}
@@ -211,9 +212,9 @@
fieldExpl.addDetail(idfExpl);
Explanation fieldNormExpl = new Explanation();
- byte[] fieldNorms = reader.norms(field);
+ NormFactors fieldNorms = reader.getNormFactors(field);
float fieldNorm =
- fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f;
+ fieldNorms!=null ? fieldNorms.getFactor(doc) : 0.0f;
fieldNormExpl.setValue(fieldNorm);
fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
fieldExpl.addDetail(fieldNormExpl);
Index: org/apache/lucene/search/TermQuery.java
===================================================================
--- org/apache/lucene/search/TermQuery.java (revision 382121)
+++ org/apache/lucene/search/TermQuery.java (working copy)
@@ -19,6 +19,7 @@
import java.io.IOException;
import java.util.Set;
+import org.apache.lucene.index.NormFactors;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.IndexReader;
@@ -66,7 +67,7 @@
return null;
return new TermScorer(this, termDocs, similarity,
- reader.norms(term.field()));
+ reader.getNormFactors(term.field()));
}
public Explanation explain(IndexReader reader, int doc)
@@ -107,9 +108,9 @@
fieldExpl.addDetail(idfExpl);
Explanation fieldNormExpl = new Explanation();
- byte[] fieldNorms = reader.norms(field);
+ NormFactors fieldNorms = reader.getNormFactors(field);
float fieldNorm =
- fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f;
+ fieldNorms!=null ? fieldNorms.getFactor(doc) : 0.0f;
fieldNormExpl.setValue(fieldNorm);
fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
fieldExpl.addDetail(fieldNormExpl);
Index: org/apache/lucene/search/ExactPhraseScorer.java
===================================================================
--- org/apache/lucene/search/ExactPhraseScorer.java (revision 382121)
+++ org/apache/lucene/search/ExactPhraseScorer.java (working copy)
@@ -22,6 +22,12 @@
final class ExactPhraseScorer extends PhraseScorer {
ExactPhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity,
+ NormFactors normFactors) {
+ super(weight, tps, positions, similarity, normFactors);
+ }
+
+ /** @deprecated */
+ ExactPhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity,
byte[] norms) {
super(weight, tps, positions, similarity, norms);
}
Index: org/apache/lucene/search/TermScorer.java
===================================================================
--- org/apache/lucene/search/TermScorer.java (revision 382121)
+++ org/apache/lucene/search/TermScorer.java (working copy)
@@ -18,6 +18,7 @@
import java.io.IOException;
+import org.apache.lucene.index.NormFactors;
import org.apache.lucene.index.TermDocs;
/** Expert: A Scorer for documents matching a Term.
@@ -25,7 +26,7 @@
final class TermScorer extends Scorer {
private Weight weight;
private TermDocs termDocs;
- private byte[] norms;
+ private NormFactors normFactors;
private float weightValue;
private int doc;
@@ -44,17 +45,30 @@
* @param norms The field norms of the document fields for the Term.
*/
TermScorer(Weight weight, TermDocs td, Similarity similarity,
- byte[] norms) {
+ NormFactors normFactors) {
super(similarity);
this.weight = weight;
this.termDocs = td;
- this.norms = norms;
+ this.normFactors = normFactors;
this.weightValue = weight.getValue();
for (int i = 0; i < SCORE_CACHE_SIZE; i++)
scoreCache[i] = getSimilarity().tf(i) * weightValue;
}
+ /** Construct a TermScorer.
+ * @param weight The weight of the Term in the query.
+ * @param td An iterator over the documents matching the Term.
+ * @param similarity The Similarity implementation to be used for score computations.
+ * @param norms The field norms of the document fields for the Term.
+ * @deprecated
+ */
+ TermScorer(Weight weight, TermDocs td, Similarity similarity,
+ byte[] norms) {
+ this(weight, td, similarity, NormFactors.newInstance(norms));
+ }
+
+
public void score(HitCollector hc) throws IOException {
next();
score(hc, Integer.MAX_VALUE);
@@ -62,7 +76,6 @@
protected boolean score(HitCollector c, int end) throws IOException {
Similarity similarity = getSimilarity(); // cache sim in local
- float[] normDecoder = Similarity.getNormDecoder();
while (doc < end) { // for docs in window
int f = freqs[pointer];
float score = // compute tf(f)*weight
@@ -70,7 +83,7 @@
? scoreCache[f] // cache hit
: similarity.tf(f)*weightValue; // cache miss
- score *= normDecoder[norms[doc] & 0xFF]; // normalize for field
+ score *= normFactors.getFactor(doc); // normalize for field
c.collect(doc, score); // collect score
@@ -115,14 +128,14 @@
return true;
}
- public float score() {
+ public float score() throws IOException {
int f = freqs[pointer];
float raw = // compute tf(f)*weight
f < SCORE_CACHE_SIZE // check cache
? scoreCache[f] // cache hit
: getSimilarity().tf(f)*weightValue; // cache miss
- return raw * Similarity.decodeNorm(norms[doc]); // normalize for field
+ return raw * normFactors.getFactor(doc); // normalize for field
}
/** Skips to the first match beyond the current whose document number is
Index: org/apache/lucene/search/PhrasePrefixQuery.java
===================================================================
--- org/apache/lucene/search/PhrasePrefixQuery.java (revision 382121)
+++ org/apache/lucene/search/PhrasePrefixQuery.java (working copy)
@@ -23,6 +23,7 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultipleTermPositions;
+import org.apache.lucene.index.NormFactors;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermPositions;
import org.apache.lucene.search.Query;
@@ -165,10 +166,10 @@
if (slop == 0)
return new ExactPhraseScorer(this, tps, getPositions(), similarity,
- reader.norms(field));
+ reader.getNormFactors(field));
else
return new SloppyPhraseScorer(this, tps, getPositions(), similarity,
- slop, reader.norms(field));
+ slop, reader.getNormFactors(field));
}
public Explanation explain(IndexReader reader, int doc)
@@ -207,9 +208,9 @@
fieldExpl.addDetail(idfExpl);
Explanation fieldNormExpl = new Explanation();
- byte[] fieldNorms = reader.norms(field);
+ NormFactors fieldNorms = reader.getNormFactors(field);
float fieldNorm =
- fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f;
+ fieldNorms!=null ? fieldNorms.getFactor(doc) : 0.0f;
fieldNormExpl.setValue(fieldNorm);
fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
fieldExpl.addDetail(fieldNormExpl);
Index: org/apache/lucene/index/MultiReader.java
===================================================================
--- org/apache/lucene/index/MultiReader.java (revision 382121)
+++ org/apache/lucene/index/MultiReader.java (working copy)
@@ -30,6 +30,7 @@
public class MultiReader extends IndexReader {
private IndexReader[] subReaders;
private int[] starts; // 1st docno for each segment
+ private Hashtable normFactorsCache = new Hashtable();
private Hashtable normsCache = new Hashtable();
private int maxDoc = 0;
private int numDocs = -1;
@@ -159,6 +160,18 @@
return ones;
}
+ public synchronized NormFactors getNormFactors(String field) throws IOException {
+ NormFactors factors = (NormFactors) normFactorsCache.get(field);
+ if (factors != null)
+ return factors;
+ if (!hasNorms(field))
+ return NormFactors.getEmptyInstance();
+
+ NormFactors multiNorms = new MultiNormFactors(field);
+    normFactorsCache.put(field, multiNorms);
+ return multiNorms;
+ }
+
public synchronized byte[] norms(String field) throws IOException {
byte[] bytes = (byte[])normsCache.get(field);
if (bytes != null)
@@ -276,6 +289,24 @@
}
return fieldSet;
}
+
+ /**
+ * Represents a set of norm factors that spans a set of documents.
+ */
+ class MultiNormFactors extends NormFactors {
+ private String field;
+
+ public MultiNormFactors(String field) throws IOException {
+ this.field = field;
+ }
+
+ @Override
+ public byte getByte(int doc) throws IOException {
+ int subReader = MultiReader.this.readerIndex(doc);
+ NormFactors subNormFactors = subReaders[subReader].getNormFactors(this.field);
+ return subNormFactors.getByte(doc - MultiReader.this.starts[subReader]);
+ }
+ }
}
class MultiTermEnum extends TermEnum {
Index: org/apache/lucene/index/IndexReader.java
===================================================================
--- org/apache/lucene/index/IndexReader.java (revision 382121)
+++ org/apache/lucene/index/IndexReader.java (working copy)
@@ -367,13 +367,27 @@
public boolean hasNorms(String field) throws IOException {
// backward compatible implementation.
// SegmentReader has an efficient implementation.
- return norms(field) != null;
+ return getNormFactors(field) != null;
}
+ /**
+ * Returns the normalization factors for the name field of every document.
+ * This is used by the search code to score documents.
+ *
+ * @see Field#setBoost(float)
+ */
+ public NormFactors getNormFactors(String field) throws IOException {
+ // This implementation should eventually go away when {@link #norms(String)}
+ // goes away
+ return NormFactors.newInstance(norms(field));
+ }
+
+
/** Returns the byte-encoded normalization factor for the named field of
* every document. This is used by the search code to score documents.
*
* @see Field#setBoost(float)
* @deprecated use {@link #getNormFactors(String)} instead.
*/
public abstract byte[] norms(String field) throws IOException;
Index: org/apache/lucene/index/NormFactors.java
===================================================================
--- org/apache/lucene/index/NormFactors.java (revision 0)
+++ org/apache/lucene/index/NormFactors.java (revision 0)
@@ -0,0 +1,99 @@
+package org.apache.lucene.index;
+
+import java.io.IOException;
+
+import org.apache.lucene.search.Similarity;
+
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Abstract class that represents the normalization factors for each document.
+ * Provides an abstraction from the underlying byte array to allow lazy loading
+ * or mmaped access.
+ */
+
+public abstract class NormFactors {
+ /**
+ * Retrieve an encoded normalization factor from the normalization array.
+ * @param doc the document number. Also the index into the byte array
+ * @return the byte representation of the normalization factor.
+ * @throws IOException
+ * @see Similarity#decodeNorm(byte)
+ */
+ public abstract byte getByte(int doc) throws IOException;
+
+ /**
+ * Retrieve a normalization factor from the normalization array.
+ * @param doc the document number. Also the index into the byte array.
+ * @return the normalization factor
+ */
+ public float getFactor(int doc) throws IOException {
+ return Similarity.decodeNorm(getByte(doc));
+ }
+
+ /**
+ * @return a NormFactors that returns 1.0f for all
+ * normalization factors.
+ */
+ public static NormFactors getEmptyInstance() {
+ return EmptyNormFactors.SINGLETON;
+ }
+
+ /**
+ * Convert a byte array to a NormFactors.
+ * @param norms the byte array from the segment
+ * @return a new NormFactors that indexes the given byte array
+ * @deprecated You should rely on {@link IndexReader#getNormFactors(String)}.
+ */
+ public static NormFactors newInstance(byte[] norms) {
+    return norms == null ? getEmptyInstance() : new ByteNormFactors(norms);
+ }
+
+ /**
+ * Provide a "fake norms" for use when a field doesn't have normalization.
+ * This will return the result of {@link Similarity#encodeNorm(float)} for
+ * the value 1.0f.
+ */
+  static class EmptyNormFactors extends NormFactors {
+    // The encoded norm for a boost of 1.0f, shared via the singleton.
+    private static final byte ENCODED_ONE = Similarity.encodeNorm(1.0f);
+
+    private EmptyNormFactors() {}
+
+    public byte getByte(int doc) {
+      return ENCODED_ONE;
+    }
+
+    static final EmptyNormFactors SINGLETON = new EmptyNormFactors();
+ }
+
+
+ /**
+ * Helper class to convert the old byte[] norms into a NormFactors.
+ *
+ */
+ static class ByteNormFactors extends NormFactors {
+ private byte[] norms;
+ public ByteNormFactors(byte[] norms) {
+ this.norms = norms;
+ }
+
+ public byte getByte(int doc) {
+ return this.norms[doc];
+ }
+ }
+}
Index: org/apache/lucene/index/FilterIndexReader.java
===================================================================
--- org/apache/lucene/index/FilterIndexReader.java (revision 382121)
+++ org/apache/lucene/index/FilterIndexReader.java (working copy)
@@ -111,6 +111,8 @@
return in.hasNorms(field);
}
+ public NormFactors getNormFactors(String f) throws IOException { return in.getNormFactors(f); }
+ /** @deprecated */
public byte[] norms(String f) throws IOException { return in.norms(f); }
public void norms(String f, byte[] bytes, int offset) throws IOException {
in.norms(f, bytes, offset);
Index: org/apache/lucene/index/ParallelReader.java
===================================================================
--- org/apache/lucene/index/ParallelReader.java (revision 382121)
+++ org/apache/lucene/index/ParallelReader.java (working copy)
@@ -169,6 +169,13 @@
return ((IndexReader)fieldToReader.get(field)).hasNorms(field);
}
+ public NormFactors getNormFactors(String field) throws IOException {
+ return ((IndexReader)fieldToReader.get(field)).getNormFactors(field);
+ }
+
+ /**
+ * @deprecated
+ */
public byte[] norms(String field) throws IOException {
return ((IndexReader)fieldToReader.get(field)).norms(field);
}
Index: org/apache/lucene/index/SegmentReader.java
===================================================================
--- org/apache/lucene/index/SegmentReader.java (revision 382121)
+++ org/apache/lucene/index/SegmentReader.java (working copy)
@@ -51,7 +51,7 @@
// Compound File Reader when based on a compound file segment
CompoundFileReader cfsReader = null;
- private class Norm {
+ private class Norm extends NormFactors {
public Norm(IndexInput in, int number)
{
this.in = in;
@@ -81,6 +81,11 @@
directory().renameFile(segment + ".tmp", fileName);
this.dirty = false;
}
+
+ @Override
+ public byte getByte(int doc) {
+ return bytes[doc];
+ }
}
private Hashtable norms = new Hashtable();
@@ -455,7 +460,11 @@
return ones;
}
+
// can return null if norms aren't stored
+ /**
+ * @deprecated
+ */
protected synchronized byte[] getNorms(String field) throws IOException {
Norm norm = (Norm) norms.get(field);
if (norm == null) return null; // not indexed, or norms not stored
@@ -468,6 +477,18 @@
return norm.bytes;
}
+ public synchronized NormFactors getNormFactors(String field) throws IOException {
+ Norm norm = (Norm) norms.get(field);
+ if (norm == null) return NormFactors.getEmptyInstance(); // not indexed, or norms not stored
+
+ if (norm.bytes == null) { // value not yet read
+ byte[] bytes = new byte[maxDoc()];
+ norms(field, bytes, 0);
+ norm.bytes = bytes; // cache it
+ }
+ return norm;
+ }
+
// returns fake norms if norms aren't available
public synchronized byte[] norms(String field) throws IOException {
byte[] bytes = getNorms(field);