Index: org/apache/lucene/search/spans/SpanScorer.java
===================================================================
--- org/apache/lucene/search/spans/SpanScorer.java (revision 382121)
+++ org/apache/lucene/search/spans/SpanScorer.java (working copy)
@@ -18,6 +18,7 @@
import java.io.IOException;
+import org.apache.lucene.index.NormFactors;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Explanation;
@@ -27,7 +28,7 @@
class SpanScorer extends Scorer {
private Spans spans;
private Weight weight;
- private byte[] norms;
+ private NormFactors normFactors;
private float value;
private boolean firstTime = true;
@@ -36,13 +37,18 @@
private int doc;
private float freq;
+ SpanScorer(Spans spans, Weight weight, Similarity similarity, NormFactors normFactors) {
+ super(similarity);
+ this.spans = spans;
+ this.normFactors = normFactors;
+ this.weight = weight;
+ this.value = weight.getValue();
+ }
+
+  /** @deprecated use {@link #SpanScorer(Spans, Weight, Similarity, NormFactors)} instead. */
SpanScorer(Spans spans, Weight weight, Similarity similarity, byte[] norms)
throws IOException {
- super(similarity);
- this.spans = spans;
- this.norms = norms;
- this.weight = weight;
- this.value = weight.getValue();
+ this(spans, weight, similarity, NormFactors.newInstance(norms));
}
public boolean next() throws IOException {
@@ -69,7 +75,7 @@
public float score() throws IOException {
float raw = getSimilarity().tf(freq) * value; // raw score
- return raw * Similarity.decodeNorm(norms[doc]); // normalize
+ return raw * normFactors.getFactor(doc); // normalize
}
public boolean skipTo(int target) throws IOException {
Index: org/apache/lucene/search/spans/SpanWeight.java
===================================================================
--- org/apache/lucene/search/spans/SpanWeight.java (revision 382121)
+++ org/apache/lucene/search/spans/SpanWeight.java (working copy)
@@ -22,6 +22,7 @@
import java.util.Collection;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.NormFactors;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
@@ -67,7 +68,7 @@
public Scorer scorer(IndexReader reader) throws IOException {
return new SpanScorer(query.getSpans(reader), this,
similarity,
- reader.norms(query.getField()));
+ reader.getNormFactors(query.getField()));
}
public Explanation explain(IndexReader reader, int doc)
@@ -121,9 +122,9 @@
fieldExpl.addDetail(idfExpl);
Explanation fieldNormExpl = new Explanation();
- byte[] fieldNorms = reader.norms(field);
+ NormFactors fieldNorms = reader.getNormFactors(field);
float fieldNorm =
- fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f;
+ fieldNorms!=null ? fieldNorms.getFactor(doc) : 0.0f;
fieldNormExpl.setValue(fieldNorm);
fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
fieldExpl.addDetail(fieldNormExpl);
Index: org/apache/lucene/search/SloppyPhraseScorer.java
===================================================================
--- org/apache/lucene/search/SloppyPhraseScorer.java (revision 382121)
+++ org/apache/lucene/search/SloppyPhraseScorer.java (working copy)
@@ -16,14 +16,22 @@
* limitations under the License.
*/
-import org.apache.lucene.index.TermPositions;
-
import java.io.IOException;
+import org.apache.lucene.index.NormFactors;
+import org.apache.lucene.index.TermPositions;
+
final class SloppyPhraseScorer extends PhraseScorer {
private int slop;
SloppyPhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity,
+ int slop, NormFactors normFactors) {
+ super(weight, tps, positions, similarity, normFactors);
+ this.slop = slop;
+ }
+
+  /** @deprecated use {@link #SloppyPhraseScorer(Weight, TermPositions[], int[], Similarity, int, NormFactors)} instead. */
+ SloppyPhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity,
int slop, byte[] norms) {
super(weight, tps, positions, similarity, norms);
this.slop = slop;
Index: org/apache/lucene/search/MultiPhraseQuery.java
===================================================================
--- org/apache/lucene/search/MultiPhraseQuery.java (revision 382121)
+++ org/apache/lucene/search/MultiPhraseQuery.java (working copy)
@@ -23,6 +23,7 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultipleTermPositions;
+import org.apache.lucene.index.NormFactors;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermPositions;
import org.apache.lucene.search.Query;
@@ -76,7 +77,7 @@
/**
* Allows to specify the relative position of terms within the phrase.
- *
+ *
* @see PhraseQuery#add(Term, int)
* @param terms
* @param position
@@ -139,7 +140,7 @@
public void normalize(float queryNorm) {
this.queryNorm = queryNorm;
queryWeight *= queryNorm; // normalize query weight
- value = queryWeight * idf; // idf for document
+ value = queryWeight * idf; // idf for document
}
public Scorer scorer(IndexReader reader) throws IOException {
@@ -164,10 +165,10 @@
if (slop == 0)
return new ExactPhraseScorer(this, tps, getPositions(), similarity,
- reader.norms(field));
+ reader.getNormFactors(field));
else
return new SloppyPhraseScorer(this, tps, getPositions(), similarity,
- slop, reader.norms(field));
+ slop, reader.getNormFactors(field));
}
public Explanation explain(IndexReader reader, int doc)
@@ -206,9 +207,9 @@
fieldExpl.addDetail(idfExpl);
Explanation fieldNormExpl = new Explanation();
- byte[] fieldNorms = reader.norms(field);
+ NormFactors fieldNorms = reader.getNormFactors(field);
float fieldNorm =
- fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f;
+ fieldNorms!=null ? fieldNorms.getFactor(doc) : 0.0f;
fieldNormExpl.setValue(fieldNorm);
fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
fieldExpl.addDetail(fieldNormExpl);
Index: org/apache/lucene/search/PhraseScorer.java
===================================================================
--- org/apache/lucene/search/PhraseScorer.java (revision 382121)
+++ org/apache/lucene/search/PhraseScorer.java (working copy)
@@ -22,7 +22,10 @@
abstract class PhraseScorer extends Scorer {
private Weight weight;
+
+  /** @deprecated use {@link #getNormFactors()} instead. */
protected byte[] norms;
+ private NormFactors normFactors;
protected float value;
private boolean firstTime = true;
@@ -34,8 +37,20 @@
PhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity,
- byte[] norms) {
+ NormFactors normFactors) {
+ this(weight, tps, positions, similarity, normFactors, null);
+ }
+
+  /** @deprecated use {@link #PhraseScorer(Weight, TermPositions[], int[], Similarity, NormFactors)} instead. */
+ PhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity,
+ byte[] norms) {
+ this(weight, tps, positions, similarity, NormFactors.newInstance(norms), norms);
+ }
+
+ private PhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity,
+ NormFactors normFactors, byte[] norms) {
super(similarity);
+ this.normFactors = normFactors;
this.norms = norms;
this.weight = weight;
this.value = weight.getValue();
@@ -65,7 +80,7 @@
}
return doNext();
}
-
+
// next without initial increment
private boolean doNext() throws IOException {
while (more) {
@@ -89,7 +104,7 @@
public float score() throws IOException {
//System.out.println("scoring " + first.doc);
float raw = getSimilarity().tf(freq) * value; // raw score
- return raw * Similarity.decodeNorm(norms[first.doc]); // normalize
+ return raw * getNormFactors().getFactor(first.doc); // normalize
}
public boolean skipTo(int target) throws IOException {
@@ -104,12 +119,12 @@
protected abstract float phraseFreq() throws IOException;
private void init() throws IOException {
- for (PhrasePositions pp = first; more && pp != null; pp = pp.next)
+ for (PhrasePositions pp = first; more && pp != null; pp = pp.next)
more = pp.next();
if(more)
sort();
}
-
+
private void sort() {
pq.clear();
for (PhrasePositions pp = first; pp != null; pp = pp.next)
@@ -149,6 +164,13 @@
return tfExplanation;
}
+ /**
+ * @return the NormFactors for the field associated with this phrase.
+ */
+ public NormFactors getNormFactors() {
+ return this.normFactors;
+ }
+
public String toString() { return "scorer(" + weight + ")"; }
}
Index: org/apache/lucene/search/PhraseQuery.java
===================================================================
--- org/apache/lucene/search/PhraseQuery.java (revision 382121)
+++ org/apache/lucene/search/PhraseQuery.java (working copy)
@@ -20,6 +20,7 @@
import java.util.Set;
import java.util.Vector;
+import org.apache.lucene.index.NormFactors;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermPositions;
import org.apache.lucene.index.IndexReader;
@@ -27,7 +28,7 @@
/** A Query that matches documents containing a particular sequence of terms.
* A PhraseQuery is built by QueryParser for input like "new york".
- *
+ *
*
This query may be combined with other terms or queries with a {@link BooleanQuery}.
*/
public class PhraseQuery extends Query {
@@ -74,7 +75,7 @@
* The relative position of the term within the phrase is specified explicitly.
* This allows e.g. phrases with more than one term at the same position
* or phrases with gaps (e.g. in connection with stopwords).
- *
+ *
* @param term
* @param position
*/
@@ -130,7 +131,7 @@
public void normalize(float queryNorm) {
this.queryNorm = queryNorm;
queryWeight *= queryNorm; // normalize query weight
- value = queryWeight * idf; // idf for document
+ value = queryWeight * idf; // idf for document
}
public Scorer scorer(IndexReader reader) throws IOException {
@@ -147,11 +148,11 @@
if (slop == 0) // optimize exact case
return new ExactPhraseScorer(this, tps, getPositions(), similarity,
- reader.norms(field));
+ reader.getNormFactors(field));
else
return
new SloppyPhraseScorer(this, tps, getPositions(), similarity, slop,
- reader.norms(field));
+ reader.getNormFactors(field));
}
@@ -211,9 +212,9 @@
fieldExpl.addDetail(idfExpl);
Explanation fieldNormExpl = new Explanation();
- byte[] fieldNorms = reader.norms(field);
+ NormFactors fieldNorms = reader.getNormFactors(field);
float fieldNorm =
- fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f;
+ fieldNorms!=null ? fieldNorms.getFactor(doc) : 0.0f;
fieldNormExpl.setValue(fieldNorm);
fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
fieldExpl.addDetail(fieldNormExpl);
Index: org/apache/lucene/search/TermQuery.java
===================================================================
--- org/apache/lucene/search/TermQuery.java (revision 382121)
+++ org/apache/lucene/search/TermQuery.java (working copy)
@@ -19,6 +19,7 @@
import java.io.IOException;
import java.util.Set;
+import org.apache.lucene.index.NormFactors;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.IndexReader;
@@ -66,7 +67,7 @@
return null;
return new TermScorer(this, termDocs, similarity,
- reader.norms(term.field()));
+ reader.getNormFactors(term.field()));
}
public Explanation explain(IndexReader reader, int doc)
@@ -107,9 +108,9 @@
fieldExpl.addDetail(idfExpl);
Explanation fieldNormExpl = new Explanation();
- byte[] fieldNorms = reader.norms(field);
+ NormFactors fieldNorms = reader.getNormFactors(field);
float fieldNorm =
- fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f;
+ fieldNorms!=null ? fieldNorms.getFactor(doc) : 0.0f;
fieldNormExpl.setValue(fieldNorm);
fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
fieldExpl.addDetail(fieldNormExpl);
Index: org/apache/lucene/search/ExactPhraseScorer.java
===================================================================
--- org/apache/lucene/search/ExactPhraseScorer.java (revision 382121)
+++ org/apache/lucene/search/ExactPhraseScorer.java (working copy)
@@ -22,6 +22,12 @@
final class ExactPhraseScorer extends PhraseScorer {
ExactPhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity,
+ NormFactors normFactors) {
+ super(weight, tps, positions, similarity, normFactors);
+ }
+
+  /** @deprecated use {@link #ExactPhraseScorer(Weight, TermPositions[], int[], Similarity, NormFactors)} instead. */
+ ExactPhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity,
byte[] norms) {
super(weight, tps, positions, similarity, norms);
}
@@ -45,7 +51,7 @@
}
freq++; // all equal: a match
} while (last.nextPosition());
-
+
return (float)freq;
}
}
Index: org/apache/lucene/search/TermScorer.java
===================================================================
--- org/apache/lucene/search/TermScorer.java (revision 382121)
+++ org/apache/lucene/search/TermScorer.java (working copy)
@@ -18,6 +18,7 @@
import java.io.IOException;
+import org.apache.lucene.index.NormFactors;
import org.apache.lucene.index.TermDocs;
/** Expert: A Scorer for documents matching a Term.
@@ -25,7 +26,7 @@
final class TermScorer extends Scorer {
private Weight weight;
private TermDocs termDocs;
- private byte[] norms;
+ private NormFactors normFactors;
private float weightValue;
private int doc;
@@ -44,17 +45,30 @@
* @param norms The field norms of the document fields for the Term.
*/
TermScorer(Weight weight, TermDocs td, Similarity similarity,
- byte[] norms) {
+ NormFactors normFactors) {
super(similarity);
this.weight = weight;
this.termDocs = td;
- this.norms = norms;
+ this.normFactors = normFactors;
this.weightValue = weight.getValue();
for (int i = 0; i < SCORE_CACHE_SIZE; i++)
scoreCache[i] = getSimilarity().tf(i) * weightValue;
}
+ /** Construct a TermScorer.
+ * @param weight The weight of the Term in the query.
+ * @param td An iterator over the documents matching the Term.
+ * @param similarity The Similarity implementation to be used for score computations.
+ * @param norms The field norms of the document fields for the Term.
+   * @deprecated use {@link #TermScorer(Weight, TermDocs, Similarity, NormFactors)} instead.
+ */
+ TermScorer(Weight weight, TermDocs td, Similarity similarity,
+ byte[] norms) {
+ this(weight, td, similarity, NormFactors.newInstance(norms));
+ }
+
+
public void score(HitCollector hc) throws IOException {
next();
score(hc, Integer.MAX_VALUE);
@@ -62,7 +76,6 @@
protected boolean score(HitCollector c, int end) throws IOException {
Similarity similarity = getSimilarity(); // cache sim in local
- float[] normDecoder = Similarity.getNormDecoder();
while (doc < end) { // for docs in window
int f = freqs[pointer];
float score = // compute tf(f)*weight
@@ -70,7 +83,7 @@
? scoreCache[f] // cache hit
: similarity.tf(f)*weightValue; // cache miss
- score *= normDecoder[norms[doc] & 0xFF]; // normalize for field
+ score *= normFactors.getFactor(doc); // normalize for field
c.collect(doc, score); // collect score
@@ -83,7 +96,7 @@
doc = Integer.MAX_VALUE; // set to sentinel value
return false;
}
- }
+ }
doc = docs[pointer];
}
return true;
@@ -110,23 +123,23 @@
doc = Integer.MAX_VALUE; // set to sentinel value
return false;
}
- }
+ }
doc = docs[pointer];
return true;
}
- public float score() {
+ public float score() throws IOException {
int f = freqs[pointer];
float raw = // compute tf(f)*weight
f < SCORE_CACHE_SIZE // check cache
? scoreCache[f] // cache hit
: getSimilarity().tf(f)*weightValue; // cache miss
- return raw * Similarity.decodeNorm(norms[doc]); // normalize for field
+ return raw * normFactors.getFactor(doc); // normalize for field
}
/** Skips to the first match beyond the current whose document number is
- * greater than or equal to a given target.
+ * greater than or equal to a given target.
*
The implementation uses {@link TermDocs#skipTo(int)}.
* @param target The target document number.
* @return true iff there is such a match.
@@ -178,7 +191,7 @@
termDocs.close();
tfExplanation.setValue(getSimilarity().tf(tf));
tfExplanation.setDescription("tf(termFreq("+query.getTerm()+")="+tf+")");
-
+
return tfExplanation;
}
Index: org/apache/lucene/search/PhrasePrefixQuery.java
===================================================================
--- org/apache/lucene/search/PhrasePrefixQuery.java (revision 382121)
+++ org/apache/lucene/search/PhrasePrefixQuery.java (working copy)
@@ -23,6 +23,7 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultipleTermPositions;
+import org.apache.lucene.index.NormFactors;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermPositions;
import org.apache.lucene.search.Query;
@@ -35,7 +36,7 @@
* add(Term) on the term "Microsoft", then find all terms that has "app" as
* prefix using IndexReader.terms(Term), and use PhrasePrefixQuery.add(Term[]
* terms) to add them to the query.
- *
+ *
* @deprecated use {@link org.apache.lucene.search.MultiPhraseQuery} instead
* @author Anders Nielsen
* @version 1.0
@@ -77,7 +78,7 @@
/**
* Allows to specify the relative position of terms within the phrase.
- *
+ *
* @see PhraseQuery#add(Term, int)
* @param terms
* @param position
@@ -140,7 +141,7 @@
public void normalize(float queryNorm) {
this.queryNorm = queryNorm;
queryWeight *= queryNorm; // normalize query weight
- value = queryWeight * idf; // idf for document
+ value = queryWeight * idf; // idf for document
}
public Scorer scorer(IndexReader reader) throws IOException {
@@ -165,10 +166,10 @@
if (slop == 0)
return new ExactPhraseScorer(this, tps, getPositions(), similarity,
- reader.norms(field));
+ reader.getNormFactors(field));
else
return new SloppyPhraseScorer(this, tps, getPositions(), similarity,
- slop, reader.norms(field));
+ slop, reader.getNormFactors(field));
}
public Explanation explain(IndexReader reader, int doc)
@@ -207,9 +208,9 @@
fieldExpl.addDetail(idfExpl);
Explanation fieldNormExpl = new Explanation();
- byte[] fieldNorms = reader.norms(field);
+ NormFactors fieldNorms = reader.getNormFactors(field);
float fieldNorm =
- fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f;
+ fieldNorms!=null ? fieldNorms.getFactor(doc) : 0.0f;
fieldNormExpl.setValue(fieldNorm);
fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
fieldExpl.addDetail(fieldNormExpl);
Index: org/apache/lucene/index/MultiReader.java
===================================================================
--- org/apache/lucene/index/MultiReader.java (revision 382121)
+++ org/apache/lucene/index/MultiReader.java (working copy)
@@ -30,6 +30,7 @@
public class MultiReader extends IndexReader {
private IndexReader[] subReaders;
private int[] starts; // 1st docno for each segment
+ private Hashtable normFactorsCache = new Hashtable();
private Hashtable normsCache = new Hashtable();
private int maxDoc = 0;
private int numDocs = -1;
@@ -159,6 +160,18 @@
return ones;
}
+ public synchronized NormFactors getNormFactors(String field) throws IOException {
+ NormFactors factors = (NormFactors) normFactorsCache.get(field);
+ if (factors != null)
+ return factors;
+ if (!hasNorms(field))
+ return NormFactors.getEmptyInstance();
+
+ NormFactors multiNorms = new MultiNormFactors(field);
+    normFactorsCache.put(field, multiNorms);
+ return multiNorms;
+ }
+
public synchronized byte[] norms(String field) throws IOException {
byte[] bytes = (byte[])normsCache.get(field);
if (bytes != null)
@@ -276,6 +289,24 @@
}
return fieldSet;
}
+
+ /**
+ * Represents a set of norm factors that spans a set of documents.
+ */
+ class MultiNormFactors extends NormFactors {
+ private String field;
+
+ public MultiNormFactors(String field) throws IOException {
+ this.field = field;
+ }
+
+    // implements the abstract NormFactors.getByte for the multi-segment case
+ public byte getByte(int doc) throws IOException {
+ int subReader = MultiReader.this.readerIndex(doc);
+      NormFactors subNormFactors = subReaders[subReader].getNormFactors(this.field); // NOTE(review): fetched on every call — consider caching per sub-reader
+ return subNormFactors.getByte(doc - MultiReader.this.starts[subReader]);
+ }
+ }
}
class MultiTermEnum extends TermEnum {
Index: org/apache/lucene/index/IndexReader.java
===================================================================
--- org/apache/lucene/index/IndexReader.java (revision 382121)
+++ org/apache/lucene/index/IndexReader.java (working copy)
@@ -44,7 +44,7 @@
document in the index. These document numbers are ephemeral--they may change
as documents are added to and deleted from an index. Clients should thus not
rely on a given document having the same number between sessions.
-
+
An IndexReader can be opened on a directory for which an IndexWriter is
opened already, but it cannot be used to delete documents from the index then.
@@ -52,13 +52,13 @@
@version $Id$
*/
public abstract class IndexReader {
-
+
public static final class FieldOption {
private String option;
private FieldOption() { }
private FieldOption(String option) {
this.option = option;
- }
+ }
public String toString() {
return this.option;
}
@@ -81,21 +81,21 @@
// all fields where termvectors with offset and position values set
public static final FieldOption TERMVECTOR_WITH_POSITION_OFFSET = new FieldOption ("TERMVECTOR_WITH_POSITION_OFFSET");
}
-
+
/**
- * Constructor used if IndexReader is not owner of its directory.
+ * Constructor used if IndexReader is not owner of its directory.
* This is used for IndexReaders that are used within other IndexReaders that take care or locking directories.
- *
+ *
* @param directory Directory where IndexReader files reside.
*/
protected IndexReader(Directory directory) {
this.directory = directory;
}
-
+
/**
* Constructor used if IndexReader is owner of its directory.
* If IndexReader is owner of its directory, it locks its directory in case of write operations.
- *
+ *
* @param directory Directory where IndexReader files reside.
* @param segmentInfos Used for write-l
* @param closeDirectory
@@ -119,7 +119,7 @@
private Lock writeLock;
private boolean stale;
private boolean hasChanges;
-
+
/** Returns an IndexReader reading the index in an FSDirectory in the named
path. */
@@ -132,7 +132,7 @@
public static IndexReader open(File path) throws IOException {
return open(FSDirectory.getDirectory(path, false), true);
}
-
+
/** Returns an IndexReader reading the index in the given Directory. */
public static IndexReader open(final Directory directory) throws IOException {
return open(directory, false);
@@ -153,7 +153,7 @@
for (int i = 0; i < infos.size(); i++)
readers[i] = SegmentReader.get(infos.info(i));
return new MultiReader(directory, infos, closeDirectory, readers);
-
+
}
}.run();
}
@@ -162,28 +162,28 @@
/** Returns the directory this index resides in. */
public Directory directory() { return directory; }
- /**
+ /**
* Returns the time the index in the named directory was last modified.
* Do not use this to check whether the reader is still up-to-date, use
- * {@link #isCurrent()} instead.
+ * {@link #isCurrent()} instead.
*/
public static long lastModified(String directory) throws IOException {
return lastModified(new File(directory));
}
- /**
- * Returns the time the index in the named directory was last modified.
+ /**
+ * Returns the time the index in the named directory was last modified.
* Do not use this to check whether the reader is still up-to-date, use
- * {@link #isCurrent()} instead.
+ * {@link #isCurrent()} instead.
*/
public static long lastModified(File directory) throws IOException {
return FSDirectory.fileModified(directory, IndexFileNames.SEGMENTS);
}
- /**
- * Returns the time the index in the named directory was last modified.
+ /**
+ * Returns the time the index in the named directory was last modified.
* Do not use this to check whether the reader is still up-to-date, use
- * {@link #isCurrent()} instead.
+ * {@link #isCurrent()} instead.
*/
public static long lastModified(Directory directory) throws IOException {
return directory.fileModified(IndexFileNames.SEGMENTS);
@@ -193,7 +193,7 @@
* Reads version number from segments files. The version number is
* initialized with a timestamp and then increased by one for each change of
* the index.
- *
+ *
* @param directory where the index resides.
* @return version number.
* @throws IOException if segments file cannot be read
@@ -206,7 +206,7 @@
* Reads version number from segments files. The version number is
* initialized with a timestamp and then increased by one for each change of
* the index.
- *
+ *
* @param directory where the index resides.
* @return version number.
* @throws IOException if segments file cannot be read
@@ -222,7 +222,7 @@
* Reads version number from segments files. The version number is
* initialized with a timestamp and then increased by one for each change of
* the index.
- *
+ *
* @param directory where the index resides.
* @return version number.
* @throws IOException if segments file cannot be read.
@@ -230,12 +230,12 @@
public static long getCurrentVersion(Directory directory) throws IOException {
synchronized (directory) { // in- & inter-process sync
Lock commitLock=directory.makeLock(IndexWriter.COMMIT_LOCK_NAME);
-
+
boolean locked=false;
-
+
try {
locked=commitLock.obtain(IndexWriter.COMMIT_LOCK_TIMEOUT);
-
+
return SegmentInfos.readCurrentVersion(directory);
} finally {
if (locked) {
@@ -244,7 +244,7 @@
}
}
}
-
+
/**
* Version number when this IndexReader was opened.
*/
@@ -256,18 +256,18 @@
* Check whether this IndexReader still works on a current version of the index.
* If this is not the case you will need to re-open the IndexReader to
* make sure you see the latest changes made to the index.
- *
+ *
* @throws IOException
*/
public boolean isCurrent() throws IOException {
synchronized (directory) { // in- & inter-process sync
Lock commitLock=directory.makeLock(IndexWriter.COMMIT_LOCK_NAME);
-
+
boolean locked=false;
-
+
try {
locked=commitLock.obtain(IndexWriter.COMMIT_LOCK_TIMEOUT);
-
+
return SegmentInfos.readCurrentVersion(directory) == segmentInfos.getVersion();
} finally {
if (locked) {
@@ -284,7 +284,7 @@
* If no such fields existed, the method returns null. The term vectors that are
* returned my either be of type TermFreqVector or of type TermPositionsVector if
* positions or offsets have been stored.
- *
+ *
* @param docNumber document for which term frequency vectors are returned
* @return array of term frequency vectors. May be null if no term vectors have been
* stored for the specified document.
@@ -294,14 +294,14 @@
abstract public TermFreqVector[] getTermFreqVectors(int docNumber)
throws IOException;
-
+
/**
* Return a term frequency vector for the specified document and field. The
* returned vector contains terms and frequencies for the terms in
* the specified field of this document, if the field had the storeTermVector
- * flag set. If termvectors had been stored with positions or offsets, a
+ * flag set. If termvectors had been stored with positions or offsets, a
* TermPositionsVector is returned.
- *
+ *
* @param docNumber document for which the term frequency vector is returned
* @param field field for which the term frequency vector is returned.
* @return term frequency vector May be null if field does not exist in the specified
@@ -311,7 +311,7 @@
*/
abstract public TermFreqVector getTermFreqVector(int docNumber, String field)
throws IOException;
-
+
/**
* Returns true if an index exists at the specified directory.
* If the directory does not exist or if there is no index in it.
@@ -362,18 +362,32 @@
/** Returns true if any documents have been deleted */
public abstract boolean hasDeletions();
-
+
/** Returns true if there are norms stored for this field. */
public boolean hasNorms(String field) throws IOException {
// backward compatible implementation.
// SegmentReader has an efficient implementation.
     return norms(field) != null;
}
+ /**
+ * Returns the normalization factors for the name field of every document.
+ * This is used by the search code to score documents.
+ *
+ * @see Field#setBoost(float)
+ */
+ public NormFactors getNormFactors(String field) throws IOException {
+ // This implementation should eventually go away when {@link #norms(String)}
+ // goes away
+ return NormFactors.newInstance(norms(field));
+ }
+
+
/** Returns the byte-encoded normalization factor for the named field of
* every document. This is used by the search code to score documents.
*
* @see Field#setBoost(float)
+   * @deprecated use {@link #getNormFactors(String)} instead.
*/
public abstract byte[] norms(String field) throws IOException;
@@ -401,9 +415,9 @@
doSetNorm(doc, field, value);
hasChanges = true;
}
-
+
/** Implements setNorm in subclass.*/
- protected abstract void doSetNorm(int doc, String field, byte value)
+ protected abstract void doSetNorm(int doc, String field, byte value)
throws IOException;
/** Expert: Resets the normalization factor for the named field of the named
@@ -479,7 +493,7 @@
/**
* Tries to acquire the WriteLock on this directory.
* this method is only valid if this IndexReader is directory owner.
- *
+ *
* @throws IOException If WriteLock cannot be acquired.
*/
private void aquireWriteLock() throws IOException {
@@ -544,7 +558,7 @@
* See {@link #delete(int)} for information about when this deletion will
* become effective.
* @return the number of documents deleted
- *
+ *
* @deprecated Use {@link #deleteDocuments(Term term)} instead.
*/
public final int delete(Term term) throws IOException {
@@ -556,7 +570,7 @@
* the document. Then to delete such a document, one merely constructs a
* term with the appropriate field and the unique ID string as its text and
* passes it to this method.
- * See {@link #delete(int)} for information about when this deletion will
+ * See {@link #delete(int)} for information about when this deletion will
* become effective.
* @return the number of documents deleted
*/
@@ -582,13 +596,13 @@
doUndeleteAll();
hasChanges = true;
}
-
+
/** Implements actual undeleteAll() in subclass. */
protected abstract void doUndeleteAll() throws IOException;
/**
* Commit changes resulting from delete, undeleteAll, or setNorm operations
- *
+ *
* @throws IOException
*/
protected final synchronized void commit() throws IOException{
@@ -614,10 +628,10 @@
}
hasChanges = false;
}
-
+
/** Implements commit. */
protected abstract void doCommit() throws IOException;
-
+
/**
* Closes files associated with this index.
* Also saves any new deletions to disk.
@@ -640,13 +654,13 @@
writeLock = null;
}
}
-
+
/**
* Returns a list of all unique field names that exist in the index pointed
* to by this IndexReader.
* @return Collection of Strings indicating the names of the fields
* @throws IOException if there is a problem with accessing the index
- *
+ *
* @deprecated Replaced by {@link #getFieldNames(IndexReader.FieldOption)}
*/
public abstract Collection getFieldNames() throws IOException;
@@ -659,19 +673,19 @@
* false if only unindexed fields should be returned.
* @return Collection of Strings indicating the names of the fields
* @throws IOException if there is a problem with accessing the index
- *
+ *
* @deprecated Replaced by {@link #getFieldNames(IndexReader.FieldOption)}
*/
public abstract Collection getFieldNames(boolean indexed) throws IOException;
/**
- *
- * @param storedTermVector if true, returns only Indexed fields that have term vector info,
- * else only indexed fields without term vector info
+ *
+ * @param storedTermVector if true, returns only Indexed fields that have term vector info,
+ * else only indexed fields without term vector info
* @return Collection of Strings indicating the names of the fields
- *
+ *
* @deprecated Replaced by {@link #getFieldNames(IndexReader.FieldOption)}
- */
+ */
public Collection getIndexedFieldNames(boolean storedTermVector){
if(storedTermVector){
Set fieldSet = new HashSet();
@@ -684,18 +698,18 @@
else
return getIndexedFieldNames(Field.TermVector.NO);
}
-
+
/**
* Get a list of unique field names that exist in this index, are indexed, and have
* the specified term vector information.
- *
+ *
* @param tvSpec specifies which term vector information should be available for the fields
* @return Collection of Strings indicating the names of the fields
- *
+ *
* @deprecated Replaced by {@link #getFieldNames(IndexReader.FieldOption)}
*/
public abstract Collection getIndexedFieldNames(Field.TermVector tvSpec);
-
+
/**
* Get a list of unique field names that exist in this index and have the specified
* field option information.
@@ -741,7 +755,7 @@
directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release();
directory.makeLock(IndexWriter.COMMIT_LOCK_NAME).release();
}
-
+
/**
* Prints the filename and size of each file within a given compound file.
* Add the -extract flag to extract files to the current working directory.
@@ -768,7 +782,7 @@
Directory dir = null;
CompoundFileReader cfr = null;
-
+
try {
File file = new File(filename);
String dirname = file.getAbsoluteFile().getParent();
@@ -778,7 +792,7 @@
String [] files = cfr.list();
Arrays.sort(files); // sort the array of filename so that the output is more readable
-
+
for (int i = 0; i < files.length; ++i) {
long len = cfr.fileLength(files[i]);
@@ -787,7 +801,7 @@
IndexInput ii = cfr.openInput(files[i]);
FileOutputStream f = new FileOutputStream(files[i]);
-
+
// read and write with a small buffer, which is more effectiv than reading byte by byte
byte[] buffer = new byte[1024];
int chunk = buffer.length;
@@ -797,7 +811,7 @@
f.write(buffer, 0, bufLen);
len -= bufLen;
}
-
+
f.close();
ii.close();
}
Index: org/apache/lucene/index/NormFactors.java
===================================================================
--- org/apache/lucene/index/NormFactors.java (revision 0)
+++ org/apache/lucene/index/NormFactors.java (revision 0)
@@ -0,0 +1,111 @@
+package org.apache.lucene.index;
+
+import java.io.IOException;
+
+import org.apache.lucene.search.Similarity;
+
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Abstract class that represents the normalization factors for each document.
+ * Provides an abstraction from the underlying byte array to allow lazy loading
+ * or mmapped access.
+ */
+public abstract class NormFactors {
+
+  /**
+   * Retrieves the encoded normalization factor for a document.
+   *
+   * @param doc the document number; also the index into the underlying
+   *            norm storage
+   * @return the byte representation of the normalization factor
+   * @throws IOException if the norms cannot be read
+   * @see Similarity#decodeNorm(byte)
+   */
+  public abstract byte getByte(int doc) throws IOException;
+
+  /**
+   * Retrieves the decoded normalization factor for a document.
+   *
+   * @param doc the document number
+   * @return the normalization factor
+   * @throws IOException if the norms cannot be read
+   */
+  public float getFactor(int doc) throws IOException {
+    return Similarity.decodeNorm(getByte(doc));
+  }
+
+  /**
+   * Returns a NormFactors that yields 1.0f for every document.  Used as
+   * "fake norms" when a field has no stored normalization information.
+   *
+   * @return the shared empty instance
+   */
+  public static NormFactors getEmptyInstance() {
+    return EmptyNormFactors.SINGLETON;
+  }
+
+  /**
+   * Wraps a raw byte array of norms in a NormFactors.  A null array is
+   * treated as "norms not stored" and yields the empty instance, matching
+   * the contract of the old byte[]-returning norms API.
+   *
+   * @param norms the byte array from the segment, or null
+   * @return a NormFactors view over the given bytes
+   * @deprecated You should rely on {@link IndexReader#getNormFactors(String)}.
+   */
+  public static NormFactors newInstance(byte[] norms) {
+    if (norms == null) {                // old norms(String) API may return null
+      return getEmptyInstance();
+    }
+    return new ByteNormFactors(norms);
+  }
+
+  /**
+   * Provides "fake norms" for use when a field doesn't have normalization:
+   * every document gets the encoding of 1.0f.
+   */
+  static class EmptyNormFactors extends NormFactors {
+
+    // The encoding of 1.0f never changes, so compute it once and share it.
+    private static final byte ENCODED_ONE = Similarity.encodeNorm(1.0f);
+
+    // Stateless, so a single shared immutable instance suffices.
+    private static final EmptyNormFactors SINGLETON = new EmptyNormFactors();
+
+    private EmptyNormFactors() {}
+
+    public byte getByte(int doc) {
+      return ENCODED_ONE;
+    }
+  }
+
+  /**
+   * Adapts the legacy byte[] norms representation to the NormFactors
+   * interface.
+   */
+  static class ByteNormFactors extends NormFactors {
+
+    private final byte[] norms;
+
+    public ByteNormFactors(byte[] norms) {
+      this.norms = norms;
+    }
+
+    public byte getByte(int doc) {
+      return norms[doc];
+    }
+  }
+}
Index: org/apache/lucene/index/FilterIndexReader.java
===================================================================
--- org/apache/lucene/index/FilterIndexReader.java (revision 382121)
+++ org/apache/lucene/index/FilterIndexReader.java (working copy)
@@ -111,6 +111,8 @@
return in.hasNorms(field);
}
+ public NormFactors getNormFactors(String f) throws IOException { return in.getNormFactors(f); }
+ /** @deprecated */
public byte[] norms(String f) throws IOException { return in.norms(f); }
public void norms(String f, byte[] bytes, int offset) throws IOException {
in.norms(f, bytes, offset);
@@ -145,7 +147,7 @@
public Collection getIndexedFieldNames (Field.TermVector tvSpec){
return in.getIndexedFieldNames(tvSpec);
}
-
+
public Collection getFieldNames(IndexReader.FieldOption fieldNames) {
return in.getFieldNames(fieldNames);
}
Index: org/apache/lucene/index/ParallelReader.java
===================================================================
--- org/apache/lucene/index/ParallelReader.java (revision 382121)
+++ org/apache/lucene/index/ParallelReader.java (working copy)
@@ -41,7 +41,7 @@
* change rarely and small fields that change more frequently. The smaller
* fields may be re-indexed in a new index and both indexes may be searched
* together.
- *
+ *
*
 * <p><strong>Warning:</strong> It is up to you to make sure all indexes
 * are created and modified the same way. For example, if you add
 * documents to one index, you need to add the same documents in the
@@ -51,7 +51,7 @@
 public class ParallelReader extends IndexReader {
   private List readers = new ArrayList();
   private SortedMap fieldToReader = new TreeMap();
-  private List storedFieldReaders = new ArrayList(); 
+  private List storedFieldReaders = new ArrayList();
   private int maxDoc;
   private int numDocs;
@@ -59,7 +59,7 @@
   /** Construct a ParallelReader. */
   public ParallelReader() throws IOException { super(null); }
-  
+
   /** Add an IndexReader. */
   public void add(IndexReader reader) throws IOException {
     add(reader, false);
@@ -68,10 +68,10 @@
   /** Add an IndexReader whose stored fields will not be returned.  This can
    * accellerate search when stored fields are only needed from a subset of
    * the IndexReaders.
-   * 
+   *
-   * @throws IllegalArgumentException if not all indexes contain the same number 
+   * @throws IllegalArgumentException if not all indexes contain the same number
    * of documents
-   * @throws IllegalArgumentException if not all indexes have the same value 
+   * @throws IllegalArgumentException if not all indexes have the same value
    * of {@link IndexReader#maxDoc()}
    */
   public void add(IndexReader reader, boolean ignoreStoredFields)
@@ -89,7 +89,7 @@
     if (reader.numDocs() != numDocs)
       throw new IllegalArgumentException
         ("All readers must have same numDocs: "+numDocs+"!="+reader.numDocs());
-    
+
     Iterator i = reader.getFieldNames(IndexReader.FieldOption.ALL).iterator();
     while (i.hasNext()) {                             // update fieldToReader map
       String field = (String)i.next();
@@ -169,6 +169,13 @@
     return ((IndexReader)fieldToReader.get(field)).hasNorms(field);
   }
 
+  public NormFactors getNormFactors(String field) throws IOException {
+    return ((IndexReader)fieldToReader.get(field)).getNormFactors(field);
+  }
+
+  /**
+   * @deprecated
+   */
   public byte[] norms(String field) throws IOException {
     return ((IndexReader)fieldToReader.get(field)).norms(field);
   }
@@ -264,12 +271,12 @@
       if (field != null)
         termEnum = ((IndexReader)fieldToReader.get(field)).terms();
     }
-    
+
     public ParallelTermEnum(Term term) throws IOException {
       field = term.field();
       termEnum = ((IndexReader)fieldToReader.get(field)).terms(term);
     }
-    
+
     public boolean next() throws IOException {
       if (field == null)
         return false;
@@ -279,7 +286,7 @@
       // still within field?
       if (next && termEnum.term().field() == field)
         return true;                                  // yes, keep going
-      
+
       termEnum.close();                               // close old termEnum
 
       // find the next field, if any
@@ -290,7 +297,7 @@
     }
 
       return false;                                   // no more fields
-      
+
     }
 
     public Term term() { return termEnum.term(); }
Index: org/apache/lucene/index/SegmentReader.java
===================================================================
--- org/apache/lucene/index/SegmentReader.java	(revision 382121)
+++ org/apache/lucene/index/SegmentReader.java	(working copy)
@@ -51,7 +51,7 @@
   // Compound File Reader when based on a compound file segment
   CompoundFileReader cfsReader = null;
 
-  private class Norm {
+  private class Norm extends NormFactors {
     public Norm(IndexInput in, int number)
     {
       this.in = in;
@@ -81,6 +81,11 @@
       directory().renameFile(segment + ".tmp", fileName);
       this.dirty = false;
     }
+
+    @Override
+    public byte getByte(int doc) {
+      return bytes[doc];
+    }
   }
 
   private Hashtable norms = new Hashtable();
@@ -455,7 +460,11 @@
 
     return ones;
   }
 
+  // can return null if norms aren't stored
+  /**
+   * @deprecated
+   */
   protected synchronized byte[] getNorms(String field) throws IOException {
     Norm norm = (Norm) norms.get(field);
     if (norm == null) return null;  // not indexed, or norms not stored
@@ -468,6 +477,18 @@
     return norm.bytes;
   }
 
+  public synchronized NormFactors getNormFactors(String field) throws IOException {
+    Norm norm = (Norm) norms.get(field);
+    if (norm == null) return NormFactors.getEmptyInstance(); // not indexed, or norms not stored
+
+    if (norm.bytes == null) {                     // value not yet read
+      byte[] bytes = new byte[maxDoc()];
+      norms(field, bytes, 0);
+      norm.bytes = bytes;                         // cache it
+    }
+    return norm;
+  }
+
   // returns fake norms if norms aren't available
   public synchronized byte[] norms(String field) throws IOException {
     byte[] bytes = getNorms(field);
@@ -536,7 +557,7 @@
       }
     }
   }
-  
+
   /**
   * Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.
   * @return TermVectorsReader
@@ -549,7 +570,7 @@
     }
     return tvReader;
   }
-  
+
   /** Return a term frequency vector for the specified document and field. The
    *  vector returned contains term numbers and frequencies for all terms in
    *  the specified field of this document, if the field had storeTermVector
@@ -559,13 +580,13 @@
   public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException {
     // Check if this field is invalid or has no stored term vector
    FieldInfo fi = fieldInfos.fieldInfo(field);
-    if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null) 
+    if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null)
       return null;
-    
+
    TermVectorsReader termVectorsReader = getTermVectorsReader();
    if (termVectorsReader == null)
      return null;
-    
+
    return termVectorsReader.get(docNumber, field);
   }
 
@@ -580,11 +601,11 @@
   public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException {
     if (termVectorsReaderOrig == null)
       return null;
-    
+
     TermVectorsReader termVectorsReader = getTermVectorsReader();
     if (termVectorsReader == null)
       return null;
-    
+
     return termVectorsReader.get(docNumber);
   }
 }