Index: org/apache/lucene/search/spans/SpanScorer.java =================================================================== --- org/apache/lucene/search/spans/SpanScorer.java (revision 382121) +++ org/apache/lucene/search/spans/SpanScorer.java (working copy) @@ -18,6 +18,7 @@ import java.io.IOException; +import org.apache.lucene.index.NormFactors; import org.apache.lucene.search.Weight; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Explanation; @@ -27,7 +28,7 @@ class SpanScorer extends Scorer { private Spans spans; private Weight weight; - private byte[] norms; + private NormFactors normFactors; private float value; private boolean firstTime = true; @@ -36,13 +37,18 @@ private int doc; private float freq; + SpanScorer(Spans spans, Weight weight, Similarity similarity, NormFactors normFactors) { + super(similarity); + this.spans = spans; + this.normFactors = normFactors; + this.weight = weight; + this.value = weight.getValue(); + } + + /** @deprecated */ SpanScorer(Spans spans, Weight weight, Similarity similarity, byte[] norms) throws IOException { - super(similarity); - this.spans = spans; - this.norms = norms; - this.weight = weight; - this.value = weight.getValue(); + this(spans, weight, similarity, NormFactors.newInstance(norms)); } public boolean next() throws IOException { @@ -69,7 +75,7 @@ public float score() throws IOException { float raw = getSimilarity().tf(freq) * value; // raw score - return raw * Similarity.decodeNorm(norms[doc]); // normalize + return raw * normFactors.getFactor(doc); // normalize } public boolean skipTo(int target) throws IOException { Index: org/apache/lucene/search/spans/SpanWeight.java =================================================================== --- org/apache/lucene/search/spans/SpanWeight.java (revision 382121) +++ org/apache/lucene/search/spans/SpanWeight.java (working copy) @@ -22,6 +22,7 @@ import java.util.Collection; import org.apache.lucene.index.IndexReader; +import 
org.apache.lucene.index.NormFactors; import org.apache.lucene.index.Term; import org.apache.lucene.search.Query; @@ -67,7 +68,7 @@ public Scorer scorer(IndexReader reader) throws IOException { return new SpanScorer(query.getSpans(reader), this, similarity, - reader.norms(query.getField())); + reader.getNormFactors(query.getField())); } public Explanation explain(IndexReader reader, int doc) @@ -121,9 +122,9 @@ fieldExpl.addDetail(idfExpl); Explanation fieldNormExpl = new Explanation(); - byte[] fieldNorms = reader.norms(field); + NormFactors fieldNorms = reader.getNormFactors(field); float fieldNorm = - fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f; + fieldNorms!=null ? fieldNorms.getFactor(doc) : 0.0f; fieldNormExpl.setValue(fieldNorm); fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")"); fieldExpl.addDetail(fieldNormExpl); Index: org/apache/lucene/search/SloppyPhraseScorer.java =================================================================== --- org/apache/lucene/search/SloppyPhraseScorer.java (revision 382121) +++ org/apache/lucene/search/SloppyPhraseScorer.java (working copy) @@ -16,14 +16,22 @@ * limitations under the License. 
*/ -import org.apache.lucene.index.TermPositions; - import java.io.IOException; +import org.apache.lucene.index.NormFactors; +import org.apache.lucene.index.TermPositions; + final class SloppyPhraseScorer extends PhraseScorer { private int slop; SloppyPhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity, + int slop, NormFactors normFactors) { + super(weight, tps, positions, similarity, normFactors); + this.slop = slop; + } + + /** @deprecated */ + SloppyPhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity, int slop, byte[] norms) { super(weight, tps, positions, similarity, norms); this.slop = slop; Index: org/apache/lucene/search/MultiPhraseQuery.java =================================================================== --- org/apache/lucene/search/MultiPhraseQuery.java (revision 382121) +++ org/apache/lucene/search/MultiPhraseQuery.java (working copy) @@ -23,6 +23,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultipleTermPositions; +import org.apache.lucene.index.NormFactors; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermPositions; import org.apache.lucene.search.Query; @@ -76,7 +77,7 @@ /** * Allows to specify the relative position of terms within the phrase. 
- * + * * @see PhraseQuery#add(Term, int) * @param terms * @param position @@ -139,7 +140,7 @@ public void normalize(float queryNorm) { this.queryNorm = queryNorm; queryWeight *= queryNorm; // normalize query weight - value = queryWeight * idf; // idf for document + value = queryWeight * idf; // idf for document } public Scorer scorer(IndexReader reader) throws IOException { @@ -164,10 +165,10 @@ if (slop == 0) return new ExactPhraseScorer(this, tps, getPositions(), similarity, - reader.norms(field)); + reader.getNormFactors(field)); else return new SloppyPhraseScorer(this, tps, getPositions(), similarity, - slop, reader.norms(field)); + slop, reader.getNormFactors(field)); } public Explanation explain(IndexReader reader, int doc) @@ -206,9 +207,9 @@ fieldExpl.addDetail(idfExpl); Explanation fieldNormExpl = new Explanation(); - byte[] fieldNorms = reader.norms(field); + NormFactors fieldNorms = reader.getNormFactors(field); float fieldNorm = - fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f; + fieldNorms!=null ? 
fieldNorms.getFactor(doc) : 0.0f; fieldNormExpl.setValue(fieldNorm); fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")"); fieldExpl.addDetail(fieldNormExpl); Index: org/apache/lucene/search/PhraseScorer.java =================================================================== --- org/apache/lucene/search/PhraseScorer.java (revision 382121) +++ org/apache/lucene/search/PhraseScorer.java (working copy) @@ -22,7 +22,10 @@ abstract class PhraseScorer extends Scorer { private Weight weight; + + /** @deprecated */ protected byte[] norms; + private NormFactors normFactors; protected float value; private boolean firstTime = true; @@ -34,8 +37,20 @@ PhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity, - byte[] norms) { + NormFactors normFactors) { + this(weight, tps, positions, similarity, normFactors, null); + } + + /** @deprecated */ + PhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity, + byte[] norms) { + this(weight, tps, positions, similarity, NormFactors.newInstance(norms), norms); + } + + private PhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity, + NormFactors normFactors, byte[] norms) { super(similarity); + this.normFactors = normFactors; this.norms = norms; this.weight = weight; this.value = weight.getValue(); @@ -65,7 +80,7 @@ } return doNext(); } - + // next without initial increment private boolean doNext() throws IOException { while (more) { @@ -89,7 +104,7 @@ public float score() throws IOException { //System.out.println("scoring " + first.doc); float raw = getSimilarity().tf(freq) * value; // raw score - return raw * Similarity.decodeNorm(norms[first.doc]); // normalize + return raw * getNormFactors().getFactor(first.doc); // normalize } public boolean skipTo(int target) throws IOException { @@ -104,12 +119,12 @@ protected abstract float phraseFreq() throws IOException; private void init() throws IOException { - for 
(PhrasePositions pp = first; more && pp != null; pp = pp.next) + for (PhrasePositions pp = first; more && pp != null; pp = pp.next) more = pp.next(); if(more) sort(); } - + private void sort() { pq.clear(); for (PhrasePositions pp = first; pp != null; pp = pp.next) @@ -149,6 +164,13 @@ return tfExplanation; } + /** + * @return the NormFactors for the field associated with this phrase. + */ + public NormFactors getNormFactors() { + return this.normFactors; + } + public String toString() { return "scorer(" + weight + ")"; } } Index: org/apache/lucene/search/PhraseQuery.java =================================================================== --- org/apache/lucene/search/PhraseQuery.java (revision 382121) +++ org/apache/lucene/search/PhraseQuery.java (working copy) @@ -20,6 +20,7 @@ import java.util.Set; import java.util.Vector; +import org.apache.lucene.index.NormFactors; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermPositions; import org.apache.lucene.index.IndexReader; @@ -27,7 +28,7 @@ /** A Query that matches documents containing a particular sequence of terms. * A PhraseQuery is built by QueryParser for input like "new york". - * + * *

This query may be combined with other terms or queries with a {@link BooleanQuery}. */ public class PhraseQuery extends Query { @@ -74,7 +75,7 @@ * The relative position of the term within the phrase is specified explicitly. * This allows e.g. phrases with more than one term at the same position * or phrases with gaps (e.g. in connection with stopwords). - * + * * @param term * @param position */ @@ -130,7 +131,7 @@ public void normalize(float queryNorm) { this.queryNorm = queryNorm; queryWeight *= queryNorm; // normalize query weight - value = queryWeight * idf; // idf for document + value = queryWeight * idf; // idf for document } public Scorer scorer(IndexReader reader) throws IOException { @@ -147,11 +148,11 @@ if (slop == 0) // optimize exact case return new ExactPhraseScorer(this, tps, getPositions(), similarity, - reader.norms(field)); + reader.getNormFactors(field)); else return new SloppyPhraseScorer(this, tps, getPositions(), similarity, slop, - reader.norms(field)); + reader.getNormFactors(field)); } @@ -211,9 +212,9 @@ fieldExpl.addDetail(idfExpl); Explanation fieldNormExpl = new Explanation(); - byte[] fieldNorms = reader.norms(field); + NormFactors fieldNorms = reader.getNormFactors(field); float fieldNorm = - fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f; + fieldNorms!=null ? 
fieldNorms.getFactor(doc) : 0.0f; fieldNormExpl.setValue(fieldNorm); fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")"); fieldExpl.addDetail(fieldNormExpl); Index: org/apache/lucene/search/TermQuery.java =================================================================== --- org/apache/lucene/search/TermQuery.java (revision 382121) +++ org/apache/lucene/search/TermQuery.java (working copy) @@ -19,6 +19,7 @@ import java.io.IOException; import java.util.Set; +import org.apache.lucene.index.NormFactors; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermDocs; import org.apache.lucene.index.IndexReader; @@ -66,7 +67,7 @@ return null; return new TermScorer(this, termDocs, similarity, - reader.norms(term.field())); + reader.getNormFactors(term.field())); } public Explanation explain(IndexReader reader, int doc) @@ -107,9 +108,9 @@ fieldExpl.addDetail(idfExpl); Explanation fieldNormExpl = new Explanation(); - byte[] fieldNorms = reader.norms(field); + NormFactors fieldNorms = reader.getNormFactors(field); float fieldNorm = - fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f; + fieldNorms!=null ? 
fieldNorms.getFactor(doc) : 0.0f; fieldNormExpl.setValue(fieldNorm); fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")"); fieldExpl.addDetail(fieldNormExpl); Index: org/apache/lucene/search/ExactPhraseScorer.java =================================================================== --- org/apache/lucene/search/ExactPhraseScorer.java (revision 382121) +++ org/apache/lucene/search/ExactPhraseScorer.java (working copy) @@ -22,6 +22,12 @@ final class ExactPhraseScorer extends PhraseScorer { ExactPhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity, + NormFactors normFactors) { + super(weight, tps, positions, similarity, normFactors); + } + + /** @deprecated */ + ExactPhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity, byte[] norms) { super(weight, tps, positions, similarity, norms); } @@ -45,7 +51,7 @@ } freq++; // all equal: a match } while (last.nextPosition()); - + return (float)freq; } } Index: org/apache/lucene/search/TermScorer.java =================================================================== --- org/apache/lucene/search/TermScorer.java (revision 382121) +++ org/apache/lucene/search/TermScorer.java (working copy) @@ -18,6 +18,7 @@ import java.io.IOException; +import org.apache.lucene.index.NormFactors; import org.apache.lucene.index.TermDocs; /** Expert: A Scorer for documents matching a Term. @@ -25,7 +26,7 @@ final class TermScorer extends Scorer { private Weight weight; private TermDocs termDocs; - private byte[] norms; + private NormFactors normFactors; private float weightValue; private int doc; @@ -44,17 +45,30 @@ * @param norms The field norms of the document fields for the Term. 
*/ TermScorer(Weight weight, TermDocs td, Similarity similarity, - byte[] norms) { + NormFactors normFactors) { super(similarity); this.weight = weight; this.termDocs = td; - this.norms = norms; + this.normFactors = normFactors; this.weightValue = weight.getValue(); for (int i = 0; i < SCORE_CACHE_SIZE; i++) scoreCache[i] = getSimilarity().tf(i) * weightValue; } + /** Construct a TermScorer. + * @param weight The weight of the Term in the query. + * @param td An iterator over the documents matching the Term. + * @param similarity The Similarity implementation to be used for score computations. + * @param norms The field norms of the document fields for the Term. + * @deprecated + */ + TermScorer(Weight weight, TermDocs td, Similarity similarity, + byte[] norms) { + this(weight, td, similarity, NormFactors.newInstance(norms)); + } + + public void score(HitCollector hc) throws IOException { next(); score(hc, Integer.MAX_VALUE); @@ -62,7 +76,6 @@ protected boolean score(HitCollector c, int end) throws IOException { Similarity similarity = getSimilarity(); // cache sim in local - float[] normDecoder = Similarity.getNormDecoder(); while (doc < end) { // for docs in window int f = freqs[pointer]; float score = // compute tf(f)*weight @@ -70,7 +83,7 @@ ? scoreCache[f] // cache hit : similarity.tf(f)*weightValue; // cache miss - score *= normDecoder[norms[doc] & 0xFF]; // normalize for field + score *= normFactors.getFactor(doc); // normalize for field c.collect(doc, score); // collect score @@ -83,7 +96,7 @@ doc = Integer.MAX_VALUE; // set to sentinel value return false; } - } + } doc = docs[pointer]; } return true; @@ -110,23 +123,23 @@ doc = Integer.MAX_VALUE; // set to sentinel value return false; } - } + } doc = docs[pointer]; return true; } - public float score() { + public float score() throws IOException { int f = freqs[pointer]; float raw = // compute tf(f)*weight f < SCORE_CACHE_SIZE // check cache ? 
scoreCache[f] // cache hit : getSimilarity().tf(f)*weightValue; // cache miss - return raw * Similarity.decodeNorm(norms[doc]); // normalize for field + return raw * normFactors.getFactor(doc); // normalize for field } /** Skips to the first match beyond the current whose document number is - * greater than or equal to a given target. + * greater than or equal to a given target. *
The implementation uses {@link TermDocs#skipTo(int)}. * @param target The target document number. * @return true iff there is such a match. @@ -178,7 +191,7 @@ termDocs.close(); tfExplanation.setValue(getSimilarity().tf(tf)); tfExplanation.setDescription("tf(termFreq("+query.getTerm()+")="+tf+")"); - + return tfExplanation; } Index: org/apache/lucene/search/PhrasePrefixQuery.java =================================================================== --- org/apache/lucene/search/PhrasePrefixQuery.java (revision 382121) +++ org/apache/lucene/search/PhrasePrefixQuery.java (working copy) @@ -23,6 +23,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultipleTermPositions; +import org.apache.lucene.index.NormFactors; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermPositions; import org.apache.lucene.search.Query; @@ -35,7 +36,7 @@ * add(Term) on the term "Microsoft", then find all terms that has "app" as * prefix using IndexReader.terms(Term), and use PhrasePrefixQuery.add(Term[] * terms) to add them to the query. - * + * * @deprecated use {@link org.apache.lucene.search.MultiPhraseQuery} instead * @author Anders Nielsen * @version 1.0 @@ -77,7 +78,7 @@ /** * Allows to specify the relative position of terms within the phrase. 
- * + * * @see PhraseQuery#add(Term, int) * @param terms * @param position @@ -140,7 +141,7 @@ public void normalize(float queryNorm) { this.queryNorm = queryNorm; queryWeight *= queryNorm; // normalize query weight - value = queryWeight * idf; // idf for document + value = queryWeight * idf; // idf for document } public Scorer scorer(IndexReader reader) throws IOException { @@ -165,10 +166,10 @@ if (slop == 0) return new ExactPhraseScorer(this, tps, getPositions(), similarity, - reader.norms(field)); + reader.getNormFactors(field)); else return new SloppyPhraseScorer(this, tps, getPositions(), similarity, - slop, reader.norms(field)); + slop, reader.getNormFactors(field)); } public Explanation explain(IndexReader reader, int doc) @@ -207,9 +208,9 @@ fieldExpl.addDetail(idfExpl); Explanation fieldNormExpl = new Explanation(); - byte[] fieldNorms = reader.norms(field); + NormFactors fieldNorms = reader.getNormFactors(field); float fieldNorm = - fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f; + fieldNorms!=null ? 
fieldNorms.getFactor(doc) : 0.0f; fieldNormExpl.setValue(fieldNorm); fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")"); fieldExpl.addDetail(fieldNormExpl); Index: org/apache/lucene/index/MultiReader.java =================================================================== --- org/apache/lucene/index/MultiReader.java (revision 382121) +++ org/apache/lucene/index/MultiReader.java (working copy) @@ -30,6 +30,7 @@ public class MultiReader extends IndexReader { private IndexReader[] subReaders; private int[] starts; // 1st docno for each segment + private Hashtable normFactorsCache = new Hashtable(); private Hashtable normsCache = new Hashtable(); private int maxDoc = 0; private int numDocs = -1; @@ -159,6 +160,18 @@ return ones; } + public synchronized NormFactors getNormFactors(String field) throws IOException { + NormFactors factors = (NormFactors) normFactorsCache.get(field); + if (factors != null) + return factors; + if (!hasNorms(field)) + return NormFactors.getEmptyInstance(); + + NormFactors multiNorms = new MultiNormFactors(field); + normFactorsCache.put(field, multiNorms); + return multiNorms; + } + public synchronized byte[] norms(String field) throws IOException { byte[] bytes = (byte[])normsCache.get(field); if (bytes != null) @@ -276,6 +289,24 @@ } return fieldSet; } + + /** + * Represents a set of norm factors that spans a set of documents. 
+ */ + class MultiNormFactors extends NormFactors { + private String field; + + public MultiNormFactors(String field) throws IOException { + this.field = field; + } + + // overrides NormFactors.getByte + public byte getByte(int doc) throws IOException { + int subReader = MultiReader.this.readerIndex(doc); + NormFactors subNormFactors = subReaders[subReader].getNormFactors(this.field); + return subNormFactors.getByte(doc - MultiReader.this.starts[subReader]); + } + } } class MultiTermEnum extends TermEnum { Index: org/apache/lucene/index/IndexReader.java =================================================================== --- org/apache/lucene/index/IndexReader.java (revision 382121) +++ org/apache/lucene/index/IndexReader.java (working copy) @@ -44,7 +44,7 @@ document in the index. These document numbers are ephemeral--they may change as documents are added to and deleted from an index. Clients should thus not rely on a given document having the same number between sessions. - +

An IndexReader can be opened on a directory for which an IndexWriter is opened already, but it cannot be used to delete documents from the index then. @@ -52,13 +52,13 @@ @version $Id$ */ public abstract class IndexReader { - + public static final class FieldOption { private String option; private FieldOption() { } private FieldOption(String option) { this.option = option; - } + } public String toString() { return this.option; } @@ -81,21 +81,21 @@ // all fields where termvectors with offset and position values set public static final FieldOption TERMVECTOR_WITH_POSITION_OFFSET = new FieldOption ("TERMVECTOR_WITH_POSITION_OFFSET"); } - + /** - * Constructor used if IndexReader is not owner of its directory. + * Constructor used if IndexReader is not owner of its directory. * This is used for IndexReaders that are used within other IndexReaders that take care or locking directories. - * + * * @param directory Directory where IndexReader files reside. */ protected IndexReader(Directory directory) { this.directory = directory; } - + /** * Constructor used if IndexReader is owner of its directory. * If IndexReader is owner of its directory, it locks its directory in case of write operations. - * + * * @param directory Directory where IndexReader files reside. * @param segmentInfos Used for write-l * @param closeDirectory @@ -119,7 +119,7 @@ private Lock writeLock; private boolean stale; private boolean hasChanges; - + /** Returns an IndexReader reading the index in an FSDirectory in the named path. */ @@ -132,7 +132,7 @@ public static IndexReader open(File path) throws IOException { return open(FSDirectory.getDirectory(path, false), true); } - + /** Returns an IndexReader reading the index in the given Directory. 
*/ public static IndexReader open(final Directory directory) throws IOException { return open(directory, false); @@ -153,7 +153,7 @@ for (int i = 0; i < infos.size(); i++) readers[i] = SegmentReader.get(infos.info(i)); return new MultiReader(directory, infos, closeDirectory, readers); - + } }.run(); } @@ -162,28 +162,28 @@ /** Returns the directory this index resides in. */ public Directory directory() { return directory; } - /** + /** * Returns the time the index in the named directory was last modified. * Do not use this to check whether the reader is still up-to-date, use - * {@link #isCurrent()} instead. + * {@link #isCurrent()} instead. */ public static long lastModified(String directory) throws IOException { return lastModified(new File(directory)); } - /** - * Returns the time the index in the named directory was last modified. + /** + * Returns the time the index in the named directory was last modified. * Do not use this to check whether the reader is still up-to-date, use - * {@link #isCurrent()} instead. + * {@link #isCurrent()} instead. */ public static long lastModified(File directory) throws IOException { return FSDirectory.fileModified(directory, IndexFileNames.SEGMENTS); } - /** - * Returns the time the index in the named directory was last modified. + /** + * Returns the time the index in the named directory was last modified. * Do not use this to check whether the reader is still up-to-date, use - * {@link #isCurrent()} instead. + * {@link #isCurrent()} instead. */ public static long lastModified(Directory directory) throws IOException { return directory.fileModified(IndexFileNames.SEGMENTS); @@ -193,7 +193,7 @@ * Reads version number from segments files. The version number is * initialized with a timestamp and then increased by one for each change of * the index. - * + * * @param directory where the index resides. * @return version number. 
* @throws IOException if segments file cannot be read @@ -206,7 +206,7 @@ * Reads version number from segments files. The version number is * initialized with a timestamp and then increased by one for each change of * the index. - * + * * @param directory where the index resides. * @return version number. * @throws IOException if segments file cannot be read @@ -222,7 +222,7 @@ * Reads version number from segments files. The version number is * initialized with a timestamp and then increased by one for each change of * the index. - * + * * @param directory where the index resides. * @return version number. * @throws IOException if segments file cannot be read. @@ -230,12 +230,12 @@ public static long getCurrentVersion(Directory directory) throws IOException { synchronized (directory) { // in- & inter-process sync Lock commitLock=directory.makeLock(IndexWriter.COMMIT_LOCK_NAME); - + boolean locked=false; - + try { locked=commitLock.obtain(IndexWriter.COMMIT_LOCK_TIMEOUT); - + return SegmentInfos.readCurrentVersion(directory); } finally { if (locked) { @@ -244,7 +244,7 @@ } } } - + /** * Version number when this IndexReader was opened. */ @@ -256,18 +256,18 @@ * Check whether this IndexReader still works on a current version of the index. * If this is not the case you will need to re-open the IndexReader to * make sure you see the latest changes made to the index. - * + * * @throws IOException */ public boolean isCurrent() throws IOException { synchronized (directory) { // in- & inter-process sync Lock commitLock=directory.makeLock(IndexWriter.COMMIT_LOCK_NAME); - + boolean locked=false; - + try { locked=commitLock.obtain(IndexWriter.COMMIT_LOCK_TIMEOUT); - + return SegmentInfos.readCurrentVersion(directory) == segmentInfos.getVersion(); } finally { if (locked) { @@ -284,7 +284,7 @@ * If no such fields existed, the method returns null. 
The term vectors that are * returned my either be of type TermFreqVector or of type TermPositionsVector if * positions or offsets have been stored. - * + * * @param docNumber document for which term frequency vectors are returned * @return array of term frequency vectors. May be null if no term vectors have been * stored for the specified document. @@ -294,14 +294,14 @@ abstract public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException; - + /** * Return a term frequency vector for the specified document and field. The * returned vector contains terms and frequencies for the terms in * the specified field of this document, if the field had the storeTermVector - * flag set. If termvectors had been stored with positions or offsets, a + * flag set. If termvectors had been stored with positions or offsets, a * TermPositionsVector is returned. - * + * * @param docNumber document for which the term frequency vector is returned * @param field field for which the term frequency vector is returned. * @return term frequency vector May be null if field does not exist in the specified * @@ -311,7 +311,7 @@ */ abstract public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException; - + /** * Returns true if an index exists at the specified directory. * If the directory does not exist or if there is no index in it. @@ -362,18 +362,32 @@ /** Returns true if any documents have been deleted */ public abstract boolean hasDeletions(); - + /** Returns true if there are norms stored for this field. */ public boolean hasNorms(String field) throws IOException { // backward compatible implementation. // SegmentReader has an efficient implementation. return norms(field) != null; } + /** + * Returns the normalization factors for the named field of every document. + * This is used by the search code to score documents. 
+ * + * @see Field#setBoost(float) + */ + public NormFactors getNormFactors(String field) throws IOException { + // This implementation should eventually go away when {@link #norms(String)} + // goes away + return NormFactors.newInstance(norms(field)); + } + + /** Returns the byte-encoded normalization factor for the named field of * every document. This is used by the search code to score documents. * * @see Field#setBoost(float) + * @deprecated use {@link #getNormFactors(String)} instead. */ public abstract byte[] norms(String field) throws IOException; @@ -401,9 +415,9 @@ doSetNorm(doc, field, value); hasChanges = true; } - + /** Implements setNorm in subclass.*/ - protected abstract void doSetNorm(int doc, String field, byte value) + protected abstract void doSetNorm(int doc, String field, byte value) throws IOException; /** Expert: Resets the normalization factor for the named field of the named @@ -479,7 +493,7 @@ /** * Tries to acquire the WriteLock on this directory. * this method is only valid if this IndexReader is directory owner. - * + * * @throws IOException If WriteLock cannot be acquired. */ private void aquireWriteLock() throws IOException { @@ -544,7 +558,7 @@ * See {@link #delete(int)} for information about when this deletion will * become effective. * @return the number of documents deleted - * + * * @deprecated Use {@link #deleteDocuments(Term term)} instead. */ public final int delete(Term term) throws IOException { @@ -556,7 +570,7 @@ * the document. Then to delete such a document, one merely constructs a * term with the appropriate field and the unique ID string as its text and * passes it to this method. - * See {@link #delete(int)} for information about when this deletion will + * See {@link #delete(int)} for information about when this deletion will * become effective. * @return the number of documents deleted */ @@ -582,13 +596,13 @@ doUndeleteAll(); hasChanges = true; } - + /** Implements actual undeleteAll() in subclass. 
*/ protected abstract void doUndeleteAll() throws IOException; /** * Commit changes resulting from delete, undeleteAll, or setNorm operations - * + * * @throws IOException */ protected final synchronized void commit() throws IOException{ @@ -614,10 +628,10 @@ } hasChanges = false; } - + /** Implements commit. */ protected abstract void doCommit() throws IOException; - + /** * Closes files associated with this index. * Also saves any new deletions to disk. @@ -640,13 +654,13 @@ writeLock = null; } } - + /** * Returns a list of all unique field names that exist in the index pointed * to by this IndexReader. * @return Collection of Strings indicating the names of the fields * @throws IOException if there is a problem with accessing the index - * + * * @deprecated Replaced by {@link #getFieldNames(IndexReader.FieldOption)} */ public abstract Collection getFieldNames() throws IOException; @@ -659,19 +673,19 @@ * false if only unindexed fields should be returned. * @return Collection of Strings indicating the names of the fields * @throws IOException if there is a problem with accessing the index - * + * * @deprecated Replaced by {@link #getFieldNames(IndexReader.FieldOption)} */ public abstract Collection getFieldNames(boolean indexed) throws IOException; /** - * - * @param storedTermVector if true, returns only Indexed fields that have term vector info, - * else only indexed fields without term vector info + * + * @param storedTermVector if true, returns only Indexed fields that have term vector info, + * else only indexed fields without term vector info * @return Collection of Strings indicating the names of the fields - * + * * @deprecated Replaced by {@link #getFieldNames(IndexReader.FieldOption)} - */ + */ public Collection getIndexedFieldNames(boolean storedTermVector){ if(storedTermVector){ Set fieldSet = new HashSet(); @@ -684,18 +698,18 @@ else return getIndexedFieldNames(Field.TermVector.NO); } - + /** * Get a list of unique field names that exist in this 
index, are indexed, and have * the specified term vector information. - * + * * @param tvSpec specifies which term vector information should be available for the fields * @return Collection of Strings indicating the names of the fields - * + * * @deprecated Replaced by {@link #getFieldNames(IndexReader.FieldOption)} */ public abstract Collection getIndexedFieldNames(Field.TermVector tvSpec); - + /** * Get a list of unique field names that exist in this index and have the specified * field option information. @@ -741,7 +755,7 @@ directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release(); directory.makeLock(IndexWriter.COMMIT_LOCK_NAME).release(); } - + /** * Prints the filename and size of each file within a given compound file. * Add the -extract flag to extract files to the current working directory. @@ -768,7 +782,7 @@ Directory dir = null; CompoundFileReader cfr = null; - + try { File file = new File(filename); String dirname = file.getAbsoluteFile().getParent(); @@ -778,7 +792,7 @@ String [] files = cfr.list(); Arrays.sort(files); // sort the array of filename so that the output is more readable - + for (int i = 0; i < files.length; ++i) { long len = cfr.fileLength(files[i]); @@ -787,7 +801,7 @@ IndexInput ii = cfr.openInput(files[i]); FileOutputStream f = new FileOutputStream(files[i]); - + // read and write with a small buffer, which is more effectiv than reading byte by byte byte[] buffer = new byte[1024]; int chunk = buffer.length; @@ -797,7 +811,7 @@ f.write(buffer, 0, bufLen); len -= bufLen; } - + f.close(); ii.close(); } Index: org/apache/lucene/index/NormFactors.java =================================================================== --- org/apache/lucene/index/NormFactors.java (revision 0) +++ org/apache/lucene/index/NormFactors.java (revision 0) @@ -0,0 +1,99 @@ +package org.apache.lucene.index; + +import java.io.IOException; + +import org.apache.lucene.search.Similarity; + +/** + * Copyright 2006 The Apache Software Foundation + * + * Licensed under 
the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Abstract class that represents the normalization factors for each document. + * Provides an abstraction from the underlying byte array to allow lazy loading + * or mmaped access. + */ + +public abstract class NormFactors { + /** + * Retrieve an encoded normalization factor from the normalization array. + * @param doc the document number. Also the index into the byte array + * @return the byte representation of the normalization factor. + * @throws IOException + * @see Similarity#decodeNorm(byte) + */ + public abstract byte getByte(int doc) throws IOException; + + /** + * Retrieve a normalization factor from the normalization array. + * @param doc the document number. Also the index into the byte array. + * @return the normalization factor + */ + public float getFactor(int doc) throws IOException { + return Similarity.decodeNorm(getByte(doc)); + } + + /** + * @return a NormFactors that returns 1.0f for all + * normalization factors. + */ + public static NormFactors getEmptyInstance() { + return EmptyNormFactors.SINGLETON; + } + + /** + * Convert a byte array to a NormFactors. + * @param norms the byte array from the segment + * @return a new NormFactors that indexes the given byte array + * @deprecated You should rely on {@link IndexReader#getNormFactors(String)}. 
+ */ + public static NormFactors newInstance(byte[] norms) { + return new ByteNormFactors(norms); + } + + /** + * Provide a "fake norms" for use when a field doesn't have normalization. + * This will return the result of {@link Similarity#encodeNorm(float)} for + * the value 1.0f. + */ + static class EmptyNormFactors extends NormFactors{ + // Stores the encoded one byte + public final byte encodedOne = Similarity.encodeNorm(1.0f); + + private EmptyNormFactors() {} + + public byte getByte(int doc) { + return encodedOne; + } + + static EmptyNormFactors SINGLETON = new EmptyNormFactors(); + } + + + /** + * Helper class to convert the old byte[] norms into a NormFactors. + * + */ + static class ByteNormFactors extends NormFactors { + private byte[] norms; + public ByteNormFactors(byte[] norms) { + this.norms = norms; + } + + public byte getByte(int doc) { + return this.norms[doc]; + } + } +} Index: org/apache/lucene/index/FilterIndexReader.java =================================================================== --- org/apache/lucene/index/FilterIndexReader.java (revision 382121) +++ org/apache/lucene/index/FilterIndexReader.java (working copy) @@ -111,6 +111,8 @@ return in.hasNorms(field); } + public NormFactors getNormFactors(String f) throws IOException { return in.getNormFactors(f); } + /** @deprecated */ public byte[] norms(String f) throws IOException { return in.norms(f); } public void norms(String f, byte[] bytes, int offset) throws IOException { in.norms(f, bytes, offset); @@ -145,7 +147,7 @@ public Collection getIndexedFieldNames (Field.TermVector tvSpec){ return in.getIndexedFieldNames(tvSpec); } - + public Collection getFieldNames(IndexReader.FieldOption fieldNames) { return in.getFieldNames(fieldNames); } Index: org/apache/lucene/index/ParallelReader.java =================================================================== --- org/apache/lucene/index/ParallelReader.java (revision 382121) +++ org/apache/lucene/index/ParallelReader.java (working copy) @@ -41,7 
+41,7 @@ * change rarely and small fields that change more frequently. The smaller * fields may be re-indexed in a new index and both indexes may be searched * together. - * + * *

Warning: It is up to you to make sure all indexes * are created and modified the same way. For example, if you add * documents to one index, you need to add the same documents in the @@ -51,7 +51,7 @@ public class ParallelReader extends IndexReader { private List readers = new ArrayList(); private SortedMap fieldToReader = new TreeMap(); - private List storedFieldReaders = new ArrayList(); + private List storedFieldReaders = new ArrayList(); private int maxDoc; private int numDocs; @@ -59,7 +59,7 @@ /** Construct a ParallelReader. */ public ParallelReader() throws IOException { super(null); } - + /** Add an IndexReader. */ public void add(IndexReader reader) throws IOException { add(reader, false); @@ -68,10 +68,10 @@ /** Add an IndexReader whose stored fields will not be returned. This can * accellerate search when stored fields are only needed from a subset of * the IndexReaders. - * - * @throws IllegalArgumentException if not all indexes contain the same number + * + * @throws IllegalArgumentException if not all indexes contain the same number * of documents - * @throws IllegalArgumentException if not all indexes have the same value + * @throws IllegalArgumentException if not all indexes have the same value * of {@link IndexReader#maxDoc()} */ public void add(IndexReader reader, boolean ignoreStoredFields) @@ -89,7 +89,7 @@ if (reader.numDocs() != numDocs) throw new IllegalArgumentException ("All readers must have same numDocs: "+numDocs+"!="+reader.numDocs()); - + Iterator i = reader.getFieldNames(IndexReader.FieldOption.ALL).iterator(); while (i.hasNext()) { // update fieldToReader map String field = (String)i.next(); @@ -169,6 +169,13 @@ return ((IndexReader)fieldToReader.get(field)).hasNorms(field); } + public NormFactors getNormFactors(String field) throws IOException { + return ((IndexReader)fieldToReader.get(field)).getNormFactors(field); + } + + /** + * @deprecated + */ public byte[] norms(String field) throws IOException { return 
((IndexReader)fieldToReader.get(field)).norms(field); } @@ -264,12 +271,12 @@ if (field != null) termEnum = ((IndexReader)fieldToReader.get(field)).terms(); } - + public ParallelTermEnum(Term term) throws IOException { field = term.field(); termEnum = ((IndexReader)fieldToReader.get(field)).terms(term); } - + public boolean next() throws IOException { if (field == null) return false; @@ -279,7 +286,7 @@ // still within field? if (next && termEnum.term().field() == field) return true; // yes, keep going - + termEnum.close(); // close old termEnum // find the next field, if any @@ -290,7 +297,7 @@ } return false; // no more fields - + } public Term term() { return termEnum.term(); } Index: org/apache/lucene/index/SegmentReader.java =================================================================== --- org/apache/lucene/index/SegmentReader.java (revision 382121) +++ org/apache/lucene/index/SegmentReader.java (working copy) @@ -51,7 +51,7 @@ // Compound File Reader when based on a compound file segment CompoundFileReader cfsReader = null; - private class Norm { + private class Norm extends NormFactors { public Norm(IndexInput in, int number) { this.in = in; @@ -81,6 +81,11 @@ directory().renameFile(segment + ".tmp", fileName); this.dirty = false; } + + @Override + public byte getByte(int doc) { + return bytes[doc]; + } } private Hashtable norms = new Hashtable(); @@ -455,7 +460,11 @@ return ones; } + // can return null if norms aren't stored + /** + * @deprecated + */ protected synchronized byte[] getNorms(String field) throws IOException { Norm norm = (Norm) norms.get(field); if (norm == null) return null; // not indexed, or norms not stored @@ -468,6 +477,18 @@ return norm.bytes; } + public synchronized NormFactors getNormFactors(String field) throws IOException { + Norm norm = (Norm) norms.get(field); + if (norm == null) return NormFactors.getEmptyInstance(); // not indexed, or norms not stored + + if (norm.bytes == null) { // value not yet read + byte[] bytes = 
new byte[maxDoc()]; + norms(field, bytes, 0); + norm.bytes = bytes; // cache it + } + return norm; + } + // returns fake norms if norms aren't available public synchronized byte[] norms(String field) throws IOException { byte[] bytes = getNorms(field); @@ -536,7 +557,7 @@ } } } - + /** * Create a clone from the initial TermVectorsReader and store it in the ThreadLocal. * @return TermVectorsReader @@ -549,7 +570,7 @@ } return tvReader; } - + /** Return a term frequency vector for the specified document and field. The * vector returned contains term numbers and frequencies for all terms in * the specified field of this document, if the field had storeTermVector @@ -559,13 +580,13 @@ public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException { // Check if this field is invalid or has no stored term vector FieldInfo fi = fieldInfos.fieldInfo(field); - if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null) + if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null) return null; - + TermVectorsReader termVectorsReader = getTermVectorsReader(); if (termVectorsReader == null) return null; - + return termVectorsReader.get(docNumber, field); } @@ -580,11 +601,11 @@ public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException { if (termVectorsReaderOrig == null) return null; - + TermVectorsReader termVectorsReader = getTermVectorsReader(); if (termVectorsReader == null) return null; - + return termVectorsReader.get(docNumber); } }