Index: lucene/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java	(revision 0)
+++ lucene/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java	(revision 0)
@@ -0,0 +1,246 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.BulkPostingsEnum;
+import org.apache.lucene.index.BulkPostingsEnum.BlockReader;
+import org.apache.lucene.util.Bits;
+
+/** Expert: A Scorer for documents matching a Term.
+ *  This scorer only makes sense for the omitTF=true case.
+ */
+final class MatchOnlyTermScorer extends Scorer {
+  private final BulkPostingsEnum docsEnum;
+  private final byte[] norms;
+  private int doc;
+
+  private final int[] docDeltas;
+  private int docPointer;
+  private int docPointerMax;
+  private boolean first = true;
+
+  private final float rawScore;
+  private final BlockReader docDeltasReader;
+  private final Bits skipDocs;
+  private final int docFreq;
+  private int count;
+
+  /**
+   * Construct a MatchOnlyTermScorer.
+   *
+   * @param weight
+   *          The weight of the Term in the query.
+   * @param td
+   *          An iterator over the documents matching the Term.
+   * @param similarity
+   *          The Similarity implementation to be used for score
+   *          computations.
+   * @param norms
+   *          The field norms of the document fields for the Term.
+   */
+  MatchOnlyTermScorer(Weight weight, BulkPostingsEnum td, BlockReader docDeltasReader, int docFreq, Bits skipDocs, Similarity similarity, byte[] norms) throws IOException {
+    super(similarity, weight);
+
+    assert td.getFreqsReader() == null;
+
+    this.docsEnum = td;
+    this.docFreq = docFreq;
+    this.docDeltasReader = docDeltasReader;
+    docDeltas = docDeltasReader.getBuffer();
+    reset();
+
+    this.skipDocs = skipDocs;
+    this.norms = norms;
+    rawScore = getSimilarity().tf(1f) * weight.getValue();
+  }
+
+  @Override
+  public void score(Collector c) throws IOException {
+    score(c, Integer.MAX_VALUE, nextDoc());
+  }
+
+  // firstDocID is ignored since nextDoc() sets 'doc'
+  @Override
+  protected boolean score(Collector c, int end, int firstDocID) throws IOException {
+    c.setScorer(this);
+    // nocommit -- this can leave scorer on a deleted doc...
+    while (doc < end) {                           // for docs in window
+      if (skipDocs == null || !skipDocs.get(doc)) {
+        c.collect(doc);                           // collect
+      }
+      if (count == docFreq) {
+        doc = NO_MORE_DOCS;
+        return false;
+      }
+      count++;
+      fillDocDeltas();
+      doc += docDeltas[docPointer];
+    }
+    return true;
+  }
+
+
+  @Override
+  public int docID() {
+    return first ? -1 : doc;
+  }
+
+  @Override
+  public float freq() {
+    return 1.0f;
+  }
+
+  /**
+   * Advances to the next document matching the query.
+   * The iterator over the matching documents is buffered using
+   * {@link BulkPostingsEnum.BlockReader#fill()}.
+   *
+   * @return the document matching the query or NO_MORE_DOCS if there are no more documents.
+   */
+  @Override
+  public int nextDoc() throws IOException {
+    while(count < docFreq) {
+      fillDocDeltas();
+      count++;
+      doc += docDeltas[docPointer];
+      first = false;
+      assert doc >= 0 && (skipDocs == null || doc < skipDocs.length()) && doc != NO_MORE_DOCS: "doc=" + doc + " skipDocs=" + skipDocs + " skipDocs.length=" + (skipDocs==null? "n/a" : skipDocs.length());
+      if (skipDocs == null || !skipDocs.get(doc)) {
+        return doc;
+      }
+    }
+
+    return doc = NO_MORE_DOCS;
+  }
+
+  @Override
+  public float score() {
+    assert !first;
+    assert doc != NO_MORE_DOCS;
+
+    return norms == null ? rawScore : rawScore * getSimilarity().decodeNormValue(norms[doc]);  // normalize for field
+  }
+
+  /**
+   * Advances to the first match beyond the current whose document number is
+   * greater than or equal to a given target.
+   * The implementation uses {@link BulkPostingsEnum#jump(int,int)}.
+   *
+   * @param target
+   *          The target document number.
+   * @return the matching document or NO_MORE_DOCS if none exist.
+   */
+  @Override
+  public int advance(final int target) throws IOException {
+
+    // nocommit: should we, here, optimize .advance(target that isn't
+    // too far away) into scan?  seems like simple win?
+
+    // first scan current doc deltas block
+    for (docPointer++; docPointer < docPointerMax && count < docFreq; docPointer++) {
+      assert first || docDeltas[docPointer] > 0;
+      doc += docDeltas[docPointer];
+      first = false;
+      count++;
+
+      if (doc >= target && (skipDocs == null || !skipDocs.get(doc))) {
+        return doc;
+      }
+    }
+
+    if (count == docFreq) {
+      return doc = NO_MORE_DOCS;
+    }
+
+    // not found in current block, seek underlying stream
+    BulkPostingsEnum.JumpResult jumpResult = docsEnum.jump(target, count);
+    if (jumpResult != null) {
+      count = jumpResult.count;
+      doc = jumpResult.docID;
+      first = false;
+      reset();
+    } else {
+      // seek did not jump -- just fill next buffer
+      docPointerMax = docDeltasReader.fill();
+      if (docPointerMax != 0) {
+        docPointer = 0;
+        assert first || docDeltas[0] > 0;
+        doc += docDeltas[0];
+        count++;
+        first = false;
+      } else {
+        return doc = NO_MORE_DOCS;
+      }
+    }
+
+    // now scan
+    return scan(target);
+  }
+
+  private int scan(final int target) throws IOException {
+    while(true) {
+      assert doc >= 0 && doc != NO_MORE_DOCS;
+      if (doc >= target && (skipDocs == null || !skipDocs.get(doc))) {
+        return doc;
+      }
+
+      if (count >= docFreq) {
+        break;
+      }
+
+      if (++docPointer >= docPointerMax) {
+        docPointerMax = docDeltasReader.fill();
+        if (docPointerMax != 0) {
+          docPointer = 0;
+        } else {
+          return doc = NO_MORE_DOCS;
+        }
+      }
+
+      assert first || docDeltas[docPointer] > 0;
+      doc += docDeltas[docPointer];
+      count++;
+    }
+    return doc = NO_MORE_DOCS;
+  }
+
+  private void fillDocDeltas() throws IOException {
+    if (++docPointer >= docPointerMax) {
+      docPointerMax = docDeltasReader.fill();
+      assert docPointerMax != 0;
+      docPointer = 0;
+    }
+  }
+
+  private void reset() throws IOException {
+    docPointerMax = docDeltasReader.end();
+    docPointer = docDeltasReader.offset();
+    if (docPointer >= docPointerMax) {
+      docPointerMax = docDeltasReader.fill();
+    }
+    docPointer--;
+  }
+
+  /** Returns a string representation of this MatchOnlyTermScorer. */
+  @Override
+  public String toString() { return "scorer(" + weight + ")"; }
+
+}

Property changes on: lucene/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Date Author Id Revision HeadURL

Index: lucene/src/java/org/apache/lucene/search/TermQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/TermQuery.java	(revision 1049446)
+++ lucene/src/java/org/apache/lucene/search/TermQuery.java	(working copy)
@@ -24,6 +24,7 @@
 import org.apache.lucene.index.BulkPostingsEnum;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.BulkPostingsEnum.BlockReader;
 import org.apache.lucene.search.Explanation.IDFExplanation;
 import org.apache.lucene.util.ToStringUtils;
 
@@ -85,10 +86,17 @@
       if (docs == null) {
         return null;
       }
-      // nocommit: we need this docfreq from TermState, MTQ knows it... but tosses it away.
-      return new TermScorer(this, docs, reader.docFreq(term.field(), term.bytes()),
-                            reader.getDeletedDocs(), similarity, reader.norms(term.field()));
+      final int docFreq = reader.docFreq(term.field(), term.bytes());
+      final BlockReader docDeltas = docs.getDocDeltasReader();
+      final BlockReader frequencies = docs.getFreqsReader();
+      if (frequencies == null) {
+        return new MatchOnlyTermScorer(this, docs, docDeltas, docFreq,
+                                       reader.getDeletedDocs(), similarity, reader.norms(term.field()));
+      } else {
+        return new TermScorer(this, docs, docDeltas, frequencies, docFreq,
+                              reader.getDeletedDocs(), similarity, reader.norms(term.field()));
+      }
     }
 
     @Override

Index: lucene/src/java/org/apache/lucene/search/TermScorer.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/TermScorer.java	(revision 1049446)
+++ lucene/src/java/org/apache/lucene/search/TermScorer.java	(working copy)
@@ -20,14 +20,15 @@
 import java.io.IOException;
 
 import org.apache.lucene.index.BulkPostingsEnum;
+import org.apache.lucene.index.BulkPostingsEnum.BlockReader;
 import org.apache.lucene.util.Bits;
 
 /** Expert: A Scorer for documents matching a Term. */
 final class TermScorer extends Scorer {
-  private BulkPostingsEnum docsEnum;
-  private byte[] norms;
-  private float weightValue;
+  private final BulkPostingsEnum docsEnum;
+  private final byte[] norms;
+  private final float weightValue;
   private int doc;
 
   private final int[] docDeltas;
@@ -40,9 +41,9 @@
   private int freqPointerMax;
 
   private static final int SCORE_CACHE_SIZE = 32;
-  private float[] scoreCache = new float[SCORE_CACHE_SIZE];
-  private final BulkPostingsEnum.BlockReader freqsReader;
-  private final BulkPostingsEnum.BlockReader docDeltasReader;
+  private final float[] scoreCache = new float[SCORE_CACHE_SIZE];
+  private final BlockReader freqsReader;
+  private final BlockReader docDeltasReader;
   private final Bits skipDocs;
   private final int docFreq;
   private int count;
@@ -60,33 +61,15 @@
    * @param norms
    *          The field norms of the document fields for the Term.
    */
-  TermScorer(Weight weight, BulkPostingsEnum td, int docFreq, Bits skipDocs, Similarity similarity, byte[] norms) throws IOException {
+  TermScorer(Weight weight, BulkPostingsEnum td, BlockReader docDeltaReader, BlockReader freqReader, int docFreq, Bits skipDocs, Similarity similarity, byte[] norms) throws IOException {
     super(similarity, weight);
     this.docsEnum = td;
     this.docFreq = docFreq;
-    docDeltasReader = td.getDocDeltasReader();
+    this.docDeltasReader = docDeltaReader;
     docDeltas = docDeltasReader.getBuffer();
-    docPointerMax = docDeltasReader.end();
-    docPointer = docDeltasReader.offset();
-    if (docPointer >= docPointerMax) {
-      docPointerMax = docDeltasReader.fill();
-    }
-    docPointer--;
-
-    freqsReader = td.getFreqsReader();
-    if (freqsReader != null) {
-      freqs = freqsReader.getBuffer();
-      freqPointerMax = freqsReader.end();
-      freqPointer = freqsReader.offset();
-      if (freqPointer >= freqPointerMax) {
-        freqPointerMax = freqsReader.fill();
-      }
-      freqPointer--;
-    } else {
-      freqs = null;
-    }
-
+    this.freqsReader = freqReader;
+    freqs = freqsReader.getBuffer();
+    reset();
     this.skipDocs = skipDocs;
    this.norms = norms;
     this.weightValue = weight.getValue();
@@ -104,11 +87,9 @@
   @Override
   protected boolean score(Collector c, int end, int firstDocID) throws IOException {
     c.setScorer(this);
-    //System.out.println("ts.collect firstdocID=" + firstDocID + " term=" + term + " end=" + end + " doc=" + doc);
     // nocommit -- this can leave scorer on a deleted doc...
     while (doc < end) {                           // for docs in window
       if (skipDocs == null || !skipDocs.get(doc)) {
-        //System.out.println("ts.collect doc=" + doc + " skipDocs=" + skipDocs + " count=" + count + " vs dF=" + docFreq);
         c.collect(doc);                           // collect
       }
       if (count == docFreq) {
@@ -116,40 +97,8 @@
         return false;
       }
       count++;
-      docPointer++;
-
-      //System.out.println("dp=" + docPointer + " dpMax=" + docPointerMax + " count=" + count + " countMax=" + docFreq);
-
-      if (docPointer >= docPointerMax) {
-        docPointerMax = docDeltasReader.fill();
-        //System.out.println("  refill!  dpMax=" + docPointerMax + " reader=" + docDeltasReader);
-        assert docPointerMax != 0;
-        docPointer = 0;
-
-        if (freqsReader != null) {
-          freqPointer++;
-          // NOTE: this code is intentionally dup'd
-          // (specialized) w/ the else clause, for better CPU
-          // branch prediction (assuming compiler doesn't
-          // de-dup): for codecs that always bulk read same
-          // number of docDeltas & freqs (standard, for,
-          // pfor), this if will always be true.  Other codecs
-          // (simple9/16) will not be aligned:
-          if (freqPointer >= freqPointerMax) {
-            freqPointerMax = freqsReader.fill();
-            assert freqPointerMax != 0;
-            freqPointer = 0;
-          }
-        }
-      } else if (freqsReader != null) {
-        freqPointer++;
-        if (freqPointer >= freqPointerMax) {
-          freqPointerMax = freqsReader.fill();
-          assert freqPointerMax != 0;
-          freqPointer = 0;
-        }
-      }
-
+      fillDeltas();
+      fillFreq();
       doc += docDeltas[docPointer];
     }
     return true;
@@ -162,11 +111,7 @@
 
   @Override
   public float freq() {
-    if (freqsReader != null) {
-      return freqs[freqPointer];
-    } else {
-      return 1.0f;
-    }
+    return freqs[freqPointer];
   }
 
   /**
@@ -178,64 +123,25 @@
    */
   @Override
   public int nextDoc() throws IOException {
-    //System.out.println("ts.nextDoc " + this + " count=" + count + " vs docFreq=" + docFreq);
     while(count < docFreq) {
-      docPointer++;
-      if (docPointer >= docPointerMax) {
-        //System.out.println("ts.nd refill docs");
-        docPointerMax = docDeltasReader.fill();
-        assert docPointerMax != 0;
-        docPointer = 0;
-        if (freqsReader != null) {
-          // NOTE: this code is intentionally dup'd
-          // (specialized) w/ the else clause, for better CPU
-          // branch prediction (assuming compiler doesn't
-          // de-dup): for codecs that always bulk read same
-          // number of docDeltas & freqs (standard, for,
-          // pfor), this if will always be true.  Other codecs
-          // (simple9/16) will not be aligned:
-          freqPointer++;
-          if (freqPointer >= freqPointerMax) {
-            //System.out.println("ts.nd refill freqs");
-            freqPointerMax = freqsReader.fill();
-            assert freqPointerMax != 0;
-            freqPointer = 0;
-          }
-        }
-      } else {
-        if (freqsReader != null) {
-          freqPointer++;
-          if (freqPointer >= freqPointerMax) {
-            //System.out.println("ts.nd refill freqs");
-            freqPointerMax = freqsReader.fill();
-            assert freqPointerMax != 0;
-            freqPointer = 0;
-          }
-        }
-      }
+      fillDeltas();
+      fillFreq();
       count++;
       doc += docDeltas[docPointer];
       first = false;
       assert doc >= 0 && (skipDocs == null || doc < skipDocs.length()) && doc != NO_MORE_DOCS: "doc=" + doc + " skipDocs=" + skipDocs + " skipDocs.length=" + (skipDocs==null? "n/a" : skipDocs.length());
       if (skipDocs == null || !skipDocs.get(doc)) {
-        //System.out.println("  ret doc=" + doc + " freq=" + freq());
         return doc;
       }
     }
-    //System.out.println("  end");
     return doc = NO_MORE_DOCS;
   }
- 
+
   @Override
   public float score() {
     assert !first;
-    final int freq;
-    if (freqsReader == null) {
-      freq = 1;
-    } else {
-      freq = freqs[freqPointer];
-    }
+    final int freq = freqs[freqPointer];
     assert freq > 0;
     assert doc != NO_MORE_DOCS;
     float raw =                                   // compute tf(f)*weight
@@ -256,7 +162,7 @@
    * @return the matching document or NO_MORE_DOCS if none exist.
    */
   @Override
-  public int advance(int target) throws IOException {
+  public int advance(final int target) throws IOException {
 
     // nocommit: should we, here, optimize .advance(target that isn't
     // too far away) into scan?  seems like simple win?
@@ -267,11 +173,7 @@
       doc += docDeltas[docPointer];
       first = false;
       count++;
-      if (freqsReader != null && ++freqPointer >= freqPointerMax) {
-        freqPointerMax = freqsReader.fill();
-        assert freqPointerMax != 0;
-        freqPointer = 0;
-      }
+      fillFreq();
       if (doc >= target && (skipDocs == null || !skipDocs.get(doc))) {
         return doc;
       }
@@ -282,26 +184,13 @@
     }
 
     // not found in current block, seek underlying stream
-    BulkPostingsEnum.JumpResult jumpResult;
+    final BulkPostingsEnum.JumpResult jumpResult;
     if (target - doc > docDeltas.length &&        // avoid useless jumps
         (jumpResult = docsEnum.jump(target, count)) != null) {
       count = jumpResult.count;
       doc = jumpResult.docID;
       first = false;
-      docPointer = docDeltasReader.offset();
-      docPointerMax = docDeltasReader.end();
-      if (docPointer >= docPointerMax) {
-        docPointerMax = docDeltasReader.fill();
-      }
-      docPointer--;
-      if (freqsReader != null) {
-        freqPointer = freqsReader.offset();
-        freqPointerMax = freqsReader.end();
-        if (freqPointer >= freqPointerMax) {
-          freqPointerMax = freqsReader.fill();
-        }
-        freqPointer--;
-      }
+      reset();
     } else {
       // seek did not jump -- just fill next buffer
       docPointerMax = docDeltasReader.fill();
@@ -314,14 +203,14 @@
       } else {
         return doc = NO_MORE_DOCS;
       }
-      if (freqsReader != null && ++freqPointer >= freqPointerMax) {
-        freqPointerMax = freqsReader.fill();
-        assert freqPointerMax != 0;
-        freqPointer = 0;
-      }
+      fillFreq();
     }
 
-    // now scan
+    // now scan -- let the compiler inline this
+    return scan(target);
+  }
+
+  private int scan(final int target) throws IOException {
     while(true) {
       assert doc >= 0 && doc != NO_MORE_DOCS;
       if (doc >= target && (skipDocs == null || !skipDocs.get(doc))) {
         return doc;
@@ -341,12 +230,7 @@
         }
       }
 
-      if (freqsReader != null && ++freqPointer >= freqPointerMax) {
-        freqPointerMax = freqsReader.fill();
-        assert freqPointerMax != 0;
-        freqPointer = 0;
-      }
-
+      fillFreq();
       assert first || docDeltas[docPointer] > 0;
       doc += docDeltas[docPointer];
       count++;
@@ -357,5 +241,35 @@
 
   /** Returns a string representation of this TermScorer. */
   @Override
   public String toString() { return "scorer(" + weight + ")"; }
- 
+
+  private final void fillFreq() throws IOException {
+    if (++freqPointer >= freqPointerMax) {
+      freqPointerMax = freqsReader.fill();
+      assert freqPointerMax != 0;
+      freqPointer = 0;
+    }
+  }
+
+  private void fillDeltas() throws IOException {
+    if (++docPointer >= docPointerMax) {
+      docPointerMax = docDeltasReader.fill();
+      assert docPointerMax != 0;
+      docPointer = 0;
+    }
+  }
+
+  private final void reset() throws IOException {
+    docPointer = docDeltasReader.offset();
+    docPointerMax = docDeltasReader.end();
+    if (docPointer >= docPointerMax) {
+      docPointerMax = docDeltasReader.fill();
+    }
+    --docPointer;
+    freqPointer = freqsReader.offset();
+    freqPointerMax = freqsReader.end();
+    if (freqPointer >= freqPointerMax) {
+      freqPointerMax = freqsReader.fill();
+    }
+    --freqPointer;
+  }
 }
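
For reviewers unfamiliar with the bulk-postings block API, the decode loop both scorers share is easier to see outside the patch. The following is an illustrative, self-contained sketch only: ToyBlockReader and every name in it are hypothetical stand-ins (only getBuffer() and fill() mirror BulkPostingsEnum.BlockReader as used above; nothing here is the actual Lucene API). It shows how doc IDs are recovered by summing deltas out of a refillable block buffer, the same shape as fillDocDeltas()/nextDoc() in the patch.

// Illustrative sketch only -- not part of the patch. ToyBlockReader is a
// hypothetical stand-in for BulkPostingsEnum.BlockReader.
public class DeltaBlockSketch {

  static class ToyBlockReader {
    private final int[] deltas;              // gaps between consecutive doc IDs
    private final int[] buffer = new int[4]; // tiny block size to force refills
    private int upto;                        // next delta to hand out

    ToyBlockReader(int[] docIDs) {
      deltas = new int[docIDs.length];
      for (int i = 0; i < docIDs.length; i++) {
        deltas[i] = docIDs[i] - (i == 0 ? 0 : docIDs[i - 1]);
      }
    }

    int[] getBuffer() { return buffer; }

    // Decodes the next block into the buffer; returns the number of valid
    // entries (0 when the postings are exhausted).
    int fill() {
      final int n = Math.min(buffer.length, deltas.length - upto);
      System.arraycopy(deltas, upto, buffer, 0, n);
      upto += n;
      return n;
    }
  }

  public static void main(String[] args) {
    final int[] docIDs = {2, 3, 7, 11, 20, 21, 40};
    final ToyBlockReader reader = new ToyBlockReader(docIDs);
    final int[] docDeltas = reader.getBuffer();
    final int docFreq = docIDs.length;
    int doc = 0, count = 0, docPointer = 0, docPointerMax = 0;

    // Same shape as MatchOnlyTermScorer.nextDoc(): refill when the pointer
    // runs off the current block, then accumulate the next delta.
    while (count < docFreq) {
      if (docPointer >= docPointerMax) {  // cf. fillDocDeltas() in the patch
        docPointerMax = reader.fill();
        docPointer = 0;
      }
      doc += docDeltas[docPointer++];
      count++;
      System.out.println("doc=" + doc);   // prints 2, 3, 7, 11, 20, 21, 40
    }
  }
}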