Index: lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java	(revision 0)
+++ lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java	(working copy)
@@ -0,0 +1,81 @@
+package org.apache.lucene.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+
+/** Tests {@link QueryRescorer}. */
+public class TestQueryRescorer extends LuceneTestCase {
+
+  /**
+   * Indexes two documents, runs a two-term BooleanQuery, then rescores the
+   * hits with a sloppy PhraseQuery and checks that the hit order flips.
+   */
+  public void testBasic() throws Exception {
+    Directory dir = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+
+    Document doc = new Document();
+    doc.add(newStringField("id", "0", Field.Store.YES));
+    doc.add(newTextField("field", "wizard the the the the the oz", Field.Store.NO));
+    w.addDocument(doc);
+    doc = new Document();
+    doc.add(newStringField("id", "1", Field.Store.YES));
+    // 1 extra token, but wizard and oz are close;
+    doc.add(newTextField("field", "wizard oz the the the the the the", Field.Store.NO));
+    w.addDocument(doc);
+    IndexReader r = w.getReader();
+    w.close();
+
+    // Do ordinary BooleanQuery:
+    BooleanQuery bq = new BooleanQuery();
+    bq.add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD);
+    bq.add(new TermQuery(new Term("field", "oz")), Occur.SHOULD);
+    IndexSearcher searcher = newSearcher(r);
+
+    TopDocs hits = searcher.search(bq, 10);
+    assertEquals(2, hits.totalHits);
+    assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
+    assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
+
+    // Now, resort using PhraseQuery:
+    PhraseQuery pq = new PhraseQuery();
+    pq.setSlop(5);
+    pq.add(new Term("field", "wizard"));
+    pq.add(new Term("field", "oz"));
+
+    TopDocs hits2 = QueryRescorer.rescore(searcher, hits, pq, 2.0, 10);
+
+    // Resorting changed the order:
+    assertEquals(2, hits2.totalHits);
+    assertEquals("1", searcher.doc(hits2.scoreDocs[0].doc).get("id"));
+    assertEquals("0", searcher.doc(hits2.scoreDocs[1].doc).get("id"));
+
+    r.close();
+    dir.close();
+  }
+
+  // nocommit more, e.g. pull DocValues for scoring factors
+}

Property changes on: lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java	(revision 1576785)
+++ lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java	(working copy)
@@ -29,8 +29,8 @@
   /** The number of subscorers that provide the current match. */
   protected int nrMatchers = -1;
 
-  protected double score = Float.NaN;
   private final float[] coord;
+  private final int[] matches;
 
   /** Construct a DisjunctionScorer.
    * @param weight The weight to be used.
@@ -43,6 +43,7 @@
     if (numScorers <= 1) {
       throw new IllegalArgumentException("There must be at least 2 subScorers");
     }
+    matches = new int[numScorers];
     this.coord = coord;
   }
 
@@ -69,23 +70,19 @@
     final Scorer sub = subScorers[0];
     doc = sub.docID();
     if (doc != NO_MORE_DOCS) {
-      score = sub.score();
+      matches[0] = 0;
       nrMatchers = 1;
-      countMatches(1);
-      countMatches(2);
+      gatherMatches(1);
+      gatherMatches(2);
     }
   }
 
-  // TODO: this currently scores, but so did the previous impl
   // TODO: remove recursion.
- // TODO: if we separate scoring, out of here, - // then change freq() to just always compute it from scratch - private void countMatches(int root) throws IOException { + private void gatherMatches(int root) throws IOException { if (root < numScorers && subScorers[root].docID() == doc) { - nrMatchers++; - score += subScorers[root].score(); - countMatches((root<<1)+1); - countMatches((root<<1)+2); + matches[nrMatchers++] = root; + gatherMatches((root<<1)+1); + gatherMatches((root<<1)+2); } } @@ -94,7 +91,15 @@ */ @Override public float score() throws IOException { - return (float)score * coord[nrMatchers]; + double score = 0.0; + for(int i=0;i= endDoc) { + return NO_MORE_DOCS; + } + pos++; + assert acceptDocs == null || acceptDocs.get(docID-context.docBase); + return docID-context.docBase; + } + + @Override + public long cost() { + // nocommit? + return 0; + } + + @Override + public int advance(int target) { + int loc = Arrays.binarySearch(docIDs, target + context.docBase); + if (loc < 0) { + loc = -loc-1; + } + pos = loc; + return nextDoc(); + } + }; + } + }; + } + } + + /** @params + * searcher {@link IndexSearcher} used to produce the + * first pass topDocs + * topDocs Hits from the first pass search + * query Query to use for rescoring + * scoreWeight The score of the returned hits is + * firstPassScore + weight * queryScore + * topN How many re-scored hits to return + */ + public static TopDocs rescore(IndexSearcher searcher, TopDocs topDocs, Query query, double weight, int topN) throws IOException { + int[] docIDs = new int[topDocs.scoreDocs.length]; + for(int i=0;i newScores = new HashMap(); + for(ScoreDoc sd : topDocs2.scoreDocs) { + newScores.put(sd.doc, sd.score); + } + + ScoreDoc[] newHits = new ScoreDoc[topDocs.scoreDocs.length]; + for(int i=0;i() { + @Override + public int compare(ScoreDoc a, ScoreDoc b) { + // Sort by score descending, then docID ascending: + if (a.score > b.score) { + return -1; + } else if (a.score < b.score) { + return 1; + } else 
{ + // This subtraction can't overflow int + // because docIDs are >= 0: + return a.doc - b.doc; + } + } + }); + + if (topN < newHits.length) { + ScoreDoc[] subset = new ScoreDoc[topN]; + System.arraycopy(newHits, 0, subset, 0, topN); + newHits = subset; + } + + return new TopDocs(topDocs.totalHits, newHits, newHits[0].score); + } +} Property changes on: lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java =================================================================== --- lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java (revision 1576785) +++ lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java (working copy) @@ -27,13 +27,10 @@ class DisjunctionMaxScorer extends DisjunctionScorer { /* Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result. */ private final float tieBreakerMultiplier; + private final int[] matches; private int doc = -1; private int freq = -1; - /* Used when scoring currently matching doc. */ - private float scoreSum; - private float scoreMax; - /** * Creates a new instance of DisjunctionMaxScorer * @@ -52,6 +49,7 @@ Scorer[] subScorers, int numScorers) { super(weight, subScorers, numScorers); this.tieBreakerMultiplier = tieBreakerMultiplier; + matches = new int[subScorers.length]; } @Override @@ -83,28 +81,32 @@ */ @Override public float score() throws IOException { + float scoreSum = subScorers[matches[0]].score(); + float scoreMax = scoreSum; + for(int i=1;isims. */ + public SwitchingSimilarity(Similarity... 
sims) { + this.sims = sims; + } + + @Override + public long computeNorm(FieldInvertState state) { + return sims[0].computeNorm(state); + } + + @Override + public SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) { + SimWeight subStats[] = new SimWeight[sims.length]; + for (int i = 0; i < subStats.length; i++) { + subStats[i] = sims[i].computeWeight(queryBoost, collectionStats, termStats); + } + return new SwitchingStats(subStats); + } + + @Override + public ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException { + ExactSimScorer subScorers[] = new ExactSimScorer[sims.length]; + for (int i = 0; i < subScorers.length; i++) { + subScorers[i] = sims[i].exactSimScorer(((SwitchingStats)stats).subStats[i], context); + } + return new SwitchingExactSimScorer(subScorers); + } + + @Override + public SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException { + SloppySimScorer subScorers[] = new SloppySimScorer[sims.length]; + for (int i = 0; i < subScorers.length; i++) { + subScorers[i] = sims[i].sloppySimScorer(((SwitchingStats)stats).subStats[i], context); + } + return new SwitchingSloppySimScorer(subScorers); + } + + /** Moves to the next sub-scorer. 
*/ + public void switchScorer() { + scorerIndex++; + if (scorerIndex == sims.length) { + scorerIndex = 0; + } + } + + public class SwitchingExactSimScorer extends ExactSimScorer { + private final ExactSimScorer subScorers[]; + + SwitchingExactSimScorer(ExactSimScorer subScorers[]) { + this.subScorers = subScorers; + } + + @Override + public float score(int doc, int freq) { + return subScorers[scorerIndex].score(doc, freq); + } + + @Override + public Explanation explain(int doc, Explanation freq) { + return subScorers[scorerIndex].explain(doc, freq); + } + } + + public class SwitchingSloppySimScorer extends SloppySimScorer { + private final SloppySimScorer subScorers[]; + + SwitchingSloppySimScorer(SloppySimScorer subScorers[]) { + this.subScorers = subScorers; + } + + @Override + public float score(int doc, float freq) { + return subScorers[scorerIndex].score(doc, freq); + } + + @Override + public Explanation explain(int doc, Explanation freq) { + return subScorers[scorerIndex].explain(doc, freq); + } + + @Override + public float computeSlopFactor(int distance) { + return subScorers[0].computeSlopFactor(distance); + } + + @Override + public float computePayloadFactor(int doc, int start, int end, BytesRef payload) { + return subScorers[0].computePayloadFactor(doc, start, end, payload); + } + } + + static class SwitchingStats extends SimWeight { + final SimWeight subStats[]; + + SwitchingStats(SimWeight subStats[]) { + this.subStats = subStats; + } + + @Override + public float getValueForNormalization() { + float sum = 0.0f; + for (SimWeight stat : subStats) { + sum += stat.getValueForNormalization(); + } + return sum / subStats.length; + } + + @Override + public void normalize(float queryNorm, float topLevelBoost) { + for (SimWeight stat : subStats) { + stat.normalize(queryNorm, topLevelBoost); + } + } + } +} Property changes on: lucene/misc/src/java/org/apache/lucene/search/SwitchingSimilarity.java ___________________________________________________________________ 
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/misc/src/java/org/apache/lucene/search/SwitchingSimilarityCollector.java
===================================================================
--- lucene/misc/src/java/org/apache/lucene/search/SwitchingSimilarityCollector.java	(revision 0)
+++ lucene/misc/src/java/org/apache/lucene/search/SwitchingSimilarityCollector.java	(working copy)
@@ -0,0 +1,87 @@
+package org.apache.lucene.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.AtomicReaderContext;
+
+/** When used with {@link SwitchingSimilarity}, lets you collect
+ *  hits for a single query using multiple similarities into
+ *  separate collectors.  Be sure the number of collectors
+ *  you pass here matches exactly the number of
+ *  similarities you pass to {@link SwitchingSimilarity}.
+ *
+ *  <p>There can easily be cases where this does not work,
+ *  e.g. if the {@link ScoreCachingWrapperScorer} is involved
+ *  in the scoring chain.
+ *
+ * @lucene.experimental */
+
+public final class SwitchingSimilarityCollector extends Collector {
+  /** Sub-collectors; each one receives every collected hit. */
+  private final Collector[] subs;
+  /** Similarity that is switched once before each sub-collector collects. */
+  private final SwitchingSimilarity switchingSim;
+
+  /**
+   * Creates a collector that forwards every hit to each of {@code subs}.
+   *
+   * @param switchingSim similarity to switch before each sub-collector collects
+   * @param subs collectors that each receive every hit, in this order
+   */
+  public SwitchingSimilarityCollector(SwitchingSimilarity switchingSim, Collector... subs) {
+    this.subs = subs;
+    this.switchingSim = switchingSim;
+  }
+
+  /** Passes the same scorer to every sub-collector. */
+  @Override
+  public void setScorer(Scorer scorer) throws IOException {
+    for(Collector sub : subs) {
+      sub.setScorer(scorer);
+    }
+  }
+
+  /**
+   * Forwards the hit to every sub-collector, calling
+   * {@link SwitchingSimilarity#switchScorer()} before each one.
+   */
+  @Override
+  public void collect(int docID) throws IOException {
+    for(Collector sub : subs) {
+      switchingSim.switchScorer();
+      sub.collect(docID);
+    }
+  }
+
+  /** Passes the new reader context to every sub-collector. */
+  @Override
+  public void setNextReader(AtomicReaderContext context) throws IOException {
+    for(Collector sub : subs) {
+      sub.setNextReader(context);
+    }
+  }
+
+  @Override
+  public boolean acceptsDocsOutOfOrder() {
+    // We must return false here, because we need the
+    // .score() to not be cached for each hit:
+    return false;
+  }
+}

Property changes on: lucene/misc/src/java/org/apache/lucene/search/SwitchingSimilarityCollector.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property