Index: lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java =================================================================== --- lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java (revision 0) +++ lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java (working copy) @@ -0,0 +1,73 @@ +package org.apache.lucene.search; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; + +public class TestQueryRescorer extends LuceneTestCase { + + public void testBasic() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir); + + Document doc = new Document(); + doc.add(newStringField("id", "0", Field.Store.YES)); + doc.add(newTextField("field", "wizard the the the the the oz", Field.Store.NO)); + w.addDocument(doc); + doc = new Document(); + doc.add(newStringField("id", "1", Field.Store.YES)); + // 1 extra token, but wizard and oz are close; + doc.add(newTextField("field", "wizard oz the the the the the the", Field.Store.NO)); + w.addDocument(doc); + IndexReader r = w.getReader(); + w.close(); + + // Do ordinary BooleanQuery: + BooleanQuery bq = new BooleanQuery(); + bq.add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD); + bq.add(new TermQuery(new Term("field", "oz")), Occur.SHOULD); + IndexSearcher searcher = newSearcher(r); + + TopDocs hits = searcher.search(bq, 10); + assertEquals(2, hits.totalHits); + assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id")); + assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); + + // Now, resort using ProxBooleanTermQuery: + ProxBooleanTermQuery pq = new ProxBooleanTermQuery("field", Occur.SHOULD); + pq.add(new BytesRef("wizard")); + pq.add(new BytesRef("oz")); + + TopDocs hits2 = QueryRescorer.rescore(searcher, hits, pq, 2.0, 10); + + assertEquals(2, hits2.totalHits); + assertEquals("1", searcher.doc(hits2.scoreDocs[0].doc).get("id")); + assertEquals("0", searcher.doc(hits2.scoreDocs[1].doc).get("id")); + + 
r.close(); + dir.close(); + } +} Property changes on: lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/core/src/test/org/apache/lucene/search/TestProxBooleanTermQuery.java =================================================================== --- lucene/core/src/test/org/apache/lucene/search/TestProxBooleanTermQuery.java (revision 0) +++ lucene/core/src/test/org/apache/lucene/search/TestProxBooleanTermQuery.java (working copy) @@ -0,0 +1,52 @@ +package org.apache.lucene.search; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; + +public class TestProxBooleanTermQuery extends LuceneTestCase { + + public void test() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(new TextField("field", "here is some text", Field.Store.NO)); + w.addDocument(doc); + IndexReader r = w.getReader(); + w.close(); + + IndexSearcher s = newSearcher(r); + ProxBooleanTermQuery q = new ProxBooleanTermQuery("field", Occur.SHOULD); + q.add(new BytesRef("here")); + q.add(new BytesRef("some")); + TopDocs hits = s.search(q, 10); + assertEquals(1, hits.totalHits); + + r.close(); + + dir.close(); + } +} Property changes on: lucene/core/src/test/org/apache/lucene/search/TestProxBooleanTermQuery.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/core/src/java/org/apache/lucene/search/ProxBooleanTermQuery.java =================================================================== --- lucene/core/src/java/org/apache/lucene/search/ProxBooleanTermQuery.java (revision 0) +++ lucene/core/src/java/org/apache/lucene/search/ProxBooleanTermQuery.java (working copy) @@ -0,0 +1,518 @@ +package org.apache.lucene.search; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + TODO + - take TermStates so a custom rewrite method can avoid + double-term lookup + - coord + - what about multi-fields? + */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.TermQuery.TermWeight; +import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; + +/** Scores like {@link BooleanQuery} but also boosts matches + * where terms occur close to one another. The field must + * be indexed with positions. Typically, this query is + * too costly to run as the primary query; instead, run a + * simpler first-pass query and then use {@link + * QueryRescorer} to rescore the top results using this + * query. 
*/ +public class ProxBooleanTermQuery extends Query { + + private final BooleanClause.Occur occur; + private final String field; + private final List terms; + private final boolean disableCoord; + + public ProxBooleanTermQuery(String field, Occur occur) { + this(field, occur, false); + } + + @Override + public String toString(String field) { + StringBuilder b = new StringBuilder(); + b.append("ProxBooleanTermQuery(field="); + b.append(this.field); + b.append(", occur="); + b.append(occur); + b.append(", terms="); + for(int i=0;i 0) { + b.append(' '); + } + b.append(terms.get(i).utf8ToString()); + } + b.append(")"); + + return b.toString(); + } + + // nocommit rewrite, e.g. only 1 clause? + + public ProxBooleanTermQuery(String field, Occur occur, boolean disableCoord) { + this.field = field; + this.occur = occur; + terms = new ArrayList(); + this.disableCoord = disableCoord; + } + + public void add(BytesRef term) { + terms.add(term); + } + + @Override + public boolean equals(Object o) { + if ((o instanceof ProxBooleanTermQuery) == false) { + return false; + } + ProxBooleanTermQuery other = (ProxBooleanTermQuery) o; + return getBoost() == other.getBoost() && + field.equals(other.field) && + disableCoord == other.disableCoord && + terms.equals(other.terms) && + occur.equals(other.occur); + } + + @Override + public int hashCode() { + int PRIME = 101; + int hashCode = Float.floatToIntBits(getBoost()); + hashCode = hashCode * PRIME + field.hashCode(); + hashCode = hashCode * PRIME + occur.hashCode(); + hashCode = hashCode * PRIME + terms.hashCode(); + hashCode = hashCode * PRIME + (disableCoord ? 
17:0); + + return hashCode; + } + + @Override + public Weight createWeight(IndexSearcher searcher) throws IOException { + return new ProxBooleanTermWeight(searcher); + } + + private class ProxBooleanTermWeight extends Weight { + + protected Similarity similarity; + private List weights; + + public ProxBooleanTermWeight(IndexSearcher searcher) throws IOException { + this.similarity = searcher.getSimilarity(); + weights = new ArrayList(); + for (BytesRef term : terms) { + TermQuery tq = new TermQuery(new Term(field, term)); + weights.add((TermWeight) tq.createWeight(searcher)); + } + } + + @Override + public float getValueForNormalization() { + float sum = 0.0f; + for (TermWeight tw : weights) { + // call sumOfSquaredWeights for all clauses in case of side effects + sum += tw.getValueForNormalization(); // sum sub weights + } + + sum *= getBoost() * getBoost(); // boost each sub-weight + + return sum; + } + + @Override + public void normalize(float norm, float topLevelBoost) { + topLevelBoost *= getBoost(); + for(TermWeight tw : weights) { + tw.normalize(norm, topLevelBoost); + } + } + + @Override + public Explanation explain(AtomicReaderContext context, int doc) throws IOException { + // nocommit todo + return null; + } + + @Override + public Query getQuery() { + return ProxBooleanTermQuery.this; + } + + @Override + public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, + boolean topScorer, Bits acceptDocs) throws IOException { + + List scorers = new ArrayList(); + + int ord = 0; + for(TermWeight tw : weights) { + + TermsEnum termsEnum = tw.getTermsEnum(context); + if (termsEnum == null) { + if (occur == Occur.MUST) { + return null; + } + continue; + } + DocsAndPositionsEnum posEnum = termsEnum.docsAndPositions(acceptDocs, null, 0); + if (posEnum == null) { + throw new IllegalStateException("field \"" + field + "\" was indexed without position data; cannot run ProxBooleanTermQuery (term=" + tw.getQuery() + ")"); + } + TermScorer ts = new 
TermScorer(tw, posEnum, similarity.simScorer(tw.stats, context)); + scorers.add(new ProxTermScorer(this, ord++, posEnum, ts)); + } + + float[] coords = new float[scorers.size()+1]; + + for(int i=0;i queryTerms) { + for(BytesRef term : terms) { + queryTerms.add(new Term(field, term)); + } + } + + /** Wrapper/delegator around TermScorer, that just holds the D&PEnum + * and ord. */ + private static class ProxTermScorer extends Scorer { + final DocsAndPositionsEnum posEnum; + final TermScorer termScorer; + final int ord; + + public ProxTermScorer(Weight weight, int ord, DocsAndPositionsEnum posEnum, TermScorer termScorer) { + super(weight); + this.ord = ord; + this.posEnum = posEnum; + this.termScorer = termScorer; + } + + @Override + public int docID() { + return termScorer.docID(); + } + + @Override + public int nextDoc() throws IOException { + return termScorer.nextDoc(); + } + + @Override + public int advance(int target) throws IOException { + return termScorer.advance(target); + } + + @Override + public long cost() { + return termScorer.cost(); + } + + @Override + public int freq() throws IOException { + return termScorer.freq(); + } + + @Override + public AttributeSource attributes() { + return termScorer.attributes(); + } + + @Override + public float score() throws IOException { + return termScorer.score(); + } + + @Override + public Weight getWeight() { + return termScorer.getWeight(); + } + } + + /** Utility class to merge-sort multiple + * DocsAndPositionsEnum currently "on" a single doc, and + * compute resulting score "boost". 
*/ + private static class ProxScorer { + + private static class PosEnumAndPos { + int pos; + int posUpto; + int posLimit; + final DocsAndPositionsEnum posEnum; + final Similarity.SimScorer docScorer; + + public PosEnumAndPos(ProxTermScorer termScorer) { + this.docScorer = termScorer.termScorer.docScorer; + this.posEnum = termScorer.posEnum; + } + } + + final PosEnumAndPos[] enums; + final PosEnumAndPos[] heap; + private int heapSize; + + public ProxScorer(ProxTermScorer[] scorers) { + enums = new PosEnumAndPos[scorers.length]; + for(int i=0;i= endDoc) { + return NO_MORE_DOCS; + } + pos++; + assert acceptDocs == null || acceptDocs.get(docID-context.docBase); + return docID-context.docBase; + } + + @Override + public long cost() { + // nocommit? + return 0; + } + + @Override + public int advance(int target) { + int loc = Arrays.binarySearch(docIDs, target + context.docBase); + if (loc < 0) { + loc = -loc-1; + } + pos = loc; + return nextDoc(); + } + }; + } + }; + } + } + + /** Rescores first-pass hits with a second-pass query. + * @param searcher {@link IndexSearcher} used to produce the first pass topDocs + * @param topDocs Hits from the first pass search + * @param query Query to use for rescoring + * @param weight The score of the returned hits is + * firstPassScore + weight * queryScore + * @param topN How many re-scored hits to return + * @return hits sorted by score descending, then docID ascending + */ + public static TopDocs rescore(IndexSearcher searcher, TopDocs topDocs, Query query, double weight, int topN) throws IOException { + int[] docIDs = new int[topDocs.scoreDocs.length]; + for(int i=0;i newScores = new HashMap(); + for(ScoreDoc sd : topDocs2.scoreDocs) { + newScores.put(sd.doc, sd.score); + } + + ScoreDoc[] newHits = new ScoreDoc[topDocs.scoreDocs.length]; + for(int i=0;i() { + @Override + public int compare(ScoreDoc a, ScoreDoc b) { + // Sort by score descending, then docID ascending: + if (a.score > b.score) { + return -1; + } else if (a.score < b.score) { + return 1; + } else { + // This subtraction can't overflow int + // because docIDs are >= 0: + return a.doc - b.doc; + } + 
} + }); + + if (topN < newHits.length) { + ScoreDoc[] subset = new ScoreDoc[topN]; + System.arraycopy(newHits, 0, subset, 0, topN); + newHits = subset; + } + + // With no first-pass hits, or topN == 0, there is no top hit to read the + // max score from; report NaN rather than indexing newHits[0]: + return new TopDocs(topDocs.totalHits, newHits, newHits.length == 0 ? Float.NaN : newHits[0].score); + } +} Property changes on: lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/core/src/java/org/apache/lucene/search/TermQuery.java =================================================================== --- lucene/core/src/java/org/apache/lucene/search/TermQuery.java (revision 1532782) +++ lucene/core/src/java/org/apache/lucene/search/TermQuery.java (working copy) @@ -44,7 +44,7 @@ final class TermWeight extends Weight { private final Similarity similarity; - private final Similarity.SimWeight stats; + final Similarity.SimWeight stats; private final TermContext termStates; public TermWeight(IndexSearcher searcher, TermContext termStates) @@ -91,7 +91,7 @@ * Returns a {@link TermsEnum} positioned at this weights Term or null if * the term does not exist in the given context */ - private TermsEnum getTermsEnum(AtomicReaderContext context) throws IOException { + TermsEnum getTermsEnum(AtomicReaderContext context) throws IOException { final TermState state = termStates.get(context.ord); if (state == null) { // term is not present in that reader assert termNotInReader(context.reader(), term) : "no termstate found but term exists in reader term=" + term; Index: lucene/core/src/java/org/apache/lucene/search/TermScorer.java =================================================================== --- lucene/core/src/java/org/apache/lucene/search/TermScorer.java (revision 1532782) +++ lucene/core/src/java/org/apache/lucene/search/TermScorer.java (working copy) @@ -26,7 +26,7 @@ */ final class TermScorer extends Scorer { private final DocsEnum docsEnum; - private final Similarity.SimScorer 
docScorer; + final Similarity.SimScorer docScorer; /** * Construct a TermScorer. Index: lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java =================================================================== --- lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java (revision 1532782) +++ lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java (working copy) @@ -68,7 +68,7 @@ } private ArrayList clauses = new ArrayList(); - private final boolean disableCoord; + protected final boolean disableCoord; /** Constructs an empty boolean query. */ public BooleanQuery() { @@ -163,8 +163,7 @@ * Expert: the Weight for BooleanQuery, used to * normalize, score and explain these queries. * - *

NOTE: this API and implementation is subject to - * change suddenly in the next release.

+ * @lucene.experimental */ protected class BooleanWeight extends Weight { /** The Similarity implementation. */ @@ -322,6 +321,11 @@ } } + return scorer(required, prohibited, optional, scoreDocsInOrder, topScorer, acceptDocs); + } + + protected Scorer scorer(List required, List prohibited, List optional, boolean scoreDocsInOrder, + boolean topScorer, Bits acceptDocs) throws IOException { // NOTE: we could also use BooleanScorer, if we knew // this BooleanQuery was embedded in another // BooleanQuery that was also using BooleanScorer (ie,