Index: src/java/org/apache/lucene/search/Query.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/search/Query.java,v
retrieving revision 1.18
diff -u -r1.18 Query.java
--- src/java/org/apache/lucene/search/Query.java	15 Oct 2004 19:50:58 -0000	1.18
+++ src/java/org/apache/lucene/search/Query.java	15 Nov 2004 12:21:36 -0000
@@ -44,6 +44,7 @@
  */
 public abstract class Query implements java.io.Serializable, Cloneable {
   private float boost = 1.0f;                     // query boost factor
+  private Similarity similarity = null;           // similarity to be used by query
 
   /** Sets the boost for this query clause to b. Documents
    * matching this clause will (in addition to the normal weightings) have
@@ -138,7 +139,23 @@
    * implementation, perhaps one that delegates through that of the Searcher.
    * By default the Searcher's Similarity implementation is returned.*/
   public Similarity getSimilarity(Searcher searcher) {
-    return searcher.getSimilarity();
+    if (similarity == null) {
+      return searcher.getSimilarity();
+    }
+    return similarity;
+  }
+
+  /** Expert: sets a specific Similarity to be used for this query instead
+   * of the one provided by the Searcher. Passing <code>null</code> restores
+   * the default, i.e. {@link #getSimilarity(Searcher)} returns the Searcher's
+   * Similarity again.
+   *
+   * @param similarity the similarity to be used to evaluate this query
+   * @param deep if true, use this similarity for all sub-queries, too;
+   *  ignored by this base implementation, which has no sub-queries
+   */
+  public void setSimilarity(Similarity similarity, boolean deep) {
+    this.similarity = similarity;
   }
 
   /** Returns a clone of this query. */
Index: src/java/org/apache/lucene/search/BooleanQuery.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/search/BooleanQuery.java,v
retrieving revision 1.26
diff -u -r1.26 BooleanQuery.java
--- src/java/org/apache/lucene/search/BooleanQuery.java	19 Oct 2004 19:51:35 -0000	1.26
+++ src/java/org/apache/lucene/search/BooleanQuery.java	15 Nov 2004 12:21:35 -0000
@@ -17,6 +17,7 @@
  */
 
 import java.io.IOException;
+import java.util.Iterator;
 import java.util.Vector;
 
 import org.apache.lucene.index.IndexReader;
@@ -331,4 +332,17 @@
     return Float.floatToIntBits(getBoost()) ^ clauses.hashCode();
   }
 
+  /**
+   * @see org.apache.lucene.search.Query#setSimilarity(org.apache.lucene.search.Similarity, boolean)
+   */
+  public void setSimilarity(Similarity similarity, boolean deep) {
+    super.setSimilarity(similarity, deep);
+    if (deep) {
+      // forward similarity to queries of all contained clauses
+      for (Iterator i = clauses.iterator(); i.hasNext();) {
+        BooleanClause clause = (BooleanClause) i.next();
+        clause.getQuery().setSimilarity(similarity, deep);
+      }
+    }
+  }
 }
Index: src/java/org/apache/lucene/search/FilteredQuery.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/search/FilteredQuery.java,v
retrieving revision 1.5
diff -u -r1.5 FilteredQuery.java
--- src/java/org/apache/lucene/search/FilteredQuery.java	18 Jun 2004 09:52:25 -0000	1.5
+++ src/java/org/apache/lucene/search/FilteredQuery.java	15 Nov 2004 12:21:35 -0000
@@ -116,6 +116,17 @@
     return query;
   }
 
+  /** @see org.apache.lucene.search.Query#setSimilarity(org.apache.lucene.search.Similarity,
+   *      boolean)
+   */
+  public void setSimilarity(Similarity similarity, boolean deep) {
+    super.setSimilarity(similarity, deep);
+    if (deep) {
+      // forward similarity to enclosed query
+      getQuery().setSimilarity(similarity, deep);
+    }
+  }
+
   /** Prints a user-readable version of this query. */
   public String toString (String s) {
     return "filtered("+query.toString(s)+")->"+filter;
Index: src/java/org/apache/lucene/search/MultiSearcher.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/search/MultiSearcher.java,v
retrieving revision 1.17
diff -u -r1.17 MultiSearcher.java
--- src/java/org/apache/lucene/search/MultiSearcher.java	29 Mar 2004 22:48:03 -0000	1.17
+++ src/java/org/apache/lucene/search/MultiSearcher.java	15 Nov 2004 12:21:35 -0000
@@ -109,6 +109,9 @@
 
   public TopDocs search(Query query, Filter filter, int nDocs)
       throws IOException {
+    // use document frequencies and maxDoc of this MultiSearcher for scoring;
+    // note that this replaces any Similarity previously set on the query
+    query.setSimilarity(new DfMapSimilarity(this, query), true);
     HitQueue hq = new HitQueue(nDocs);
     int totalHits = 0;
 
@@ -134,6 +137,9 @@
 
   public TopFieldDocs search (Query query, Filter filter, int n, Sort sort)
     throws IOException {
+    // use document frequencies and maxDoc of this MultiSearcher for scoring;
+    // note that this replaces any Similarity previously set on the query
+    query.setSimilarity(new DfMapSimilarity(this, query), true);
     FieldDocSortedHitQueue hq = null;
     int totalHits = 0;
 
@@ -161,6 +167,9 @@
   // inherit javadoc
   public void search(Query query, Filter filter, final HitCollector results)
     throws IOException {
+    // use document frequencies and maxDoc of this MultiSearcher for scoring;
+    // note that this replaces any Similarity previously set on the query
+    query.setSimilarity(new DfMapSimilarity(this, query), true);
     for (int i = 0; i < searchables.length; i++) {
 
       final int start = starts[i];
Index: src/test/org/apache/lucene/search/TestSort.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/test/org/apache/lucene/search/TestSort.java,v
retrieving revision 1.8
diff -u -r1.8 TestSort.java
--- src/test/org/apache/lucene/search/TestSort.java	30 Aug 2004 20:52:15 -0000	1.8
+++ src/test/org/apache/lucene/search/TestSort.java	15 Nov 2004 12:21:36 -0000
@@ -344,12 +344,8 @@
     HashMap scoresA = getScores (full.search (queryA));
 
     // we'll test searching locally, remote and multi
-    // note: the multi test depends on each separate index containing
-    // the same documents as our local index, so the computed normalization
-    // will be the same. so we make a multi searcher over two equal document
-    // sets - not realistic, but necessary for testing.
     MultiSearcher remote = new MultiSearcher (new Searchable[] { getRemote() });
-    MultiSearcher multi = new MultiSearcher (new Searchable[] { full, full });
+    MultiSearcher multi = new MultiSearcher (new Searchable[] { full });
 
     // change sorting and make sure relevancy stays the same
 
Index: src/java/org/apache/lucene/search/DfMapSimilarity.java
===================================================================
RCS file: src/java/org/apache/lucene/search/DfMapSimilarity.java
diff -N src/java/org/apache/lucene/search/DfMapSimilarity.java
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/java/org/apache/lucene/search/DfMapSimilarity.java	1 Jan 1970 00:00:00 -0000
@@ -0,0 +1,122 @@
+package org.apache.lucene.search;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.index.Term;
+
+/** This Similarity class can be used if the score is to be calculated
+ * using given document frequencies and document count regardless of the
+ * values provided by the searcher.
+ *
+ * @author Wolf Siberski
+ */
+public class DfMapSimilarity extends DefaultSimilarity implements Serializable {
+  private Map dfMap;  // Map from Terms to corresponding doc freqs (as Integer)
+  private int maxDoc; // document count
+
+  /** Constructor.
+   * @param maxDoc document count to be used for idf calculation
+   */
+  public DfMapSimilarity(int maxDoc) {
+    this.maxDoc = maxDoc;
+    dfMap = new HashMap();
+  }
+
+  /** Constructor. Retrieves document count and all document frequencies
+   * necessary for a query from the passed searcher.
+   * Note: This constructor only works for queries with constant terms,
+   * but not for range queries, wildcard queries, etc.!
+   *
+   * @param searcher Searcher to be used as df source
+   * @param query Query to be analysed
+   * @throws IOException
+   * @throws IllegalArgumentException if the query contains a query type
+   *  other than BooleanQuery, FilteredQuery, TermQuery or PhraseQuery
+   */
+  public DfMapSimilarity(Searchable searcher, Query query) throws IOException {
+    this(searcher.maxDoc());
+    collectDfs(searcher, query);
+  }
+
+  /** Computes the idf from the stored document frequency and document
+   * count; the passed searcher is ignored.
+   *
+   * @throws IllegalArgumentException if no df is stored for the term
+   * @see org.apache.lucene.search.Similarity#idf(org.apache.lucene.index.Term,
+   *      org.apache.lucene.search.Searcher)
+   */
+  public float idf(Term term, Searcher searcher) throws IOException {
+    return super.idf(df(term), maxDoc);
+  }
+
+  /** Returns the stored document frequency for the passed term.
+   *
+   * @throws IllegalArgumentException if no df is stored for the term
+   */
+  private int df(Term term) {
+    // look the value up explicitly instead of catching NullPointerException
+    Integer storedDf = (Integer) dfMap.get(term);
+    if (storedDf == null) {
+      throw new IllegalArgumentException("df for term " + term.text()
+          + " not available");
+    }
+    return storedDf.intValue();
+  }
+
+  /** Extracts all terms from the passed query, retrieves the corresponding
+   * document frequencies from the searcher and stores them.
+   * @param searcher Searcher to be used as df source
+   * @param query Query to be analysed
+   * @throws IOException
+   * @throws IllegalArgumentException if the query type is not supported
+   */
+  private void collectDfs(Searchable searcher, Query query) throws IOException {
+    if (query instanceof BooleanQuery) {
+      // fetch the clause array once instead of on every loop iteration
+      BooleanClause[] clauses = ((BooleanQuery) query).getClauses();
+      for (int i = 0; i < clauses.length; i++) {
+        collectDfs(searcher, clauses[i].getQuery());
+      }
+    } else if (query instanceof FilteredQuery) {
+      collectDfs(searcher, ((FilteredQuery) query).getQuery());
+    } else if (query instanceof TermQuery) {
+      Term term = ((TermQuery) query).getTerm();
+      addDocFreq(term, searcher.docFreq(term));
+    } else if (query instanceof PhraseQuery) {
+      Term[] terms = ((PhraseQuery) query).getTerms();
+      for (int i = 0; i < terms.length; i++) {
+        addDocFreq(terms[i], searcher.docFreq(terms[i]));
+      }
+    } else {
+      throw new IllegalArgumentException(query.getClass().getName()
+          + " not supported");
+    }
+  }
+
+  /** Adds a document frequency, replacing any value stored for the term.
+   * @param term new term
+   * @param df corresponding document frequency
+   */
+  public void addDocFreq(Term term, int df) {
+    dfMap.put(term, new Integer(df));
+  }
+}
Index: src/java/org/apache/lucene/search/MultiSimilarity.java
===================================================================
RCS file: src/java/org/apache/lucene/search/MultiSimilarity.java
diff -N src/java/org/apache/lucene/search/MultiSimilarity.java
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/java/org/apache/lucene/search/MultiSimilarity.java	1 Jan 1970 00:00:00 -0000
@@ -0,0 +1,49 @@
+package org.apache.lucene.search;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.Term;
+
+/** This Similarity forwards idf requests to a given searcher,
+ * regardless of the searcher which is currently evaluating the
+ * query.
+ *
+ * Note: This Similarity can't be used for remote search, as Searchers
+ * are obviously not serializable
+ */
+public class MultiSimilarity extends DefaultSimilarity {
+  Searcher searcher; // searcher that every idf request is forwarded to
+
+  /** Constructor.
+   *
+   * @param searcher the searcher to be used as data source for
+   * calculation of idf values
+   */
+  public MultiSimilarity(Searcher searcher) {
+    this.searcher = searcher;
+  }
+
+  /** @see org.apache.lucene.search.Similarity#idf(org.apache.lucene.index.Term,
+   *      org.apache.lucene.search.Searcher)
+   */
+  public float idf(Term term, Searcher dummy) throws IOException {
+    // ignore Searcher argument and use this.searcher instead
+    return super.idf(term, this.searcher);
+  }
+}