Index: src/java/org/apache/lucene/search/BooleanQuery.java =================================================================== RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/search/BooleanQuery.java,v retrieving revision 1.29 diff -u -r1.29 BooleanQuery.java --- src/java/org/apache/lucene/search/BooleanQuery.java 24 Jan 2005 19:21:01 -0000 1.29 +++ src/java/org/apache/lucene/search/BooleanQuery.java 10 Feb 2005 10:09:57 -0000 @@ -17,6 +17,8 @@ */ import java.io.IOException; +import java.util.Iterator; +import java.util.Set; import java.util.Vector; import org.apache.lucene.index.IndexReader; @@ -322,7 +324,29 @@ return this; // no clauses rewrote } - + /** + * @see org.apache.lucene.search.Query#setSimilarity(org.apache.lucene.search.Similarity) + */ + public void setSimilarity(Similarity similarity) { + super.setSimilarity(similarity); + // forward similarity to queries of all contained clauses + for (Iterator i = clauses.iterator(); i.hasNext();) { + BooleanClause clause = (BooleanClause) i.next(); + clause.getQuery().setSimilarity(similarity); + } + } + + /** (non-Javadoc) + * @see org.apache.lucene.search.Query#addTerms(java.util.Set) + */ + public void addTerms(Set terms) { + for (Iterator i = clauses.iterator(); i.hasNext();) { + BooleanClause clause = (BooleanClause) i.next(); + clause.getQuery().addTerms(terms); + } + } + + public Object clone() { BooleanQuery clone = (BooleanQuery)super.clone(); clone.clauses = (Vector)this.clauses.clone(); Index: src/java/org/apache/lucene/search/FilteredQuery.java =================================================================== RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/search/FilteredQuery.java,v retrieving revision 1.5 diff -u -r1.5 FilteredQuery.java --- src/java/org/apache/lucene/search/FilteredQuery.java 18 Jun 2004 09:52:25 -0000 1.5 +++ src/java/org/apache/lucene/search/FilteredQuery.java 10 Feb 2005 10:09:57 -0000 @@ -19,6 +19,8 @@ import 
org.apache.lucene.index.IndexReader; import java.io.IOException; import java.util.BitSet; +import java.util.Iterator; +import java.util.Set; /** @@ -116,6 +118,21 @@ return query; } + /** @see org.apache.lucene.search.Query#setSimilarity(org.apache.lucene.search.Similarity) + */ + public void setSimilarity(Similarity similarity) { + super.setSimilarity(similarity); + // forward similarity to enclosed query + getQuery().setSimilarity(similarity); + } + + /* (non-Javadoc) + * @see org.apache.lucene.search.Query#addTerms(java.util.Set) + */ + public void addTerms(Set terms) { + getQuery().addTerms(terms); + } + /** Prints a user-readable version of this query. */ public String toString (String s) { return "filtered("+query.toString(s)+")->"+filter; Index: src/java/org/apache/lucene/search/MultiSearcher.java =================================================================== RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/search/MultiSearcher.java,v retrieving revision 1.17 diff -u -r1.17 MultiSearcher.java --- src/java/org/apache/lucene/search/MultiSearcher.java 29 Mar 2004 22:48:03 -0000 1.17 +++ src/java/org/apache/lucene/search/MultiSearcher.java 10 Feb 2005 10:09:58 -0000 @@ -17,6 +17,12 @@ */ import java.io.IOException; +import java.io.Serializable; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Set; import org.apache.lucene.document.Document; import org.apache.lucene.index.Term; @@ -109,11 +115,16 @@ public TopDocs search(Query query, Filter filter, int nDocs) throws IOException { + Query[] rewrittenQueries = prepareQueries(query); + +// if (query.getSimilarity(this) == getSimilarity()) { +// query.setSimilarity(new DfMapSimilarity(this, query), true); +// } HitQueue hq = new HitQueue(nDocs); int totalHits = 0; for (int i = 0; i < searchables.length; i++) { // search each searcher - TopDocs docs = searchables[i].search(query, filter, nDocs); + TopDocs docs = 
searchables[i].search(rewrittenQueries[i], filter, nDocs); totalHits += docs.totalHits; // update totalHits ScoreDoc[] scoreDocs = docs.scoreDocs; for (int j = 0; j < scoreDocs.length; j++) { // merge scoreDocs into hq @@ -134,11 +145,16 @@ public TopFieldDocs search (Query query, Filter filter, int n, Sort sort) throws IOException { + Query[] rewrittenQueries = prepareQueries(query); + +// if (query.getSimilarity(this) == getSimilarity()) { +// query.setSimilarity(new DfMapSimilarity(this, query), true); +// } FieldDocSortedHitQueue hq = null; int totalHits = 0; for (int i = 0; i < searchables.length; i++) { // search each searcher - TopFieldDocs docs = searchables[i].search (query, filter, n, sort); + TopFieldDocs docs = searchables[i].search (rewrittenQueries[i], filter, n, sort); if (hq == null) hq = new FieldDocSortedHitQueue (docs.fields, n); totalHits += docs.totalHits; // update totalHits ScoreDoc[] scoreDocs = docs.scoreDocs; @@ -158,14 +174,165 @@ } - // inherit javadoc + /** + * Distributed query processing is done in the following steps: + * 1. rewrite queries using the Multisearch Similarity + * 2. extract necessary terms + * 3. collect document frequencies (dfs) for these terms from the Searchables + * 4. Replace query similarity with DfCacheSimilarity. + * 5. distribute re-written and re-boosted query to Searchables + * 6. 
merge results + * + * Steps 1-4 are done here, 5+6 in the search() methods + * + * @return rewritten queries + */ + private Query[] prepareQueries(Query original) throws IOException { + Query[] rewrittenQueries = new Query[searchables.length]; + for (int i = 0; i < searchables.length; i++) { + // step 1 + rewrittenQueries[i] = searchables[i].rewrite(original); + } + + // step 2 + Set[] terms = new HashSet[searchables.length]; + Set allTerms = new HashSet(); + for (int i = 0; i < searchables.length; i++) { + terms[i] = new HashSet(); + rewrittenQueries[i].addTerms(terms[i]); + allTerms.addAll(terms[i]); + } + + // step3 + Term[] allTermsArray = new Term[allTerms.size()]; + allTerms.toArray(allTermsArray); + int[] aggregatedDfs = new int[allTerms.size()]; + for (int i = 0; i < searchables.length; i++) { + int[] dfs = searchables[i].docFreqs(allTermsArray); + for(int j=0; jb. Documents * matching this clause will (in addition to the normal weightings) have @@ -138,7 +140,17 @@ * implementation, perhaps one that delegates through that of the Searcher. * By default the Searcher's Similarity implementation is returned.*/ public Similarity getSimilarity(Searcher searcher) { - return searcher.getSimilarity(); + if(similarity == null){ + return searcher.getSimilarity(); + } + return similarity; + } + + /** Expert: allows to set a specific Similarity for this query + * @param similarity the similarity to be used to evaluate this query + */ + public void setSimilarity(Similarity similarity){ + this.similarity = similarity; } /** Returns a clone of this query. 
*/ @@ -149,4 +161,14 @@ throw new RuntimeException("Clone not supported: " + e.getMessage()); } } + + /** + * adds all terms occurring in this query to the set + * + * @param terms + */ + public void addTerms(Set terms) { + // this is supported only by BooleanQuery, FilteredQuery and TermQuery + throw new UnsupportedOperationException(); + } } Index: src/java/org/apache/lucene/search/RemoteSearchable.java =================================================================== RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/search/RemoteSearchable.java,v retrieving revision 1.6 diff -u -r1.6 RemoteSearchable.java --- src/java/org/apache/lucene/search/RemoteSearchable.java 29 Mar 2004 22:48:03 -0000 1.6 +++ src/java/org/apache/lucene/search/RemoteSearchable.java 10 Feb 2005 10:09:58 -0000 @@ -51,6 +51,15 @@ public int docFreq(Term term) throws IOException { return local.docFreq(term); } + + + public int[] docFreqs(Term[] terms) throws IOException { + int[] result = new int[terms.length]; + for (int i = 0; i < terms.length; i++) { + result[i] = docFreq(terms[i]); + } + return result; + } public int maxDoc() throws IOException { return local.maxDoc(); Index: src/java/org/apache/lucene/search/Searchable.java =================================================================== RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/search/Searchable.java,v retrieving revision 1.13 diff -u -r1.13 Searchable.java --- src/java/org/apache/lucene/search/Searchable.java 14 Dec 2004 19:00:01 -0000 1.13 +++ src/java/org/apache/lucene/search/Searchable.java 10 Feb 2005 10:09:58 -0000 @@ -58,6 +58,13 @@ */ int docFreq(Term term) throws IOException; + /** Expert: For each term in the terms array, calculates the number of + * documents containing term. Returns an array with these + * document frequencies. Used to optimize remote calls. 
+ * @see IndexReader#docFreq(Term) + */ + int[] docFreqs(Term[] terms) throws IOException; + /** Expert: Returns one greater than the largest possible document number. * Called by search code to compute term weights. * @see IndexReader#maxDoc() Index: src/java/org/apache/lucene/search/Searcher.java =================================================================== RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/search/Searcher.java,v retrieving revision 1.13 diff -u -r1.13 Searcher.java --- src/java/org/apache/lucene/search/Searcher.java 14 Dec 2004 19:00:01 -0000 1.13 +++ src/java/org/apache/lucene/search/Searcher.java 10 Feb 2005 10:09:58 -0000 @@ -18,6 +18,9 @@ import java.io.IOException; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; + /** An abstract base class for search implementations. * Implements some common utility methods. */ @@ -93,4 +96,19 @@ public Similarity getSimilarity() { return this.similarity; } + + + /** Expert: For each term in the terms array, calculates the number of + * documents containing term. Returns an array with these + * document frequencies. Used to optimize remote calls. 
+ * @see IndexReader#docFreq(Term) + */ + public int[] docFreqs(Term[] terms) throws IOException { + int[] result = new int[terms.length]; + for (int i = 0; i < terms.length; i++) { + result[i] = docFreq(terms[i]); + } + return result; + } + } Index: src/java/org/apache/lucene/search/Similarity.java =================================================================== RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/search/Similarity.java,v retrieving revision 1.17 diff -u -r1.17 Similarity.java --- src/java/org/apache/lucene/search/Similarity.java 27 Oct 2004 21:59:02 -0000 1.17 +++ src/java/org/apache/lucene/search/Similarity.java 10 Feb 2005 10:09:58 -0000 @@ -17,6 +17,7 @@ */ import java.io.IOException; +import java.io.Serializable; import java.util.Collection; import java.util.Iterator; @@ -84,7 +85,7 @@ * @see IndexWriter#setSimilarity(Similarity) * @see Searcher#setSimilarity(Similarity) */ -public abstract class Similarity { +public abstract class Similarity implements Serializable { /** The Similarity implementation used by default. 
*/ private static Similarity defaultImpl = new DefaultSimilarity(); Index: src/java/org/apache/lucene/search/TermQuery.java =================================================================== RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/search/TermQuery.java,v retrieving revision 1.13 diff -u -r1.13 TermQuery.java --- src/java/org/apache/lucene/search/TermQuery.java 6 Sep 2004 22:09:13 -0000 1.13 +++ src/java/org/apache/lucene/search/TermQuery.java 10 Feb 2005 10:09:58 -0000 @@ -17,6 +17,8 @@ */ import java.io.IOException; +import java.util.Set; + import org.apache.lucene.index.Term; import org.apache.lucene.index.TermDocs; import org.apache.lucene.index.IndexReader; @@ -137,6 +139,13 @@ protected Weight createWeight(Searcher searcher) { return new TermWeight(searcher); } + + /** + * @see org.apache.lucene.search.Query#addTerms(java.util.Set) + */ + public void addTerms(Set terms) { + terms.add(getTerm()); + } /** Prints a user-readable version of this query. */ public String toString(String field) { @@ -167,4 +176,5 @@ return Float.floatToIntBits(getBoost()) ^ term.hashCode(); } + } Index: src/test/org/apache/lucene/search/TestSort.java =================================================================== RCS file: /home/cvspublic/jakarta-lucene/src/test/org/apache/lucene/search/TestSort.java,v retrieving revision 1.8 diff -u -r1.8 TestSort.java --- src/test/org/apache/lucene/search/TestSort.java 30 Aug 2004 20:52:15 -0000 1.8 +++ src/test/org/apache/lucene/search/TestSort.java 10 Feb 2005 10:09:59 -0000 @@ -344,12 +344,8 @@ HashMap scoresA = getScores (full.search (queryA)); // we'll test searching locally, remote and multi - // note: the multi test depends on each separate index containing - // the same documents as our local index, so the computed normalization - // will be the same. so we make a multi searcher over two equal document - // sets - not realistic, but necessary for testing. 
MultiSearcher remote = new MultiSearcher (new Searchable[] { getRemote() }); - MultiSearcher multi = new MultiSearcher (new Searchable[] { full, full }); + MultiSearcher multi = new MultiSearcher (new Searchable[] { full }); // change sorting and make sure relevancy stays the same