Index: lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java =================================================================== --- lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java (revision 1362969) +++ lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java (working copy) @@ -18,6 +18,8 @@ */ import java.io.IOException; +import java.util.Collection; +import java.util.Collections; import java.util.Set; import java.util.Arrays; @@ -325,6 +327,16 @@ } @Override + public float freq() throws IOException { + return subQueryScorer.freq(); + } + + @Override + public Collection getChildren() { + return Collections.singletonList(new ChildScorer(subQueryScorer, "CUSTOM")); + } + + @Override public int advance(int target) throws IOException { int doc = subQueryScorer.advance(target); if (doc != NO_MORE_DOCS) { Index: lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java =================================================================== --- lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java (revision 1362969) +++ lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java (working copy) @@ -25,6 +25,8 @@ import org.apache.lucene.util.ToStringUtils; import java.io.IOException; +import java.util.Collection; +import java.util.Collections; import java.util.Set; import java.util.Map; @@ -164,6 +166,16 @@ return score>Float.NEGATIVE_INFINITY ? 
score : -Float.MAX_VALUE; } + @Override + public float freq() throws IOException { + return scorer.freq(); + } + + @Override + public Collection getChildren() { + return Collections.singletonList(new ChildScorer(scorer, "CUSTOM")); + } + public Explanation explain(int doc) throws IOException { Explanation subQueryExpl = weight.qWeight.explain(readerContext ,doc); if (!subQueryExpl.isMatch()) { Index: lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java =================================================================== --- lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java (revision 1362969) +++ lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java (working copy) @@ -82,4 +82,9 @@ public float score() throws IOException { return values.floatVal(doc); } + + @Override + public float freq() throws IOException { + return 1; + } } Index: lucene/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java =================================================================== --- lucene/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java (revision 1362969) +++ lucene/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java (working copy) @@ -158,6 +158,11 @@ return score>Float.NEGATIVE_INFINITY ? 
score : -Float.MAX_VALUE; } + @Override + public float freq() throws IOException { + return 1; + } + public Explanation explain(int doc) throws IOException { float sc = qWeight * vals.floatVal(doc); Index: lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java =================================================================== --- lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java (revision 1362969) +++ lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java (working copy) @@ -81,6 +81,7 @@ Scorer[] scorers = new Scorer[] {new Scorer(weight) { private int doc = -1; @Override public float score() { return 0; } + @Override public float freq() { return 0; } @Override public int docID() { return doc; } @Override public int nextDoc() { Index: lucene/core/src/test/org/apache/lucene/search/TestSubScorerFreqs.java =================================================================== --- lucene/core/src/test/org/apache/lucene/search/TestSubScorerFreqs.java (revision 1362969) +++ lucene/core/src/test/org/apache/lucene/search/TestSubScorerFreqs.java (working copy) @@ -75,7 +75,7 @@ private final Set relationships; public CountingCollector(Collector other) { - this(other, new HashSet(Arrays.asList(Occur.MUST.toString(), Occur.SHOULD.toString(), Occur.MUST_NOT.toString()))); + this(other, new HashSet(Arrays.asList("MUST", "SHOULD", "MUST_NOT"))); } public CountingCollector(Collector other, Set relationships) { @@ -161,9 +161,9 @@ query.add(inner, Occur.MUST); query.add(aQuery, Occur.MUST); query.add(dQuery, Occur.MUST); - @SuppressWarnings({"rawtypes","unchecked"}) Set[] occurList = new Set[] { - Collections.singleton(Occur.MUST.toString()), - new HashSet(Arrays.asList(Occur.MUST.toString(), Occur.SHOULD.toString())) + Set[] occurList = new Set[] { + Collections.singleton("MUST"), + new HashSet(Arrays.asList("MUST", "SHOULD")) }; for (Set occur : occurList) { CountingCollector c = new CountingCollector(TopScoreDocCollector.create( @@ 
-171,7 +171,7 @@ s.search(query, null, c); final int maxDocs = s.getIndexReader().maxDoc(); assertEquals(maxDocs, c.docCounts.size()); - boolean includeOptional = occur.contains(Occur.SHOULD.toString()); + boolean includeOptional = occur.contains("SHOULD"); for (int i = 0; i < maxDocs; i++) { Map doc0 = c.docCounts.get(i); assertEquals(includeOptional ? 5 : 4, doc0.size()); Index: lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java =================================================================== --- lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java (revision 1362969) +++ lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java (working copy) @@ -43,6 +43,10 @@ // once per document. return idx == scores.length ? Float.NaN : scores[idx++]; } + + @Override public float freq() throws IOException { + return 1; + } @Override public int docID() { return doc; } Index: lucene/core/src/test/org/apache/lucene/search/TestBoolean2.java =================================================================== --- lucene/core/src/test/org/apache/lucene/search/TestBoolean2.java (revision 1362969) +++ lucene/core/src/test/org/apache/lucene/search/TestBoolean2.java (working copy) @@ -250,7 +250,7 @@ searcher.setSimilarity(oldSimilarity); } } - + @Test public void testRandomQueries() throws Exception { String[] vals = {"w1","w2","w3","w4","w5","xx","yy","zzz"}; Index: lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java =================================================================== --- lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java (revision 1362969) +++ lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java (working copy) @@ -225,6 +225,11 @@ public float score() { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } + + @Override + public float freq() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } @Override public int 
docID() { Index: lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java =================================================================== --- lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java (revision 1362969) +++ lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java (working copy) @@ -35,6 +35,10 @@ @Override public float score() { return idx == scores.length ? Float.NaN : scores[idx]; } + + @Override public float freq() { + return 1; + } @Override public int docID() { return idx; } Index: lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java =================================================================== --- lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java (revision 1362969) +++ lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java (working copy) @@ -34,6 +34,9 @@ @Override public float score() throws IOException { return 0; } + + @Override + public float freq() throws IOException { return 0; } @Override public int docID() { return 0; } Index: lucene/core/src/test/org/apache/lucene/search/Test2LUCENE2590.java =================================================================== --- lucene/core/src/test/org/apache/lucene/search/Test2LUCENE2590.java (revision 0) +++ lucene/core/src/test/org/apache/lucene/search/Test2LUCENE2590.java (working copy) @@ -0,0 +1,203 @@ +package org.apache.lucene.search; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Scorer.ChildScorer; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TopDocsCollector; +import org.apache.lucene.search.TopScoreDocCollector; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.util.Version; + +// nocommit: change this to a real test +// nocommit: add a base class that collects per-scorer data from the scorer tree +// and test all queries with it +public class 
Test2LUCENE2590 extends LuceneTestCase { + Analyzer analyzer; + Directory dir; + + static final String F1 = "title"; + static final String F2 = "body"; + + @Override + public void setUp() throws Exception { + super.setUp(); + analyzer = new MockAnalyzer(random()); + dir = newDirectory(); + } + + @Override + public void tearDown() throws Exception { + dir.close(); + super.tearDown(); + } + + public void test() throws IOException { + makeIndex(); + searchIndex(); + } + + void makeIndex() throws IOException { + IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer ); + IndexWriter writer = new IndexWriter( dir, config ); + //writer.addDocument( doc( "lucene", "lucene is a very popular search engine library. lucene runs overall in the world. lucene is great!" ) ); + writer.addDocument( doc( "lucene", "lucene is a very popular search engine library" ) ); + writer.addDocument( doc( "solr", "solr is a very popular search server and is using lucene" ) ); + writer.addDocument( doc( "nutch", "nutch is an internet search engine with web crawler and is using lucene and hadoop" ) ); + writer.close(); + } + + static Document doc( String v1, String v2 ){ + Document doc = new Document(); + if( v1 != null ) + doc.add( field( F1, v1 ) ); + if( v2 != null ) + doc.add( field( F2, v2 ) ); + return doc; + } + + static Field field( String field, String value ){ + return new TextField(field, value, Store.YES); + } + + void searchIndex() throws IOException { + IndexReader reader = DirectoryReader.open(dir); + IndexSearcher searcher = new IndexSearcher(reader); + printResult( searcher, query( new Term( F1, "lucene"), new Term( F2, "lucene" ), new Term( F2, "search" ) ) ); + reader.close(); + } + + static Query query( Term... 
ts ){ + if( ts == null || ts.length == 0 ){ + throw new IllegalArgumentException(); + } + if( ts.length == 1 ) + return new TermQuery( ts[0] ); + BooleanQuery bq = new BooleanQuery(); + for( Term t : ts ){ + bq.add( new TermQuery( t ), Occur.SHOULD ); + } + return bq; + } + + static void printResult( IndexSearcher searcher, Query query ) throws IOException { + MyCollector collector = new MyCollector(); + searcher.search( query, collector ); + TopDocs docs = collector.topDocs(); + for( ScoreDoc scoreDoc : docs.scoreDocs ){ + Document doc = searcher.doc( scoreDoc.doc ); + float score = scoreDoc.score; + System.out.println( score + " : " + doc.get( F1 ) + " / " + doc.get( F2 ) ); + System.out.println( " freq : " + collector.freq( scoreDoc.doc) ); + } + } + + static class MyCollector extends Collector { + + private TopDocsCollector collector; + private int docBase; + + public final Map docCounts = new HashMap(); + private final Set tqsSet = new HashSet(); + + MyCollector(){ + collector = TopScoreDocCollector.create( 10, true ); + } + + @Override + public boolean acceptsDocsOutOfOrder() { + return false; + } + + @Override + public void collect(int doc) throws IOException { + int freq = 0; + for(Scorer scorer : tqsSet) { + if (doc == scorer.docID()) { + freq += scorer.freq(); + } + } + docCounts.put(doc + docBase, freq); + collector.collect(doc); + } + + @Override + public void setNextReader(AtomicReaderContext context) throws IOException { + this.docBase = context.docBase; + collector.setNextReader(context); + } + + @Override + public void setScorer(Scorer scorer) throws IOException { + collector.setScorer( scorer ); + tqsSet.clear(); + fillLeaves(scorer, tqsSet); + System.out.println(scorer); + System.out.println(tqsSet.size()); + } + + private void fillLeaves(Scorer scorer, Set set) { + if (scorer.getWeight().getQuery() instanceof TermQuery) { + set.add(scorer); + } else { + for (ChildScorer child : scorer.getChildren()) { + fillLeaves(child.child, set); + } + } + } + 
+ public TopDocs topDocs(){ + return collector.topDocs(); + } + + public int freq( int doc ) throws IOException { + return docCounts.get( doc ); + } + } +} Index: lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java =================================================================== --- lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java (revision 1362969) +++ lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java (working copy) @@ -19,6 +19,7 @@ import org.apache.lucene.util.ArrayUtil; import java.io.IOException; +import java.util.ArrayList; import java.util.Collection; import java.util.Comparator; @@ -136,4 +137,18 @@ } return sum * coord; } + + @Override + public float freq() throws IOException { + return scorers.length; + } + + @Override + public Collection getChildren() { + ArrayList children = new ArrayList(scorers.length); + for (Scorer scorer : scorers) { + children.add(new ChildScorer(scorer, "MUST")); + } + return children; + } } Index: lucene/core/src/java/org/apache/lucene/search/ScoreCachingWrappingScorer.java =================================================================== --- lucene/core/src/java/org/apache/lucene/search/ScoreCachingWrappingScorer.java (revision 1362969) +++ lucene/core/src/java/org/apache/lucene/search/ScoreCachingWrappingScorer.java (working copy) @@ -18,6 +18,8 @@ */ import java.io.IOException; +import java.util.Collection; +import java.util.Collections; /** * A {@link Scorer} which wraps another scorer and caches the score of the @@ -59,6 +61,11 @@ } @Override + public float freq() throws IOException { + return scorer.freq(); + } + + @Override public int docID() { return scorer.docID(); } @@ -77,5 +84,9 @@ public int advance(int target) throws IOException { return scorer.advance(target); } - + + @Override + public Collection getChildren() { + return Collections.singletonList(new ChildScorer(scorer, "CACHED")); + } } Index: 
lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java =================================================================== --- lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java (revision 1362969) +++ lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java (working copy) @@ -17,6 +17,8 @@ */ import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; /** A Scorer for queries with a required part and an optional part. * Delays skipTo() on the optional part until a score() is needed. @@ -39,6 +41,8 @@ Scorer optScorer) { super(reqScorer.weight); + assert reqScorer != null; + assert optScorer != null; this.reqScorer = reqScorer; this.optScorer = optScorer; } @@ -80,5 +84,19 @@ return optScorerDoc == curDoc ? reqScore + optScorer.score() : reqScore; } + @Override + public float freq() throws IOException { + // we might have deferred advance() + score(); + return (optScorer != null && optScorer.docID() == reqScorer.docID()) ? 2 : 1; + } + + @Override + public Collection getChildren() { + ArrayList children = new ArrayList(2); + children.add(new ChildScorer(reqScorer, "MUST")); + children.add(new ChildScorer(optScorer, "SHOULD")); + return children; + } } Index: lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java =================================================================== --- lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java (revision 1362969) +++ lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java (working copy) @@ -131,6 +131,11 @@ } @Override + public float freq() throws IOException { + return 1; // nocommit + } + + @Override public int docID() { return scorer.docID(); } @@ -310,8 +315,8 @@ } @Override - public float freq() { - return coordinator.nrMatchers; + public float freq() throws IOException { + return countingSumScorer.freq(); } @Override @@ -323,13 +328,13 @@ public Collection getChildren() { ArrayList children = new ArrayList(); for (Scorer s 
: optionalScorers) { - children.add(new ChildScorer(s, Occur.SHOULD.toString())); + children.add(new ChildScorer(s, "SHOULD")); } for (Scorer s : prohibitedScorers) { - children.add(new ChildScorer(s, Occur.MUST_NOT.toString())); + children.add(new ChildScorer(s, "MUST_NOT")); } for (Scorer s : requiredScorers) { - children.add(new ChildScorer(s, Occur.MUST.toString())); + children.add(new ChildScorer(s, "MUST")); } return children; } Index: lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java =================================================================== --- lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java (revision 1362969) +++ lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java (working copy) @@ -18,8 +18,9 @@ */ import java.io.IOException; +import java.util.Collection; +import java.util.Collections; - /** A Scorer for queries with a required subscorer * and an excluding (prohibited) sub DocIdSetIterator. *
@@ -104,6 +105,16 @@ } @Override + public float freq() throws IOException { + return reqScorer.freq(); + } + + @Override + public Collection getChildren() { + return Collections.singletonList(new ChildScorer(reqScorer, "FILTERED")); + } + + @Override public int advance(int target) throws IOException { if (reqScorer == null) { return doc = NO_MORE_DOCS; Index: lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java =================================================================== --- lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java (revision 1362969) +++ lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java (working copy) @@ -20,42 +20,20 @@ import java.util.List; import java.io.IOException; -import org.apache.lucene.util.ScorerDocQueue; - /** A Scorer for OR like queries, counterpart of ConjunctionScorer. * This Scorer implements {@link Scorer#advance(int)} and uses advance() on the given Scorers. */ -class DisjunctionSumScorer extends Scorer { - /** The number of subscorers. */ - private final int nrScorers; - - /** The subscorers. */ - protected final List subScorers; - +class DisjunctionSumScorer extends DisjunctionScorer { /** The minimum number of scorers that should match. */ private final int minimumNrMatchers; - /** The scorerDocQueue contains all subscorers ordered by their current doc(), - * with the minimum at the top. - *
The scorerDocQueue is initialized the first time nextDoc() or advance() is called. - *
An exhausted scorer is immediately removed from the scorerDocQueue. - *
If less than the minimumNrMatchers scorers - * remain in the scorerDocQueue nextDoc() and advance() return false. - *

- * After each to call to nextDoc() or advance() - * currentSumScore is the total score of the current matching doc, - * nrMatchers is the number of matching scorers, - * and all scorers are after the matching doc, or are exhausted. - */ - private final ScorerDocQueue scorerDocQueue; - /** The document number of the current match. */ - private int currentDoc = -1; + private int doc = -1; /** The number of subscorers that provide the current match. */ protected int nrMatchers = -1; - private double currentScore = Float.NaN; + private double score = Float.NaN; /** Construct a DisjunctionScorer. * @param weight The weight to be used. @@ -69,21 +47,16 @@ * it more efficient to use ConjunctionScorer. */ public DisjunctionSumScorer(Weight weight, List subScorers, int minimumNrMatchers) throws IOException { - super(weight); - - nrScorers = subScorers.size(); + super(weight, subScorers.toArray(new Scorer[subScorers.size()]), subScorers.size()); if (minimumNrMatchers <= 0) { throw new IllegalArgumentException("Minimum nr of matchers must be positive"); } - if (nrScorers <= 1) { + if (numScorers <= 1) { throw new IllegalArgumentException("There must be at least 2 subScorers"); } this.minimumNrMatchers = minimumNrMatchers; - this.subScorers = subScorers; - - scorerDocQueue = initScorerDocQueue(); } /** Construct a DisjunctionScorer, using one as the minimum number @@ -93,119 +66,66 @@ this(weight, subScorers, 1); } - /** Called the first time nextDoc() or advance() is called to - * initialize scorerDocQueue. - * @return - */ - private ScorerDocQueue initScorerDocQueue() throws IOException { - final ScorerDocQueue docQueue = new ScorerDocQueue(nrScorers); - for (final Scorer se : subScorers) { - if (se.nextDoc() != NO_MORE_DOCS) { - docQueue.insert(se); - } - } - return docQueue; - } - - /** Scores and collects all matching documents. - * @param collector The collector to which all matching documents are passed through. 
- */ @Override - public void score(Collector collector) throws IOException { - collector.setScorer(this); - while (nextDoc() != NO_MORE_DOCS) { - collector.collect(currentDoc); - } - } - - /** Expert: Collects matching documents in a range. Hook for optimization. - * Note that {@link #nextDoc()} must be called once before this method is called - * for the first time. - * @param collector The collector to which all matching documents are passed through. - * @param max Do not score documents past this. - * @return true if more matching documents may remain. - */ - @Override - public boolean score(Collector collector, int max, int firstDocID) throws IOException { - // firstDocID is ignored since nextDoc() sets 'currentDoc' - collector.setScorer(this); - while (currentDoc < max) { - collector.collect(currentDoc); - if (nextDoc() == NO_MORE_DOCS) { - return false; - } - } - return true; - } - - @Override public int nextDoc() throws IOException { - - if (scorerDocQueue.size() < minimumNrMatchers || !advanceAfterCurrent()) { - currentDoc = NO_MORE_DOCS; - } - return currentDoc; - } - - /** Advance all subscorers after the current document determined by the - * top of the scorerDocQueue. - * Repeat until at least the minimum number of subscorers match on the same - * document and all subscorers are after that document or are exhausted. - *
On entry the scorerDocQueue has at least minimumNrMatchers - * available. At least the scorer with the minimum document number will be advanced. - * @return true iff there is a match. - *
In case there is a match, currentDoc, currentSumScore, - * and nrMatchers describe the match. - * - * TODO: Investigate whether it is possible to use advance() when - * the minimum number of matchers is bigger than one, ie. try and use the - * character of ConjunctionScorer for the minimum number of matchers. - * Also delay calling score() on the sub scorers until the minimum number of - * matchers is reached. - *
For this, a Scorer array with minimumNrMatchers elements might - * hold Scorers at currentDoc that are temporarily popped from scorerQueue. - */ - protected boolean advanceAfterCurrent() throws IOException { - do { // repeat until minimum nr of matchers - currentDoc = scorerDocQueue.topDoc(); - currentScore = scorerDocQueue.topScore(); - nrMatchers = 1; - do { // Until all subscorers are after currentDoc - if (!scorerDocQueue.topNextAndAdjustElsePop()) { - if (scorerDocQueue.size() == 0) { - break; // nothing more to advance, check for last match. + while(true) { + while (subScorers[0].docID() == doc) { + if (subScorers[0].nextDoc() != NO_MORE_DOCS) { + heapAdjust(0); + } else { + heapRemoveRoot(); + if (numScorers < minimumNrMatchers) { + return doc = NO_MORE_DOCS; } } - if (scorerDocQueue.topDoc() != currentDoc) { - break; // All remaining subscorers are after currentDoc. - } - currentScore += scorerDocQueue.topScore(); - nrMatchers++; - } while (true); - + } + afterNext(); if (nrMatchers >= minimumNrMatchers) { - return true; - } else if (scorerDocQueue.size() < minimumNrMatchers) { - return false; + break; } - } while (true); + } + + return doc; } + private void afterNext() throws IOException { + final Scorer sub = subScorers[0]; + doc = sub.docID(); + score = sub.score(); + nrMatchers = 1; + countMatches(1); + countMatches(2); + } + + // TODO: this currently scores, but so did the previous impl + // TODO: remove recursion. + // TODO: if we separate scoring, out of here, modify this + // and afterNext() to terminate when nrMatchers == minimumNrMatchers + // then also change freq() to just always compute it from scratch + private void countMatches(int root) throws IOException { + if (root < numScorers && subScorers[root].docID() == doc) { + nrMatchers++; + score += subScorers[root].score(); + countMatches((root<<1)+1); + countMatches((root<<1)+2); + } + } + /** Returns the score of the current document matching the query. 
* Initially invalid, until {@link #nextDoc()} is called the first time. */ @Override - public float score() throws IOException { return (float)currentScore; } + public float score() throws IOException { + return (float)score; + } @Override public int docID() { - return currentDoc; + return doc; } - - /** Returns the number of subscorers matching the current document. - * Initially invalid, until {@link #nextDoc()} is called the first time. - */ - public int nrMatchers() { + + @Override + public float freq() throws IOException { return nrMatchers; } @@ -221,20 +141,24 @@ */ @Override public int advance(int target) throws IOException { - if (scorerDocQueue.size() < minimumNrMatchers) { - return currentDoc = NO_MORE_DOCS; + if (numScorers == 0) return doc = NO_MORE_DOCS; + while (subScorers[0].docID() < target) { + if (subScorers[0].advance(target) != NO_MORE_DOCS) { + heapAdjust(0); + } else { + heapRemoveRoot(); + if (numScorers == 0) { + return doc = NO_MORE_DOCS; + } + } } - if (target <= currentDoc) { - return currentDoc; + + afterNext(); + + if (nrMatchers >= minimumNrMatchers) { + return doc; + } else { + return nextDoc(); } - do { - if (scorerDocQueue.topDoc() >= target) { - return advanceAfterCurrent() ? currentDoc : (currentDoc = NO_MORE_DOCS); - } else if (!scorerDocQueue.topSkipToAndAdjustElsePop(target)) { - if (scorerDocQueue.size() < minimumNrMatchers) { - return currentDoc = NO_MORE_DOCS; - } - } - } while (true); } } Index: lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java =================================================================== --- lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java (revision 0) +++ lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java (working copy) @@ -0,0 +1,104 @@ +package org.apache.lucene.search; + +import java.util.ArrayList; +import java.util.Collection; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +abstract class DisjunctionScorer extends Scorer { + protected final Scorer subScorers[]; + protected int numScorers; + + protected DisjunctionScorer(Weight weight, Scorer subScorers[], int numScorers) { + super(weight); + this.subScorers = subScorers; + this.numScorers = numScorers; + heapify(); + } + + /** + * Organize subScorers into a min heap with scorers generating the earliest document on top. + */ + protected final void heapify() { + for (int i = (numScorers >> 1) - 1; i >= 0; i--) { + heapAdjust(i); + } + } + + /** + * The subtree of subScorers at root is a min heap except possibly for its root element. + * Bubble the root down as required to make the subtree a heap. 
+ */ + protected final void heapAdjust(int root) { + Scorer scorer = subScorers[root]; + int doc = scorer.docID(); + int i = root; + while (i <= (numScorers >> 1) - 1) { + int lchild = (i << 1) + 1; + Scorer lscorer = subScorers[lchild]; + int ldoc = lscorer.docID(); + int rdoc = Integer.MAX_VALUE, rchild = (i << 1) + 2; + Scorer rscorer = null; + if (rchild < numScorers) { + rscorer = subScorers[rchild]; + rdoc = rscorer.docID(); + } + if (ldoc < doc) { + if (rdoc < ldoc) { + subScorers[i] = rscorer; + subScorers[rchild] = scorer; + i = rchild; + } else { + subScorers[i] = lscorer; + subScorers[lchild] = scorer; + i = lchild; + } + } else if (rdoc < doc) { + subScorers[i] = rscorer; + subScorers[rchild] = scorer; + i = rchild; + } else { + return; + } + } + } + + /** + * Remove the root Scorer from subScorers and re-establish it as a heap + */ + protected final void heapRemoveRoot() { + if (numScorers == 1) { + subScorers[0] = null; + numScorers = 0; + } else { + subScorers[0] = subScorers[numScorers - 1]; + subScorers[numScorers - 1] = null; + --numScorers; + heapAdjust(0); + } + } + + @Override + public final Collection getChildren() { + ArrayList children = new ArrayList(numScorers); + for (int i = 0; i < numScorers; i++) { + children.add(new ChildScorer(subScorers[i], "SHOULD")); + } + return children; + } +} Index: lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java =================================================================== --- lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java (revision 1362969) +++ lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java (working copy) @@ -24,6 +24,8 @@ import org.apache.lucene.util.ToStringUtils; import java.io.IOException; +import java.util.Collection; +import java.util.Collections; import java.util.Set; @@ -221,6 +223,14 @@ public float score() throws IOException { return scorer.score(); } + + @Override + public float freq() throws IOException { return scorer.freq(); } + + 
@Override + public Collection<ChildScorer> getChildren() { + return Collections.singletonList(new ChildScorer(scorer, "FILTERED")); + } }; } } Index: lucene/core/src/java/org/apache/lucene/search/ConjunctionTermScorer.java =================================================================== --- lucene/core/src/java/org/apache/lucene/search/ConjunctionTermScorer.java (revision 1362969) +++ lucene/core/src/java/org/apache/lucene/search/ConjunctionTermScorer.java (working copy) @@ -95,6 +95,11 @@ } return sum * coord; } + + @Override + public float freq() { + return docsAndFreqs.length; + } static final class DocsAndFreqs { final DocsEnum docsAndFreqs; Index: lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java =================================================================== --- lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java (revision 1362969) +++ lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java (working copy) @@ -194,6 +194,11 @@ } @Override + public float freq() throws IOException { + return 1; + } + + @Override public int advance(int target) throws IOException { return docIdSetIterator.advance(target); } Index: lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java =================================================================== --- lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java (revision 1362969) +++ lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java (working copy) @@ -318,6 +318,11 @@ } @Override + public float freq() throws IOException { + return current.coord; + } + + @Override public void score(Collector collector) throws IOException { score(collector, Integer.MAX_VALUE, -1); } @@ -338,7 +343,8 @@ public Collection getChildren() { List children = new ArrayList(); for (SubScorer sub = scorers; sub != null; sub = sub.next) { - children.add(new ChildScorer(sub.scorer, sub.prohibited ? 
Occur.MUST_NOT.toString() : Occur.SHOULD.toString())); + // TODO: fix this if BQ ever sends us required clauses + children.add(new ChildScorer(sub.scorer, sub.prohibited ? "MUST_NOT" : "SHOULD")); } return children; } Index: lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java =================================================================== --- lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java (revision 1362969) +++ lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java (working copy) @@ -17,9 +17,6 @@ */ import java.io.IOException; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; /** * The Scorer for DisjunctionMaxQuery. The union of all documents generated by the the subquery scorers @@ -27,11 +24,7 @@ * by the subquery scorers that generate that document, plus tieBreakerMultiplier times the sum of the scores * for the other subqueries that generate the document. */ -class DisjunctionMaxScorer extends Scorer { - - /* The scorers for subqueries that have remaining docs, kept as a min heap by number of next doc. */ - private final Scorer[] subScorers; - private int numScorers; +class DisjunctionMaxScorer extends DisjunctionScorer { /* Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result. */ private final float tieBreakerMultiplier; private int doc = -1; @@ -56,15 +49,8 @@ */ public DisjunctionMaxScorer(Weight weight, float tieBreakerMultiplier, Scorer[] subScorers, int numScorers) { - super(weight); + super(weight, subScorers, numScorers); this.tieBreakerMultiplier = tieBreakerMultiplier; - // The passed subScorers array includes only scorers which have documents - // (DisjunctionMaxQuery takes care of that), and their nextDoc() was already - // called. 
- this.subScorers = subScorers; - this.numScorers = numScorers; - - heapify(); } @Override @@ -114,6 +100,24 @@ } @Override + public float freq() throws IOException { + int doc = subScorers[0].docID(); + int size = numScorers; + return 1 + freq(1, size, doc) + freq(2, size, doc); + } + + // Recursively count the subScorers in the heap subtree at root that are positioned on doc + private int freq(int root, int size, int doc) throws IOException { + int freq = 0; + if (root < size && subScorers[root].docID() == doc) { + freq++; + freq += freq((root<<1)+1, size, doc); + freq += freq((root<<1)+2, size, doc); + } + return freq; + } + + @Override public int advance(int target) throws IOException { if (numScorers == 0) return doc = NO_MORE_DOCS; while (subScorers[0].docID() < target) { @@ -128,70 +132,4 @@ } return doc = subScorers[0].docID(); } - - // Organize subScorers into a min heap with scorers generating the earliest document on top. - private void heapify() { - for (int i = (numScorers >> 1) - 1; i >= 0; i--) { - heapAdjust(i); - } - } - - /* The subtree of subScorers at root is a min heap except possibly for its root element. - * Bubble the root down as required to make the subtree a heap. 
- */ - private void heapAdjust(int root) { - Scorer scorer = subScorers[root]; - int doc = scorer.docID(); - int i = root; - while (i <= (numScorers >> 1) - 1) { - int lchild = (i << 1) + 1; - Scorer lscorer = subScorers[lchild]; - int ldoc = lscorer.docID(); - int rdoc = Integer.MAX_VALUE, rchild = (i << 1) + 2; - Scorer rscorer = null; - if (rchild < numScorers) { - rscorer = subScorers[rchild]; - rdoc = rscorer.docID(); - } - if (ldoc < doc) { - if (rdoc < ldoc) { - subScorers[i] = rscorer; - subScorers[rchild] = scorer; - i = rchild; - } else { - subScorers[i] = lscorer; - subScorers[lchild] = scorer; - i = lchild; - } - } else if (rdoc < doc) { - subScorers[i] = rscorer; - subScorers[rchild] = scorer; - i = rchild; - } else { - return; - } - } - } - - // Remove the root Scorer from subScorers and re-establish it as a heap - private void heapRemoveRoot() { - if (numScorers == 1) { - subScorers[0] = null; - numScorers = 0; - } else { - subScorers[0] = subScorers[numScorers - 1]; - subScorers[numScorers - 1] = null; - --numScorers; - heapAdjust(0); - } - } - - @Override - public Collection getChildren() { - final ChildScorer[] children = new ChildScorer[numScorers]; - for (int i = 0; i< numScorers; i++) { - children[i] = new ChildScorer(subScorers[i], BooleanClause.Occur.SHOULD.toString()); - } - return Collections.unmodifiableCollection(Arrays.asList(children)); - } } Index: lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java =================================================================== --- lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java (revision 1362969) +++ lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java (working copy) @@ -68,6 +68,11 @@ } @Override + public float freq() { + return 1; + } + + @Override public int advance(int target) throws IOException { doc = target-1; return nextDoc(); Index: lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java 
=================================================================== --- lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java (revision 1362969) +++ lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java (working copy) @@ -158,7 +158,7 @@ for (Weight w : weights) { // we will advance() subscorers Scorer subScorer = w.scorer(context, true, false, acceptDocs); - if (subScorer != null && subScorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + if (subScorer != null) { scorers[idx++] = subScorer; } } Index: lucene/core/src/java/org/apache/lucene/search/Scorer.java =================================================================== --- lucene/core/src/java/org/apache/lucene/search/Scorer.java (revision 1362969) +++ lucene/core/src/java/org/apache/lucene/search/Scorer.java (working copy) @@ -98,9 +98,7 @@ * "sloppy" the match was. * * @lucene.experimental */ - public float freq() throws IOException { - throw new UnsupportedOperationException(this + " does not implement freq()"); - } + public abstract float freq() throws IOException; /** returns parent Weight * @lucene.experimental Index: lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java =================================================================== --- lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java (revision 1362969) +++ lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java (working copy) @@ -161,6 +161,7 @@ private final Bits acceptDocs; private float parentScore; + private float parentFreq = 1; private int childDoc = -1; private int parentDoc; @@ -218,6 +219,7 @@ if (childDoc < parentDoc) { if (doScores) { parentScore = parentScorer.score(); + parentFreq = parentScorer.freq(); } //System.out.println(" " + childDoc); return childDoc; @@ -248,6 +250,11 @@ } @Override + public float freq() throws IOException { + return parentFreq; + } + + @Override public int advance(int childTarget) throws IOException 
{ assert childTarget >= parentBits.length() || !parentBits.get(childTarget); @@ -269,6 +276,7 @@ } if (doScores) { parentScore = parentScorer.score(); + parentFreq = parentScorer.freq(); } final int firstChild = parentBits.prevSetBit(parentDoc-1); //System.out.println(" firstChild=" + firstChild); Index: lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java =================================================================== --- lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java (revision 1362969) +++ lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java (working copy) @@ -208,6 +208,11 @@ } while (docId != DocIdSetIterator.NO_MORE_DOCS); return docId; } + + @Override + public float freq() { + return 1; + } } // This impl that tracks whether a docid has already been emitted. This check makes sure that docs aren't emitted Index: lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java =================================================================== --- lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java (revision 1362969) +++ lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java (working copy) @@ -218,6 +218,7 @@ private int parentDoc = -1; private int prevParentDoc; private float parentScore; + private float parentFreq; private int nextChildDoc; private int[] pendingChildDocs = new int[5]; @@ -299,7 +300,9 @@ } float totalScore = 0; + float totalFreq = 0; float maxScore = Float.NEGATIVE_INFINITY; + float maxFreq = 0; childDocUpto = 0; do { @@ -315,9 +318,12 @@ if (scoreMode != ScoreMode.None) { // TODO: specialize this into dedicated classes per-scoreMode final float childScore = childScorer.score(); + final float childFreq = childScorer.freq(); pendingChildScores[childDocUpto] = childScore; maxScore = Math.max(childScore, maxScore); + maxFreq = Math.max(childFreq, maxFreq); totalScore += childScore; + 
totalFreq += childFreq; } childDocUpto++; nextChildDoc = childScorer.nextDoc(); @@ -329,12 +335,15 @@ switch(scoreMode) { case Avg: parentScore = totalScore / childDocUpto; + parentFreq = totalFreq / childDocUpto; break; case Max: parentScore = maxScore; + parentFreq = maxFreq; break; case Total: parentScore = totalScore; + parentFreq = totalFreq; break; case None: break; @@ -354,6 +363,11 @@ public float score() throws IOException { return parentScore; } + + @Override + public float freq() { + return parentFreq; + } @Override public int advance(int parentTarget) throws IOException { Index: lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java =================================================================== --- lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java (revision 1362969) +++ lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java (working copy) @@ -327,6 +327,11 @@ public float score() { return score; } + + @Override + public float freq() { + return 1; // TODO: does anything else make sense?... duplicate of grouping's FakeScorer btw? + } @Override public int docID() { Index: lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java =================================================================== --- lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java (revision 1362969) +++ lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java (working copy) @@ -97,6 +97,11 @@ public float score() { return score; } + + @Override + public float freq() { + throw new UnsupportedOperationException(); // nocommit: wtf does this class do? 
+ } @Override public int docID() { Index: solr/core/src/java/org/apache/solr/schema/LatLonType.java =================================================================== --- solr/core/src/java/org/apache/solr/schema/LatLonType.java (revision 1362969) +++ solr/core/src/java/org/apache/solr/schema/LatLonType.java (working copy) @@ -485,6 +485,11 @@ return (float)(dist * qWeight); } + @Override + public float freq() throws IOException { + return 1; + } + public Explanation explain(int doc) throws IOException { advance(doc); boolean matched = this.doc == doc; Index: solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java =================================================================== --- solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java (revision 1362969) +++ solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java (working copy) @@ -532,6 +532,11 @@ public float score() throws IOException { return score; } + + @Override + public float freq() throws IOException { + return 1; + } @Override public int advance(int target) throws IOException { Index: solr/core/src/java/org/apache/solr/search/SolrConstantScoreQuery.java =================================================================== --- solr/core/src/java/org/apache/solr/search/SolrConstantScoreQuery.java (revision 1362969) +++ solr/core/src/java/org/apache/solr/search/SolrConstantScoreQuery.java (working copy) @@ -186,6 +186,11 @@ public float score() throws IOException { return theScore; } + + @Override + public float freq() throws IOException { + return 1; + } @Override public int advance(int target) throws IOException {