Index: lucene/core/src/test/org/apache/lucene/search/TestSort.java =================================================================== --- lucene/core/src/test/org/apache/lucene/search/TestSort.java (revision 1381712) +++ lucene/core/src/test/org/apache/lucene/search/TestSort.java (working copy) @@ -1039,7 +1039,7 @@ TopDocs td = tdc.topDocs(); ScoreDoc[] sd = td.scoreDocs; - assertEquals(10, sd.length); + assertEquals("collector=" + tdc + " sort=" + sort[i], 10, sd.length); } } } Index: lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java =================================================================== --- lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java (revision 1381712) +++ lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java (working copy) @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.List; import org.apache.lucene.index.AtomicReaderContext; @@ -60,54 +61,111 @@ final class BooleanScorer extends Scorer { - private static final class BooleanScorerCollector extends Collector { - private BucketTable bucketTable; - private int mask; - private Scorer scorer; + static final int TABLE_SIZE = 1 << 11; + static final int TABLE_MASK = TABLE_SIZE - 1; + + final Bucket[] buckets = new Bucket[TABLE_SIZE]; + + // Which slots are valid for the current chunk + final int[] validSlots = new int[TABLE_SIZE]; + + // How many valid slots + int validCount; + + // End of current chunk we are processing + private int end; + + private final SubScorer[] scorers; + private final float[] coordFactors; + private final int minNrShouldMatch; + + private abstract class BooleanScorerCollectorBase extends Collector { + protected final Scorer scorer; + + protected BooleanScorerCollectorBase(Scorer scorer) { + this.scorer = scorer; + } + + @Override + public void setNextReader(AtomicReaderContext context) { + // not needed by this implementation + } - public BooleanScorerCollector(int mask, BucketTable bucketTable) { - this.mask = mask; - this.bucketTable = bucketTable; + @Override + public void setScorer(Scorer scorer) { + // nocommit why does this trip... + // assert scorer == this.scorer: "scorer=" + scorer + " vs this.scorer=" + this.scorer; } @Override + public boolean acceptsDocsOutOfOrder() { + return true; + } + } + + // First, not prohibited + private final class FirstBooleanScorerCollector extends BooleanScorerCollectorBase { + + public FirstBooleanScorerCollector(Scorer scorer) { + super(scorer); + } + + @Override public void collect(final int doc) throws IOException { - final BucketTable table = bucketTable; - final int i = doc & BucketTable.MASK; - final Bucket bucket = table.buckets[i]; - + //System.out.println(" collect doc=" + doc); + final int slot = doc & TABLE_MASK; + final Bucket bucket = buckets[slot]; + assert bucket.doc != doc: "bucket.doc=" + bucket.doc + "; doc=" + doc; + bucket.doc = doc; // set doc + bucket.score = scorer.score(); // initialize score + bucket.prohibited = false; + bucket.coord = 1; // initialize coord + validSlots[validCount++] = slot; + } + } + + // nocommit need random test that covers all of this... + + // Not first, not prohibited + private final class BooleanScorerCollector extends BooleanScorerCollectorBase { + + public BooleanScorerCollector(Scorer scorer) { + super(scorer); + } + + @Override + public void collect(final int doc) throws IOException { + final int slot = doc & TABLE_MASK; + final Bucket bucket = buckets[slot]; if (bucket.doc != doc) { // invalid bucket bucket.doc = doc; // set doc bucket.score = scorer.score(); // initialize score - bucket.bits = mask; // initialize mask bucket.coord = 1; // initialize coord - - bucket.next = table.first; // push onto valid list - table.first = bucket; - } else { // valid bucket + bucket.prohibited = false; + validSlots[validCount++] = slot; + } else if (!bucket.prohibited) { bucket.score += scorer.score(); // increment score - bucket.bits |= mask; // add bits in mask bucket.coord++; // increment coord } } + } + + // Not first, prohibited + private final class ProhibitedBooleanScorerCollector extends BooleanScorerCollectorBase { - @Override - public void setNextReader(AtomicReaderContext context) { - // not needed by this implementation + public ProhibitedBooleanScorerCollector(Scorer scorer) { + super(scorer); } @Override - public void setScorer(Scorer scorer) { - this.scorer = scorer; + public void collect(final int doc) throws IOException { + final Bucket bucket = buckets[doc & TABLE_MASK]; + bucket.doc = doc; + bucket.prohibited = true; + // NOTE: do not add to valid slots } - - @Override - public boolean acceptsDocsOutOfOrder() { - return true; - } + } - } - // An internal class which is used in score(Collector, int) for setting the // current score. This is required since Collector exposes a setScorer method // and implementations that need the score will call scorer.score(). @@ -140,159 +198,157 @@ static final class Bucket { int doc = -1; // tells if bucket is valid double score; // incremental score - // TODO: break out bool anyProhibited, int - // numRequiredMatched; then we can remove 32 limit on - // required clauses - int bits; // used for bool constraints + boolean prohibited; // used for NOT constraint int coord; // count of terms in score - Bucket next; // next valid bucket } - /** A simple hash table of document scores within a range. */ - static final class BucketTable { - public static final int SIZE = 1 << 11; - public static final int MASK = SIZE - 1; - - final Bucket[] buckets = new Bucket[SIZE]; - Bucket first = null; // head of valid list - - public BucketTable() { - // Pre-fill to save the lazy init when collecting - // each sub: - for(int idx=0;idx { + Scorer scorer; + boolean prohibited; + + public ScorerAndProhibited(Scorer scorer, boolean prohibited) { + this.scorer = scorer; + this.prohibited = prohibited; + } + + @Override + public int compareTo(ScorerAndProhibited other) { + if (prohibited && !other.prohibited) { + // All prohibited clauses first: + return -1; + } else if (!prohibited && other.prohibited) { + // All prohibited clauses first: + return 1; + } else { + // Then, sort by lower docID (= higher hit count, approx): + return scorer.docID() - other.scorer.docID(); + } + } + } BooleanScorer(BooleanWeight weight, boolean disableCoord, int minNrShouldMatch, List optionalScorers, List prohibitedScorers, int maxCoord) throws IOException { super(weight); this.minNrShouldMatch = minNrShouldMatch; + // Pre-fill to save the lazy init when collecting + // each sub: + for(int idx=0;idx subs = new ArrayList(); + if (optionalScorers != null && optionalScorers.size() > 0) { for (Scorer scorer : optionalScorers) { - if (scorer.nextDoc() != NO_MORE_DOCS) { - scorers = new SubScorer(scorer, false, false, bucketTable.newCollector(0), scorers); + int doc = scorer.nextDoc(); + if (doc != NO_MORE_DOCS) { + subs.add(new ScorerAndProhibited(scorer, false)); } } } if (prohibitedScorers != null && prohibitedScorers.size() > 0) { for (Scorer scorer : prohibitedScorers) { - if (scorer.nextDoc() != NO_MORE_DOCS) { - scorers = new SubScorer(scorer, false, true, bucketTable.newCollector(PROHIBITED_MASK), scorers); + int doc = scorer.nextDoc(); + if (doc != NO_MORE_DOCS) { + subs.add(new ScorerAndProhibited(scorer, true)); } } } + Collections.sort(subs); + + scorers = new SubScorer[subs.size()]; + //System.out.println("subs"); + for(int i=0;i getChildren() { List children = new ArrayList(); - for (SubScorer sub = scorers; sub != null; sub = sub.next) { + for (SubScorer sub : scorers) { // TODO: fix this if BQ ever sends us required clauses children.add(new ChildScorer(sub.scorer, sub.prohibited ? "MUST_NOT" : "SHOULD")); }