Index: src/java/org/apache/lucene/search/BooleanScorer.java =================================================================== --- src/java/org/apache/lucene/search/BooleanScorer.java (revision 765580) +++ src/java/org/apache/lucene/search/BooleanScorer.java (working copy) @@ -73,6 +73,10 @@ BooleanScorer(Similarity similarity, int minNrShouldMatch) { super(similarity); this.minNrShouldMatch = minNrShouldMatch; + + // compute the coord factors after every add. This removes checking if + // coordFactors == null in every call to score(). + computeCoordFactors(); } static final class SubScorer { @@ -99,30 +103,38 @@ throws IOException { int mask = 0; if (required || prohibited) { - if (nextMask == 0) - throw new IndexOutOfBoundsException - ("More than 32 required/prohibited clauses in query."); + if (nextMask == 0) { + throw new IndexOutOfBoundsException( + "More than 32 required/prohibited clauses in query."); + } mask = nextMask; nextMask = nextMask << 1; - } else - mask = 0; + } - if (!prohibited) + if (!prohibited) { maxCoord++; - - if (prohibited) + if (required) { + requiredMask |= mask; // update required mask + } + } else { + // prohibited prohibitedMask |= mask; // update prohibited mask - else if (required) - requiredMask |= mask; // update required mask + } scorers = new SubScorer(scorer, required, prohibited, bucketTable.newCollector(mask), scorers); + + // compute the coord factors after every add. This removes checking if + // coordFactors == null in every call to score(). + computeCoordFactors(); } private final void computeCoordFactors() { coordFactors = new float[maxCoord]; - for (int i = 0; i < maxCoord; i++) - coordFactors[i] = getSimilarity().coord(i, maxCoord-1); + Similarity sim = getSimilarity(); + for (int i = 0; i < maxCoord; i++) { + coordFactors[i] = sim.coord(i, maxCoord - 1); + } } private int end; @@ -145,9 +157,6 @@ } protected boolean score(Collector collector, int max) throws IOException { - if (coordFactors == null) - computeCoordFactors(); - boolean more; Bucket tmp; @@ -241,8 +250,6 @@ } public float score() { - if (coordFactors == null) - computeCoordFactors(); return current.score * coordFactors[current.coord]; } Index: src/java/org/apache/lucene/search/BooleanScorer2.java =================================================================== --- src/java/org/apache/lucene/search/BooleanScorer2.java (revision 765580) +++ src/java/org/apache/lucene/search/BooleanScorer2.java (working copy) @@ -36,10 +36,10 @@ private ArrayList prohibitedScorers = new ArrayList(); private class Coordinator { + float[] coordFactors = null; int maxCoord = 0; // to be increased for each non prohibited scorer + int nrMatchers; // to be increased by score() of match counting scorers. - private float[] coordFactors = null; - void init() { // use after all scorers have been added. coordFactors = new float[maxCoord + 1]; Similarity sim = getSimilarity(); @@ -48,15 +48,6 @@ } } - int nrMatchers; // to be increased by score() of match counting scorers. - - void initDoc() { - nrMatchers = 0; - } - - float coordFactor() { - return coordFactors[nrMatchers]; - } } private final Coordinator coordinator; @@ -85,7 +76,7 @@ * @param allowDocsOutOfOrder Whether it is allowed to return documents out of order. * This can accelerate the scoring of disjunction queries. 
*/ - public BooleanScorer2(Similarity similarity, int minNrShouldMatch, boolean allowDocsOutOfOrder) { + public BooleanScorer2(Similarity similarity, int minNrShouldMatch, boolean allowDocsOutOfOrder) throws IOException { super(similarity); if (minNrShouldMatch < 0) { throw new IllegalArgumentException("Minimum number of optional scorers should not be negative"); @@ -105,7 +96,7 @@ * at least one of the optional scorers will have to * match during the search. */ - public BooleanScorer2(Similarity similarity, int minNrShouldMatch) { + public BooleanScorer2(Similarity similarity, int minNrShouldMatch) throws IOException { this(similarity, minNrShouldMatch, false); } @@ -114,11 +105,11 @@ * at least one of the optional scorers will have to match during the search. * @param similarity The similarity to be used. */ - public BooleanScorer2(Similarity similarity) { + public BooleanScorer2(Similarity similarity) throws IOException { this(similarity, 0, false); } - public void add(final Scorer scorer, boolean required, boolean prohibited) { + public void add(final Scorer scorer, boolean required, boolean prohibited) throws IOException { if (!prohibited) { coordinator.maxCoord++; } @@ -151,17 +142,24 @@ private class SingleMatchScorer extends Scorer { private Scorer scorer; private int lastScoredDoc = -1; + // Save the score of lastScoredDoc, so that we don't compute it more than + // once in score(). + private float lastDocScore = Float.NaN; SingleMatchScorer(Scorer scorer) { super(scorer.getSimilarity()); this.scorer = scorer; } public float score() throws IOException { - if (this.doc() >= lastScoredDoc) { - lastScoredDoc = this.doc(); + int doc = doc(); + if (doc >= lastScoredDoc) { + if (doc > lastScoredDoc) { + lastDocScore = scorer.score(); + lastScoredDoc = doc; + } coordinator.nrMatchers++; } - return scorer.score(); + return lastDocScore; } public int doc() { return scorer.doc(); @@ -178,39 +176,51 @@ } private Scorer countingDisjunctionSumScorer(final List scorers, - int minNrShouldMatch) throws IOException - // each scorer from the list counted as a single matcher - { + int minNrShouldMatch) throws IOException { + // each scorer from the list counted as a single matcher return new DisjunctionSumScorer(scorers, minNrShouldMatch) { private int lastScoredDoc = -1; + // Save the score of lastScoredDoc, so that we don't compute it more than + // once in score(). + private float lastDocScore = Float.NaN; public float score() throws IOException { - if (this.doc() >= lastScoredDoc) { - lastScoredDoc = this.doc(); + int doc = doc(); + if (doc >= lastScoredDoc) { + if (doc > lastScoredDoc) { + lastDocScore = super.score(); + lastScoredDoc = doc; + } coordinator.nrMatchers += super.nrMatchers; } - return super.score(); + return lastDocScore; } }; } - private static Similarity defaultSimilarity = new DefaultSimilarity(); + private static final Similarity defaultSimilarity = Similarity.getDefault(); private Scorer countingConjunctionSumScorer(List requiredScorers) throws IOException { // each scorer from the list counted as a single matcher final int requiredNrMatchers = requiredScorers.size(); return new ConjunctionScorer(defaultSimilarity, requiredScorers) { private int lastScoredDoc = -1; - + // Save the score of lastScoredDoc, so that we don't compute it more than + // once in score(). 
+ private float lastDocScore = Float.NaN; public float score() throws IOException { - if (this.doc() >= lastScoredDoc) { - lastScoredDoc = this.doc(); + int doc = doc(); + if (doc >= lastScoredDoc) { + if (doc > lastScoredDoc) { + lastDocScore = super.score(); + lastScoredDoc = doc; + } coordinator.nrMatchers += requiredNrMatchers; } // All scorers match, so defaultSimilarity super.score() always has 1 as // the coordination factor. // Therefore the sum of the scores of the requiredScorers // is used as score. - return super.score(); + return lastDocScore; } }; } @@ -361,7 +371,7 @@ collector.setScorer(this); while (docNr < max) { collector.collect(docNr); - if (! countingSumScorer.next()) { + if (!countingSumScorer.next()) { return false; } docNr = countingSumScorer.doc(); @@ -379,9 +389,9 @@ } public float score() throws IOException { - coordinator.initDoc(); + coordinator.nrMatchers = 0; float sum = countingSumScorer.score(); - return sum * coordinator.coordFactor(); + return sum * coordinator.coordFactors[coordinator.nrMatchers]; } /** Skips to the first match beyond the current whose document number is Index: src/java/org/apache/lucene/search/FieldDoc.java =================================================================== --- src/java/org/apache/lucene/search/FieldDoc.java (revision 765580) +++ src/java/org/apache/lucene/search/FieldDoc.java (working copy) @@ -17,7 +17,6 @@ * limitations under the License. */ - /** * Expert: A ScoreDoc which also contains information about * how to sort the referenced document. In addition to the @@ -38,8 +37,7 @@ * @see ScoreDoc * @see TopFieldDocs */ -public class FieldDoc -extends ScoreDoc { +public class FieldDoc extends ScoreDoc { /** Expert: The values which are used to sort the referenced document. * The order of these will match the original sort criteria given by a @@ -60,4 +58,19 @@ super (doc, score); this.fields = fields; } + + // A convenience method for debugging. + public String toString() { + // super.toString returns the doc and score information, so just add the + // fields information + StringBuffer sb = new StringBuffer(super.toString()); + sb.append("["); + for (int i = 0; i < fields.length; i++) { + sb.append(fields[i]).append(", "); + } + sb.setLength(sb.length() - 2); // discard last ", " + sb.append("]"); + return super.toString(); + } + } \ No newline at end of file Index: src/java/org/apache/lucene/search/IndexSearcher.java =================================================================== --- src/java/org/apache/lucene/search/IndexSearcher.java (revision 765580) +++ src/java/org/apache/lucene/search/IndexSearcher.java (working copy) @@ -18,9 +18,9 @@ */ import java.io.IOException; -import java.util.List; import java.util.ArrayList; -import org.apache.lucene.util.SorterTemplate; +import java.util.List; + import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldSelector; import org.apache.lucene.index.CorruptIndexException; @@ -40,7 +40,7 @@ public class IndexSearcher extends Searcher { IndexReader reader; private boolean closeReader; - private IndexReader[] sortedSubReaders; + private IndexReader[] subReaders; private int[] sortedStarts; /** Creates a searcher searching the index in the named directory. 
@@ -48,7 +48,7 @@ * @throws IOException if there is a low-level IO error */ public IndexSearcher(String path) throws CorruptIndexException, IOException { - this(IndexReader.open(path), true, false); + this(IndexReader.open(path), true); } /** Creates a searcher searching the index in the provided directory. @@ -56,28 +56,27 @@ * @throws IOException if there is a low-level IO error */ public IndexSearcher(Directory directory) throws CorruptIndexException, IOException { - this(IndexReader.open(directory), true, false); + this(IndexReader.open(directory), true); } /** Creates a searcher searching the provided index. */ public IndexSearcher(IndexReader r) { - this(r, false, false); + this(r, false); } - /** Expert: Creates a searcher searching the provided - * index, specifying whether searches must visit the - * documents in order. By default, segments are searched - * in order of decreasing numDocs(); if you pass true for - * docsInOrder, they will instead be searched in their - * natural (unsorted) order.*/ - public IndexSearcher(IndexReader r, boolean docsInOrder) { - this(r, false, docsInOrder); - } - - private IndexSearcher(IndexReader r, boolean closeReader, boolean docsInOrder) { + private IndexSearcher(IndexReader r, boolean closeReader) { reader = r; this.closeReader = closeReader; - sortSubReaders(docsInOrder); + + List subReadersList = new ArrayList(); + gatherSubReaders(subReadersList, reader); + subReaders = (IndexReader[]) subReadersList.toArray(new IndexReader[subReadersList.size()]); + sortedStarts = new int[subReaders.length]; + int maxDoc = 0; + for (int i = 0; i < subReaders.length; i++) { + sortedStarts[i] = maxDoc; + maxDoc += subReaders[i].maxDoc(); + } } protected void gatherSubReaders(List allSubReaders, IndexReader r) { @@ -86,54 +85,12 @@ // Add the reader itself, and do not recurse allSubReaders.add(r); } else { - for(int i=0;i num2) - return -1; - if (num1 < num2) - return 1; - return 0; - } - protected void swap(int i, int j) { - IndexReader temp = sortedSubReaders[i]; - sortedSubReaders[i] = sortedSubReaders[j]; - sortedSubReaders[j] = temp; - - int tempInt = sortedStarts[i]; - sortedStarts[i] = sortedStarts[j]; - sortedStarts[j] = tempInt; - } - }; - sorter.quickSort(0, length - 1); - } - } - /** Return the {@link IndexReader} this searches. */ public IndexReader getIndexReader() { return reader; @@ -252,9 +209,9 @@ public void search(Weight weight, Filter filter, Collector collector) throws IOException { - for (int i = 0; i < sortedSubReaders.length; i++) { // search each subreader - collector.setNextReader(sortedSubReaders[i], sortedStarts[i]); - doSearch(sortedSubReaders[i], weight, filter, collector); + for (int i = 0; i < subReaders.length; i++) { // search each subreader + collector.setNextReader(subReaders[i], sortedStarts[i]); + doSearch(subReaders[i], weight, filter, collector); } } Index: src/java/org/apache/lucene/search/MultiSearcher.java =================================================================== --- src/java/org/apache/lucene/search/MultiSearcher.java (revision 765580) +++ src/java/org/apache/lucene/search/MultiSearcher.java (working copy) @@ -236,7 +236,19 @@ for (int i = 0; i < searchables.length; i++) { // search each searcher TopFieldDocs docs = searchables[i].search (weight, filter, n, sort); - + // If one of the Sort fields is FIELD_DOC, need to fix its values, so that + // it will break ties by doc Id properly. Otherwise, it will compare to + // 'relative' doc Ids, that belong to two different searchers. 
+ for (int j = 0; j < docs.fields.length; j++) { + if (docs.fields[j].getType() == SortField.DOC) { + // iterate over the score docs and change their fields value + for (int j2 = 0; j2 < docs.scoreDocs.length; j2++) { + FieldDoc fd = (FieldDoc) docs.scoreDocs[j2]; + fd.fields[j] = new Integer(((Integer) fd.fields[j]).intValue() + starts[i]); + } + break; + } + } if (hq == null) hq = new FieldDocSortedHitQueue (docs.fields, n); totalHits += docs.totalHits; // update totalHits maxScore = Math.max(maxScore, docs.getMaxScore()); Index: src/java/org/apache/lucene/search/ParallelMultiSearcher.java =================================================================== --- src/java/org/apache/lucene/search/ParallelMultiSearcher.java (revision 765580) +++ src/java/org/apache/lucene/search/ParallelMultiSearcher.java (working copy) @@ -295,7 +295,22 @@ // the actual type of fields, in case the original list contained AUTO. // if the searchable returns null for fields, we'll have problems. if (sort != null) { - ((FieldDocSortedHitQueue)hq).setFields (((TopFieldDocs)docs).fields); + TopFieldDocs docsFields = (TopFieldDocs) docs; + // If one of the Sort fields is FIELD_DOC, need to fix its values, so that + // it will break ties by doc Id properly. Otherwise, it will compare to + // 'relative' doc Ids, that belong to two different searchers. + for (int j = 0; j < docsFields.fields.length; j++) { + if (docsFields.fields[j].getType() == SortField.DOC) { + // iterate over the score docs and change their fields value + for (int j2 = 0; j2 < docs.scoreDocs.length; j2++) { + FieldDoc fd = (FieldDoc) docs.scoreDocs[j2]; + fd.fields[j] = new Integer(((Integer) fd.fields[j]).intValue() + starts[i]); + } + break; + } + } + + ((FieldDocSortedHitQueue) hq).setFields(docsFields.fields); } ScoreDoc[] scoreDocs = docs.scoreDocs; for (int j = 0; Index: src/java/org/apache/lucene/search/ScoreDoc.java =================================================================== --- src/java/org/apache/lucene/search/ScoreDoc.java (revision 765580) +++ src/java/org/apache/lucene/search/ScoreDoc.java (working copy) @@ -33,4 +33,10 @@ this.doc = doc; this.score = score; } + + // A convenience method for debugging. + public String toString() { + return "doc=" + doc + " score=" + score; + } + } Index: src/java/org/apache/lucene/search/Sort.java =================================================================== --- src/java/org/apache/lucene/search/Sort.java (revision 765580) +++ src/java/org/apache/lucene/search/Sort.java (working copy) @@ -121,7 +121,7 @@ * only with slightly more overhead. */ public Sort() { - this(new SortField[] { SortField.FIELD_SCORE, SortField.FIELD_DOC }); + this(SortField.FIELD_SCORE); } /** @@ -179,9 +179,7 @@ * then by index order (document number). */ public void setSort(String field, boolean reverse) { - SortField[] nfields = new SortField[] { - new SortField(field, SortField.AUTO, reverse), SortField.FIELD_DOC }; - fields = nfields; + fields = new SortField[] { new SortField(field, SortField.AUTO, reverse) }; } /** Sets the sort to the terms in each field in succession. */ Index: src/java/org/apache/lucene/search/TopDocsCollector.java =================================================================== --- src/java/org/apache/lucene/search/TopDocsCollector.java (revision 765580) +++ src/java/org/apache/lucene/search/TopDocsCollector.java (working copy) @@ -75,7 +75,10 @@ /** Returns the top docs that were collected by this collector. 
*/ public final TopDocs topDocs() { - return topDocs(0, pq.size()); + // In case pq was populated with sentinel values, there might be less + // results than pq.size(). Therefore return all results until either + // pq.size() or totalHits. + return topDocs(0, totalHits < pq.size() ? totalHits : pq.size()); } /** @@ -91,7 +94,10 @@ * results this search execution collected. */ public final TopDocs topDocs(int start) { - return topDocs(start, pq.size()); + // In case pq was populated with sentinel values, there might be less + // results than pq.size(). Therefore return all results until either + // pq.size() or totalHits. + return topDocs(start, totalHits < pq.size() ? totalHits : pq.size()); } /** @@ -108,18 +114,21 @@ * returned {@link TopDocs} object, which will contain all the results this * search execution collected. */ - public TopDocs topDocs(int start, int howMany) { + public final TopDocs topDocs(int start, int howMany) { - int pqsize = pq.size(); + // In case pq was populated with sentinel values, there might be less + // results than pq.size(). Therefore return all results until either + // pq.size() or totalHits. + int size = totalHits < pq.size() ? totalHits : pq.size(); // Don't bother to throw an exception, just return an empty TopDocs in case // the parameters are invalid or out of range. - if (start < 0 || start >= pqsize || howMany <= 0) { + if (start < 0 || start >= size || howMany <= 0) { return newTopDocs(null, start); } // We know that start < pqsize, so just fix howMany. - howMany = Math.min(pqsize - start, howMany); + howMany = Math.min(size - start, howMany); ScoreDoc[] results = new ScoreDoc[howMany]; // pq's pop() returns the 'least' element in the queue, therefore need @@ -127,7 +136,7 @@ // Note that this loop will usually not be executed, since the common usage // should be that the caller asks for the last howMany results. However it's // needed here for completeness. - for (int i = pqsize - start - howMany; i > 0; i--) { pq.pop(); } + for (int i = pq.size() - start - howMany; i > 0; i--) { pq.pop(); } // Get the requested results from pq. populateResults(results, howMany); Index: src/java/org/apache/lucene/search/TopFieldCollector.java =================================================================== --- src/java/org/apache/lucene/search/TopFieldCollector.java (revision 765580) +++ src/java/org/apache/lucene/search/TopFieldCollector.java (working copy) @@ -41,7 +41,7 @@ * Implements a TopFieldCollector over one SortField criteria, without * tracking document scores and maxScore. */ - private static class OneComparatorNonScoringCollector extends + private static class OneComparatorNonScoringCollector extends TopFieldCollector { final FieldComparator comparator; @@ -57,16 +57,16 @@ private final void updateBottom(int doc) { // bottom.score is already set to Float.NaN in add(). bottom.docID = docBase + doc; - pq.adjustTop(); - bottom = (FieldValueHitQueue.Entry) pq.top(); + bottom = (Entry) pq.updateTop(); } public void collect(int doc) throws IOException { ++totalHits; if (queueFull) { - // Fastmatch: return if this hit is not competitive - final int cmp = reverseMul * comparator.compareBottom(doc); - if (cmp < 0 || (cmp == 0 && doc + docBase > bottom.docID)) { + if ((reverseMul * comparator.compareBottom(doc)) <= 0) { + // since docs are visited in doc Id order, if compare is 0, it means + // this document is largest than anything else in the queue, and + // therefore not competitive. 
return; } @@ -115,16 +115,16 @@ private final void updateBottom(int doc, float score) { bottom.docID = docBase + doc; bottom.score = score; - pq.adjustTop(); - bottom = (FieldValueHitQueue.Entry) pq.top(); + bottom = (Entry) pq.updateTop(); } public void collect(int doc) throws IOException { ++totalHits; if (queueFull) { - // Fastmatch: return if this hit is not competitive - final int cmp = reverseMul * comparator.compareBottom(doc); - if (cmp < 0 || (cmp == 0 && doc + docBase > bottom.docID)) { + if ((reverseMul * comparator.compareBottom(doc)) <= 0) { + // since docs are visited in doc Id order, if compare is 0, it means + // this document is largest than anything else in the queue, and + // therefore not competitive. return; } @@ -150,12 +150,6 @@ } } - public void setNextReader(IndexReader reader, int docBase) throws IOException { - final int numSlotsFull = queueFull ? numHits : totalHits; - this.docBase = docBase; - comparator.setNextReader(reader, docBase, numSlotsFull); - } - public void setScorer(Scorer scorer) throws IOException { this.scorer = scorer; comparator.setScorer(scorer); @@ -182,8 +176,7 @@ private final void updateBottom(int doc, float score) { bottom.docID = docBase + doc; bottom.score = score; - pq.adjustTop(); - bottom = (FieldValueHitQueue.Entry) pq.top(); + bottom = (Entry) pq.updateTop(); } public void collect(int doc) throws IOException { @@ -193,9 +186,10 @@ } ++totalHits; if (queueFull) { - // Fastmatch: return if this hit is not competitive - final int cmp = reverseMul * comparator.compareBottom(doc); - if (cmp < 0 || (cmp == 0 && doc + docBase > bottom.docID)) { + if ((reverseMul * comparator.compareBottom(doc)) <= 0) { + // since docs are visited in doc Id order, if compare is 0, it means + // this document is largest than anything else in the queue, and + // therefore not competitive. return; } @@ -241,8 +235,7 @@ private final void updateBottom(int doc) { // bottom.score is already set to Float.NaN in add(). bottom.docID = docBase + doc; - pq.adjustTop(); - bottom = (FieldValueHitQueue.Entry) pq.top(); + bottom = (Entry) pq.updateTop(); } public void collect(int doc) throws IOException { @@ -252,18 +245,16 @@ for (int i = 0;; i++) { final int c = reverseMul[i] * comparators[i].compareBottom(doc); if (c < 0) { - // Definitely not competitive + // Definitely not competitive. return; } else if (c > 0) { - // Definitely competitive + // Definitely competitive. break; } else if (i == comparators.length - 1) { - // This is the equals case. - if (doc + docBase > bottom.docID) { - // Definitely not competitive - return; - } - break; + // Here c=0. If we're at the last comparator, this doc is not + // competitive, since docs are visited in doc Id order, which means + // this doc cannot compete with any other document in the queue. + return; } } @@ -327,8 +318,7 @@ private final void updateBottom(int doc, float score) { bottom.docID = docBase + doc; bottom.score = score; - pq.adjustTop(); - bottom = (FieldValueHitQueue.Entry) pq.top(); + bottom = (Entry) pq.updateTop(); } public void collect(int doc) throws IOException { @@ -342,18 +332,16 @@ for (int i = 0;; i++) { final int c = reverseMul[i] * comparators[i].compareBottom(doc); if (c < 0) { - // Definitely not competitive + // Definitely not competitive. return; } else if (c > 0) { - // Definitely competitive + // Definitely competitive. break; } else if (i == comparators.length - 1) { - // This is the equals case. 
- if (doc + docBase > bottom.docID) { - // Definitely not competitive - return; - } - break; + // Here c=0. If we're at the last comparator, this doc is not + // competitive, since docs are visited in doc Id order, which means + // this doc cannot compete with any other document in the queue. + return; } } @@ -405,8 +393,7 @@ private final void updateBottom(int doc, float score) { bottom.docID = docBase + doc; bottom.score = score; - pq.adjustTop(); - bottom = (FieldValueHitQueue.Entry) pq.top(); + bottom = (Entry) pq.updateTop(); } public void collect(int doc) throws IOException { @@ -416,18 +403,16 @@ for (int i = 0;; i++) { final int c = reverseMul[i] * comparators[i].compareBottom(doc); if (c < 0) { - // Definitely not competitive + // Definitely not competitive. return; } else if (c > 0) { - // Definitely competitive + // Definitely competitive. break; } else if (i == comparators.length - 1) { - // This is the equals case. - if (doc + docBase > bottom.docID) { - // Definitely not competitive - return; - } - break; + // Here c=0. If we're at the last comparator, this doc is not + // competitive, since docs are visited in doc Id order, which means + // this doc cannot compete with any other document in the queue. + return; } } @@ -451,7 +436,7 @@ comparators[i].copy(slot, doc); } - // Compute score only if it competitive. + // Compute score only if it is competitive. final float score = scorer.score(); add(slot, doc, score); if (queueFull) { @@ -551,8 +536,7 @@ } final void add(int slot, int doc, float score) { - pq.put(new FieldValueHitQueue.Entry(slot, docBase + doc, score)); - bottom = (FieldValueHitQueue.Entry) pq.top(); + bottom = (Entry) pq.add(new Entry(slot, docBase + doc, score)); queueFull = totalHits == numHits; } @@ -562,14 +546,15 @@ */ protected void populateResults(ScoreDoc[] results, int howMany) { - FieldValueHitQueue queue = (FieldValueHitQueue) pq; if (fillFields) { - for (int i = queue.size() - 1; i >= 0; i--) { - results[i] = queue.fillFields((FieldValueHitQueue.Entry) queue.pop()); + // avoid casting if unnecessary. 
+ FieldValueHitQueue queue = (FieldValueHitQueue) pq; + for (int i = howMany - 1; i >= 0; i--) { + results[i] = queue.fillFields((Entry) queue.pop()); } } else { - for (int i = queue.size() - 1; i >= 0; i--) { - Entry entry = (FieldValueHitQueue.Entry) queue.pop(); + for (int i = howMany - 1; i >= 0; i--) { + Entry entry = (Entry) pq.pop(); results[i] = new FieldDoc(entry.docID, entry.score); } } Index: src/java/org/apache/lucene/search/TopScoreDocCollector.java =================================================================== --- src/java/org/apache/lucene/search/TopScoreDocCollector.java (revision 765580) +++ src/java/org/apache/lucene/search/TopScoreDocCollector.java (working copy) @@ -31,7 +31,7 @@ */ public final class TopScoreDocCollector extends TopDocsCollector { - private ScoreDoc reusableSD; + private ScoreDoc pqTop; private int docBase = 0; private Scorer scorer; @@ -40,6 +40,13 @@ */ public TopScoreDocCollector(int numHits) { super(new HitQueue(numHits)); + + // pre-populate pq with sentinel values + ScoreDoc[] docs = new ScoreDoc[numHits]; + for (int i = 0; i < docs.length; i++) { + docs[i] = new ScoreDoc(-1, Float.NEGATIVE_INFINITY); + } + pqTop = (ScoreDoc) pq.addSentinelObjects(docs); } protected TopDocs newTopDocs(ScoreDoc[] results, int start) { @@ -66,18 +73,15 @@ public void collect(int doc) throws IOException { float score = scorer.score(); totalHits++; - if (reusableSD == null) { - reusableSD = new ScoreDoc(doc + docBase, score); - } else if (score >= reusableSD.score) { - // reusableSD holds the last "rejected" entry, so, if - // this new score is not better than that, there's no - // need to try inserting it - reusableSD.doc = doc + docBase; - reusableSD.score = score; - } else { + if (score <= pqTop.score) { + // Since docs are returned in-order (i.e., increasing doc Id), a document + // with equal score to pqTop.score cannot compete since HitQueue favors + // documents with lower doc Ids. Therefore reject those docs too. return; } - reusableSD = (ScoreDoc) pq.insertWithOverflow(reusableSD); + pqTop.doc = doc + docBase; + pqTop.score = score; + pqTop = (ScoreDoc) pq.updateTop(); } public void setNextReader(IndexReader reader, int base) { Index: src/java/org/apache/lucene/util/PriorityQueue.java =================================================================== --- src/java/org/apache/lucene/util/PriorityQueue.java (revision 765580) +++ src/java/org/apache/lucene/util/PriorityQueue.java (working copy) @@ -43,10 +43,62 @@ } /** - * Adds an Object to a PriorityQueue in log(size) time. - * If one tries to add more objects than maxSize from initialize - * a RuntimeException (ArrayIndexOutOfBound) is thrown. + * Adds sentinel values to the queue. This method can be used to pre-populate + * the queue with sentinel values that guarantee + * {@link #lessThan(Object, Object)} will always favor a non-sentinel value + * over a sentinel one.
+   * If one were to use this method (with HitQueue for example), then the
+   * following usage pattern is recommended:
+   * 
+   * <pre>
+   * PriorityQueue pq = new HitQueue(numHits);
+   * // pre-populate pq with sentinel values
+   * ScoreDoc[] docs = new ScoreDoc[numHits];
+   * for (int i = 0; i < docs.length; i++) {
+   *   docs[i] = new ScoreDoc(-1, Float.NEGATIVE_INFINITY);
+   * }
+   * // save the 'top' element
+   * ScoreDoc pqTop = (ScoreDoc) pq.addSentinelObjects(docs);
+   * <...>
+   * // now in order to add a new element, which is 'better' than top (after 
+   * // you've verified it is better), it is as simple as:
+   * pqTop.score = newScore;
+   * pqTop.doc = newDoc;
+   * pqTop = (ScoreDoc) pq.updateTop();
+   * </pre>
+ * + * NOTE: this method sets size to maxSize when it completes. Calling + * pq.size() will always return maxSize (unless elements were extracted), + * therefore if there's a need to know how many 'real' elements are in the + * queue, it is advised to keep track of that outside the queue.
+ * NOTE: this method must be called before any object is added to the + * queue, as it initalizes all the elements in the queue. + * + * @param sentinelVals + * the array of objects to return. Must be of size 'maxSize', or + * otherwise an {@link IllegalArgumentException} is thrown + * @return the element at the top of the queue. */ + public final Object addSentinelObjects(Object[] sentinelVals) { + if (sentinelVals.length != maxSize) { + throw new IllegalArgumentException("number of sentinel values must be identical to maxSize"); + } + + for (int i = 0; i < sentinelVals.length; i++) { + heap[i + 1] = sentinelVals[i]; + } + size = maxSize; + return heap[1]; + } + + /** + * Adds an Object to a PriorityQueue in log(size) time. If one tries to add + * more objects than maxSize from initialize a RuntimeException + * (ArrayIndexOutOfBound) is thrown. + * + * @deprecated use {@link #add(Object)} which returns the new top object, + * saving an additional call to {@link #top()}. + */ public final void put(Object element) { size++; heap[size] = element; @@ -54,10 +106,27 @@ } /** - * Adds element to the PriorityQueue in log(size) time if either - * the PriorityQueue is not full, or not lessThan(element, top()). + * Adds an Object to a PriorityQueue in log(size) time. If one tries to add + * more objects than maxSize from initialize an + * {@link ArrayIndexOutOfBoundsException} is thrown. + * + * @return the new 'top' element in the queue. + */ + public final Object add(Object element) { + size++; + heap[size] = element; + upHeap(); + return heap[1]; + } + + /** + * Adds element to the PriorityQueue in log(size) time if either the + * PriorityQueue is not full, or not lessThan(element, top()). + * * @param element * @return true if element is added, false otherwise. + * @deprecated use {@link #insertWithOverflow(Object)} instead, which + * encourages objects reuse. */ public boolean insert(Object element) { return insertWithOverflow(element) != element; @@ -109,16 +178,53 @@ return null; } - /** Should be called when the Object at top changes values. Still log(n) - * worst case, but it's at least twice as fast to
-   *  { pq.top().change(); pq.adjustTop(); }
-   * </pre> instead of <pre>
-   *  { o = pq.pop(); o.change(); pq.push(o); }
+  /**
+   * Should be called when the Object at top changes values. Still log(n) worst
+   * case, but it's at least twice as fast to
+   * 
+   * <pre>
+   * pq.top().change();
+   * pq.adjustTop();
+   * </pre>
+   * 
+   * instead of
+   * 
+   * <pre>
+   * o = pq.pop();
+   * o.change();
+   * pq.push(o);
+   * </pre>
+ * + * @deprecated use {@link #updateTop()} which returns the new top element and + * saves an additional call to {@link #top()}. */ public final void adjustTop() { downHeap(); } + + /** + * Should be called when the Object at top changes values. Still log(n) worst + * case, but it's at least twice as fast to + * + * <pre>
+   * pq.top().change();
+   * pq.updateTop();
+   * </pre>
+   * 
+   * instead of
+   * 
+   * <pre>
+   * o = pq.pop();
+   * o.change();
+   * pq.push(o);
+   * </pre>
+ * + * @return the new 'top' element. + */ + public final Object updateTop() { + downHeap(); + return heap[1]; + } /** Returns the number of elements currently stored in the PriorityQueue. */ public final int size() { @@ -127,8 +233,9 @@ /** Removes all entries from the PriorityQueue. */ public final void clear() { - for (int i = 0; i <= size; i++) + for (int i = 0; i <= size; i++) { heap[i] = null; + } size = 0; } Index: src/test/org/apache/lucene/index/TestIndexReader.java =================================================================== --- src/test/org/apache/lucene/index/TestIndexReader.java (revision 765580) +++ src/test/org/apache/lucene/index/TestIndexReader.java (working copy) @@ -39,20 +39,17 @@ import org.apache.lucene.document.Fieldable; import org.apache.lucene.document.SetBasedFieldSelector; import org.apache.lucene.index.IndexReader.FieldOption; -import org.apache.lucene.search.Collector; +import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.Scorer; import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.FieldCache; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.store.MockRAMDirectory; -import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.store.NoSuchDirectoryException; +import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; @@ -1637,59 +1634,6 @@ dir.close(); } - // LUCENE-1483 - public void testDocsInOrderSearch() throws Throwable { - Directory dir = new MockRAMDirectory(); - - IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), - IndexWriter.MaxFieldLength.LIMITED); - writer.addDocument(createDocument("a")); - writer.commit(); - writer.addDocument(createDocument("a")); - writer.addDocument(createDocument("a")); - writer.close(); - - Query q = new TermQuery(new Term("id", "a")); - - IndexSearcher s = new IndexSearcher(dir); - s.search(q, new Collector() { - int lastDocBase = -1; - public void setNextReader(IndexReader reader, int docBase) { - if (lastDocBase == -1) { - assertEquals(1, docBase); - } else if (lastDocBase == 1) { - assertEquals(0, docBase); - } else { - fail(); - } - lastDocBase = docBase; - } - public void collect(int doc) {} - public void setScorer(Scorer scorer) {} - }); - s.close(); - - IndexReader r = IndexReader.open(dir); - s = new IndexSearcher(r, true); - s.search(q, new Collector() { - int lastDocBase = -1; - public void setNextReader(IndexReader reader, int docBase) { - if (lastDocBase == -1) { - assertEquals(0, docBase); - } else if (lastDocBase == 0) { - assertEquals(1, docBase); - } else { - fail(); - } - lastDocBase = docBase; - } - public void collect(int doc) {} - public void setScorer(Scorer scorer) {} - }); - s.close(); - r.close(); - } - // LUCENE-1579: Ensure that on a cloned reader, segments // reuse the doc values arrays in FieldCache public void testFieldCacheReuseAfterClone() throws Exception { Index: src/test/org/apache/lucene/search/TestSort.java =================================================================== --- src/test/org/apache/lucene/search/TestSort.java (revision 765580) +++ src/test/org/apache/lucene/search/TestSort.java (working copy) @@ -17,9 +17,20 @@ * limitations 
under the License. */ +import java.io.IOException; +import java.io.Serializable; +import java.rmi.Naming; +import java.rmi.registry.LocateRegistry; +import java.util.BitSet; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Locale; +import java.util.Random; + import junit.framework.Test; import junit.framework.TestSuite; import junit.textui.TestRunner; + import org.apache.lucene.analysis.SimpleAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -33,17 +44,6 @@ import org.apache.lucene.util.DocIdBitSet; import org.apache.lucene.util.LuceneTestCase; -import java.io.IOException; -import java.io.Serializable; -import java.rmi.Naming; -import java.rmi.registry.LocateRegistry; -import java.util.BitSet; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Locale; -import java.util.Random; -import java.util.regex.Pattern; - /** * Unit tests for sorting code. * @@ -53,9 +53,7 @@ * @version $Id$ */ -public class TestSort -extends LuceneTestCase -implements Serializable { +public class TestSort extends LuceneTestCase implements Serializable { private static final int NUM_STRINGS = 6000; private Searcher full; @@ -118,7 +116,7 @@ { "Y", "g", "1", "0.2", null, null, null, null, null, null, null, null}, { "Z", "f g", null, null, null, null, null, null, null, null, null, null} }; - + // create an index of all the documents, or just the x, or just the y documents private Searcher getIndex (boolean even, boolean odd) throws IOException { @@ -343,28 +341,28 @@ public void testCustomFieldParserSort() throws Exception { sort.setSort (new SortField[] { new SortField ("parser", new FieldCache.IntParser(){ public final int parseInt(final String val) { - return (int) (val.charAt(0)-'A') * 123456; + return (val.charAt(0)-'A') * 123456; } }), SortField.FIELD_DOC }); assertMatches (full, queryA, sort, "JIHGFEDCBA"); sort.setSort (new SortField[] { new SortField ("parser", new FieldCache.FloatParser(){ public final float parseFloat(final String val) { - return (float) Math.sqrt( (double) val.charAt(0) ); + return (float) Math.sqrt( val.charAt(0) ); } }), SortField.FIELD_DOC }); assertMatches (full, queryA, sort, "JIHGFEDCBA"); sort.setSort (new SortField[] { new SortField ("parser", new ExtendedFieldCache.LongParser(){ public final long parseLong(final String val) { - return (long) (val.charAt(0)-'A') * 1234567890L; + return (val.charAt(0)-'A') * 1234567890L; } }), SortField.FIELD_DOC }); assertMatches (full, queryA, sort, "JIHGFEDCBA"); sort.setSort (new SortField[] { new SortField ("parser", new ExtendedFieldCache.DoubleParser(){ public final double parseDouble(final String val) { - return Math.pow( (double) val.charAt(0), (double) (val.charAt(0)-'A') ); + return Math.pow( val.charAt(0), (val.charAt(0)-'A') ); } }), SortField.FIELD_DOC }); assertMatches (full, queryA, sort, "JIHGFEDCBA"); @@ -432,7 +430,7 @@ public void setNextReader(IndexReader reader, int docBase, int numSlotsFull) throws IOException { docValues = FieldCache.DEFAULT.getInts(reader, "parser", new FieldCache.IntParser() { public final int parseInt(final String val) { - return (int) (val.charAt(0)-'A') * 123456; + return (val.charAt(0)-'A') * 123456; } }); } @@ -642,20 +640,20 @@ // test a variety of sorts using more than one searcher public void testMultiSort() throws Exception { MultiSearcher searcher = new MultiSearcher (new Searchable[] { searchX, searchY }); - runMultiSorts (searcher); + runMultiSorts(searcher, false); } // test a variety of sorts using a 
parallel multisearcher public void testParallelMultiSort() throws Exception { Searcher searcher = new ParallelMultiSearcher (new Searchable[] { searchX, searchY }); - runMultiSorts (searcher); + runMultiSorts(searcher, false); } // test a variety of sorts using a remote searcher public void testRemoteSort() throws Exception { Searchable searcher = getRemote(); MultiSearcher multi = new MultiSearcher (new Searchable[] { searcher }); - runMultiSorts (multi); + runMultiSorts(multi, true); // this runs on the full index } // test custom search when remote @@ -892,68 +890,73 @@ } // runs a variety of sorts useful for multisearchers - private void runMultiSorts (Searcher multi) throws Exception { - sort.setSort (SortField.FIELD_DOC); - assertMatchesPattern (multi, queryA, sort, "[AB]{2}[CD]{2}[EF]{2}[GH]{2}[IJ]{2}"); + private void runMultiSorts(Searcher multi, boolean isFull) throws Exception { + sort.setSort(SortField.FIELD_DOC); + String expected = isFull ? "ABCDEFGHIJ" : "ACEGIBDFHJ"; + assertMatches(multi, queryA, sort, expected); - sort.setSort (new SortField ("int", SortField.INT)); - assertMatchesPattern (multi, queryA, sort, "IDHFGJ[ABE]{3}C"); + sort.setSort(new SortField ("int", SortField.INT)); + expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC"; + assertMatches(multi, queryA, sort, expected); - sort.setSort (new SortField[] {new SortField ("int", SortField.INT), SortField.FIELD_DOC}); - assertMatchesPattern (multi, queryA, sort, "IDHFGJ[AB]{2}EC"); + sort.setSort(new SortField[] {new SortField ("int", SortField.INT), SortField.FIELD_DOC}); + expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC"; + assertMatches(multi, queryA, sort, expected); - sort.setSort ("int"); - assertMatchesPattern (multi, queryA, sort, "IDHFGJ[AB]{2}EC"); + sort.setSort("int"); + expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC"; + assertMatches(multi, queryA, sort, expected); - sort.setSort (new SortField[] {new SortField ("float", SortField.FLOAT), SortField.FIELD_DOC}); - assertMatchesPattern (multi, queryA, sort, "GDHJ[CI]{2}EFAB"); + sort.setSort(new SortField[] {new SortField ("float", SortField.FLOAT), SortField.FIELD_DOC}); + assertMatches(multi, queryA, sort, "GDHJCIEFAB"); - sort.setSort ("float"); - assertMatchesPattern (multi, queryA, sort, "GDHJ[CI]{2}EFAB"); + sort.setSort("float"); + assertMatches(multi, queryA, sort, "GDHJCIEFAB"); - sort.setSort ("string"); - assertMatches (multi, queryA, sort, "DJAIHGFEBC"); + sort.setSort("string"); + assertMatches(multi, queryA, sort, "DJAIHGFEBC"); - sort.setSort ("int", true); - assertMatchesPattern (multi, queryA, sort, "C[AB]{2}EJGFHDI"); + sort.setSort("int", true); + expected = isFull ? 
"CABEJGFHDI" : "CAEBJGFHDI"; + assertMatches(multi, queryA, sort, expected); - sort.setSort ("float", true); - assertMatchesPattern (multi, queryA, sort, "BAFE[IC]{2}JHDG"); + sort.setSort("float", true); + assertMatches(multi, queryA, sort, "BAFECIJHDG"); - sort.setSort ("string", true); - assertMatches (multi, queryA, sort, "CBEFGHIAJD"); + sort.setSort("string", true); + assertMatches(multi, queryA, sort, "CBEFGHIAJD"); - sort.setSort (new SortField[] { new SortField ("string", Locale.US) }); - assertMatches (multi, queryA, sort, "DJAIHGFEBC"); + sort.setSort(new SortField[] { new SortField ("string", Locale.US) }); + assertMatches(multi, queryA, sort, "DJAIHGFEBC"); - sort.setSort (new SortField[] { new SortField ("string", Locale.US, true) }); - assertMatches (multi, queryA, sort, "CBEFGHIAJD"); + sort.setSort(new SortField[] { new SortField ("string", Locale.US, true) }); + assertMatches(multi, queryA, sort, "CBEFGHIAJD"); - sort.setSort (new String[] {"int","float"}); - assertMatches (multi, queryA, sort, "IDHFGJEABC"); + sort.setSort(new String[] {"int","float"}); + assertMatches(multi, queryA, sort, "IDHFGJEABC"); - sort.setSort (new String[] {"float","string"}); - assertMatches (multi, queryA, sort, "GDHJICEFAB"); + sort.setSort(new String[] {"float","string"}); + assertMatches(multi, queryA, sort, "GDHJICEFAB"); - sort.setSort ("int"); - assertMatches (multi, queryF, sort, "IZJ"); + sort.setSort("int"); + assertMatches(multi, queryF, sort, "IZJ"); - sort.setSort ("int", true); - assertMatches (multi, queryF, sort, "JZI"); + sort.setSort("int", true); + assertMatches(multi, queryF, sort, "JZI"); - sort.setSort ("float"); - assertMatches (multi, queryF, sort, "ZJI"); + sort.setSort("float"); + assertMatches(multi, queryF, sort, "ZJI"); - sort.setSort ("string"); - assertMatches (multi, queryF, sort, "ZJI"); + sort.setSort("string"); + assertMatches(multi, queryF, sort, "ZJI"); - sort.setSort ("string", true); - assertMatches (multi, queryF, sort, "IJZ"); + sort.setSort("string", true); + assertMatches(multi, queryF, sort, "IJZ"); } // make sure the documents returned by the search match the expected list - private void assertMatches (Searcher searcher, Query query, Sort sort, String expectedResult) - throws IOException { + private void assertMatches(Searcher searcher, Query query, Sort sort, + String expectedResult) throws IOException { //ScoreDoc[] result = searcher.search (query, null, 1000, sort).scoreDocs; TopDocs hits = searcher.search (query, null, expectedResult.length(), sort); ScoreDoc[] result = hits.scoreDocs; @@ -970,23 +973,6 @@ assertEquals (expectedResult, buff.toString()); } - // make sure the documents returned by the search match the expected list pattern - private void assertMatchesPattern (Searcher searcher, Query query, Sort sort, String pattern) - throws IOException { - ScoreDoc[] result = searcher.search (query, null, 1000, sort).scoreDocs; - StringBuffer buff = new StringBuffer(10); - int n = result.length; - for (int i=0; i