Index: lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java (revision 1465881) +++ lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java (working copy) @@ -555,7 +555,7 @@ SegmentInfoPerCommit newSegment = flushedSegment.segmentInfo; - IndexWriter.setDiagnostics(newSegment.info, "flush"); + IndexWriter.setDiagnostics(newSegment.info, IndexWriter.SOURCE_FLUSH); IOContext context = new IOContext(new FlushInfo(newSegment.info.getDocCount(), newSegment.sizeInBytes())); Index: lucene/core/src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (revision 1465881) +++ lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (working copy) @@ -185,13 +185,21 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { private static final int UNBOUNDED_MAX_MERGE_SEGMENTS = -1; - /** * Name of the write lock in the index. */ public static final String WRITE_LOCK_NAME = "write.lock"; + /** Key for the source of a segment in the {@link SegmentInfo#getDiagnostics() diagnostics}. */ + public static final String SOURCE = "source"; + /** Source of a segment which results from a merge of other segments. */ + public static final String SOURCE_MERGE = "merge"; + /** Source of a segment which results from a flush. */ + public static final String SOURCE_FLUSH = "flush"; + /** Source of a segment which results from a call to {@link #addIndexes(IndexReader...)}. */ + public static final String SOURCE_ADDINDEXES_READERS = "addIndexes(IndexReader...)"; + /** * Absolute hard maximum length for a term, in bytes once * encoded as UTF8. If a term arrives from the analyzer @@ -2432,7 +2440,7 @@ info.setFiles(new HashSet(trackingDir.getCreatedFiles())); trackingDir.getCreatedFiles().clear(); - setDiagnostics(info, "addIndexes(IndexReader...)"); + setDiagnostics(info, SOURCE_ADDINDEXES_READERS); boolean useCompoundFile; synchronized(this) { // Guard segmentInfos @@ -3455,16 +3463,15 @@ // names. final String mergeSegmentName = newSegmentName(); SegmentInfo si = new SegmentInfo(directory, Constants.LUCENE_MAIN_VERSION, mergeSegmentName, -1, false, codec, null, null); - merge.info = new SegmentInfoPerCommit(si, 0, -1L); + Map details = new HashMap(); + details.put("mergeMaxNumSegments", "" + merge.maxNumSegments); + details.put("mergeFactor", Integer.toString(merge.segments.size())); + setDiagnostics(si, SOURCE_MERGE, details); + merge.setInfo(new SegmentInfoPerCommit(si, 0, -1L)); // Lock order: IW -> BD bufferedDeletesStream.prune(segmentInfos); - Map details = new HashMap(); - details.put("mergeMaxNumSegments", ""+merge.maxNumSegments); - details.put("mergeFactor", Integer.toString(merge.segments.size())); - setDiagnostics(si, "merge", details); - if (infoStream.isEnabled("IW")) { infoStream.message("IW", "merge seg=" + merge.info.info.name + " " + segString(merge.segments)); } Index: lucene/core/src/java/org/apache/lucene/index/MergePolicy.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/MergePolicy.java (revision 1465881) +++ lucene/core/src/java/org/apache/lucene/index/MergePolicy.java (working copy) @@ -149,6 +149,14 @@ } return Collections.unmodifiableList(readers); } + + /** + * Expert: Sets the {@link SegmentInfoPerCommit} of this {@link OneMerge}. + * Allows sub-classes to e.g. set diagnostics properties. + */ + public void setInfo(SegmentInfoPerCommit info) { + this.info = info; + } /** Expert: If {@link #getMergeReaders()} reorders document IDs, this method * must be overridden to return a mapping from the natural doc ID Index: lucene/core/src/java/org/apache/lucene/index/SegmentReader.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/SegmentReader.java (revision 1465881) +++ lucene/core/src/java/org/apache/lucene/index/SegmentReader.java (working copy) @@ -192,7 +192,7 @@ /** * Return the SegmentInfoPerCommit of the segment this reader is reading. */ - SegmentInfoPerCommit getSegmentInfo() { + public SegmentInfoPerCommit getSegmentInfo() { return si; } Index: lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java =================================================================== --- lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java (revision 1465881) +++ lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java (working copy) @@ -591,7 +591,13 @@ // threaded...? the Collector could be sync'd? // always use single thread: for (AtomicReaderContext ctx : leaves) { // search each subreader - collector.setNextReader(ctx); + try { + collector.setNextReader(ctx); + } catch (CollectionTerminatedException e) { + // there is no doc of interest in this reader context + // continue with the following leaf + continue; + } Scorer scorer = weight.scorer(ctx, !collector.acceptsDocsOutOfOrder(), true, ctx.reader().getLiveDocs()); if (scorer != null) { try { Index: lucene/core/src/test/org/apache/lucene/search/TestEarlyTermination.java =================================================================== --- lucene/core/src/test/org/apache/lucene/search/TestEarlyTermination.java (revision 1465881) +++ lucene/core/src/test/org/apache/lucene/search/TestEarlyTermination.java (working copy) @@ -77,7 +77,12 @@ @Override public void setNextReader(AtomicReaderContext context) throws IOException { - collectionTerminated = false; + if (random().nextBoolean()) { + collectionTerminated = true; + throw new CollectionTerminatedException(); + } else { + collectionTerminated = false; + } } @Override Index: lucene/misc/src/java/org/apache/lucene/index/sorter/EarlyTerminatingSortingCollector.java =================================================================== --- lucene/misc/src/java/org/apache/lucene/index/sorter/EarlyTerminatingSortingCollector.java (revision 0) +++ lucene/misc/src/java/org/apache/lucene/index/sorter/EarlyTerminatingSortingCollector.java (working copy) @@ -0,0 +1,123 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.search.CollectionTerminatedException; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.TopDocsCollector; +import org.apache.lucene.search.TotalHitCountCollector; + +/** + * A {@link Collector} that early terminates collection of documents on a + * per-segment basis, if the segment was sorted according to the given + * {@link Sorter}. + * + *

+ * NOTE: the {@link Collector} detects sorted segments according to + * {@link SortingMergePolicy}, so it's best used in conjunction with it. Also, + * it collects up to a specified num docs from each segment, and therefore is + * mostly suitable for use in conjunction with collectors such as + * {@link TopDocsCollector}, and not e.g. {@link TotalHitCountCollector}. + *

+ * NOTE: If you wrap a {@link TopDocsCollector} that sorts in the same + * order as the index order, the returned {@link TopDocsCollector#topDocs()} + * will be correct. However the total of {@link TopDocsCollector#getTotalHits() + * hit count} will be underestimated since not all matching documents will have + * been collected. + *

+ * NOTE: This {@link Collector} uses {@link Sorter#getID()} to detect + * whether a segment was sorted with the same {@link Sorter} as the one given in + * {@link #EarlyTerminatingSortingCollector(Collector, Sorter, int)}. This has + * two implications: + *

    + *
  • if {@link Sorter#getID()} is not implemented correctly and returns + * different identifiers for equivalent {@link Sorter}s, this collector will not + * detect sorted segments,
  • + *
  • if you suddenly change the {@link IndexWriter}'s + * {@link SortingMergePolicy} to sort according to another criterion and if both + * the old and the new {@link Sorter}s have the same identifier, this + * {@link Collector} will incorrectly detect sorted segments.
  • + *
+ * + * @lucene.experimental + */ +public class EarlyTerminatingSortingCollector extends Collector { + + protected final Collector in; + protected final Sorter sorter; + protected final int numDocsToCollect; + + protected int segmentTotalCollect; + protected boolean segmentSorted; + + private int numCollected; + + /** + * Create a new {@link EarlyTerminatingSortingCollector} instance. + * + * @param in + * the collector to wrap + * @param sorter + * the same sorter as the one which is used by {@link IndexWriter}'s + * {@link SortingMergePolicy} + * @param numDocsToCollect + * the number of documents to collect on each segment. When wrapping + * a {@link TopDocsCollector}, this number should be the number of + * hits. + */ + public EarlyTerminatingSortingCollector(Collector in, Sorter sorter, int numDocsToCollect) { + if (numDocsToCollect <= 0) { + throw new IllegalStateException("numDocsToCollect must always be > 0, got " + segmentTotalCollect); + } + this.in = in; + this.sorter = sorter; + this.numDocsToCollect = numDocsToCollect; + } + + @Override + public void setScorer(Scorer scorer) throws IOException { + in.setScorer(scorer); + } + + @Override + public void collect(int doc) throws IOException { + in.collect(doc); + if (++numCollected >= segmentTotalCollect) { + throw new CollectionTerminatedException(); + } + } + + @Override + public void setNextReader(AtomicReaderContext context) throws IOException { + in.setNextReader(context); + segmentSorted = SortingMergePolicy.isSorted(context.reader(), sorter); + segmentTotalCollect = segmentSorted ? numDocsToCollect : Integer.MAX_VALUE; + numCollected = 0; + } + + @Override + public boolean acceptsDocsOutOfOrder() { + return !segmentSorted && in.acceptsDocsOutOfOrder(); + } + +} Property changes on: lucene/misc/src/java/org/apache/lucene/index/sorter/EarlyTerminatingSortingCollector.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/java/org/apache/lucene/index/sorter/NumericDocValuesSorter.java =================================================================== --- lucene/misc/src/java/org/apache/lucene/index/sorter/NumericDocValuesSorter.java (revision 1465881) +++ lucene/misc/src/java/org/apache/lucene/index/sorter/NumericDocValuesSorter.java (working copy) @@ -52,4 +52,9 @@ return sort(reader.maxDoc(), comparator); } + @Override + public String getID() { + return "numericdv_" + fieldName; + } + } Index: lucene/misc/src/java/org/apache/lucene/index/sorter/Sorter.java =================================================================== --- lucene/misc/src/java/org/apache/lucene/index/sorter/Sorter.java (revision 1465881) +++ lucene/misc/src/java/org/apache/lucene/index/sorter/Sorter.java (working copy) @@ -26,10 +26,12 @@ import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer; /** - * Sorts documents in a given index by returning a permutation on the docs. - * Implementations can call {@link #sort(int, DocComparator)} to compute the - * old-to-new permutation over the given documents and values. - * + * Sorts documents of a given index by returning a permutation on the document + * IDs. + *

NOTE: A {@link Sorter} implementation can be easily written from + * a {@link DocComparator document comparator} by using the + * {@link #sort(int, DocComparator)} helper method. This is especially useful + * when documents are directly comparable by their field values. * @lucene.experimental */ public abstract class Sorter { @@ -48,10 +50,16 @@ /** Given the ordinal of a doc ID, return its doc ID in the original index. */ public abstract int newToOld(int docID); + /** Return the number of documents in this map. This must be equal to the + * {@link AtomicReader#maxDoc() number of documents} of the + * {@link AtomicReader} which is sorted. */ + public abstract int size(); + } /** Check consistency of a {@link DocMap}, useful for assertions. */ - static boolean isConsistent(DocMap docMap, int maxDoc) { + static boolean isConsistent(DocMap docMap) { + final int maxDoc = docMap.size(); for (int i = 0; i < maxDoc; ++i) { final int newID = docMap.oldToNew(i); final int oldID = docMap.newToOld(newID); @@ -73,7 +81,11 @@ } - /** Sorts documents in reverse order. */ + /** Sorts documents in reverse order. + * NOTE: This {@link Sorter} is not idempotent. Sorting an + * {@link AtomicReader} once or twice will return two different + * {@link AtomicReader} views. This {@link Sorter} should not be used with + * {@link SortingMergePolicy}. */ public static final Sorter REVERSE_DOCS = new Sorter() { @Override public DocMap sort(final AtomicReader reader) throws IOException { @@ -87,8 +99,17 @@ public int newToOld(int docID) { return maxDoc - docID - 1; } + @Override + public int size() { + return maxDoc; + } }; } + + @Override + public String getID() { + return "reverse_docs"; + } }; private static final class DocValueSorterTemplate extends SorterTemplate { @@ -179,6 +200,11 @@ public int newToOld(int docID) { return (int) newToOld.get(docID); } + + @Override + public int size() { + return maxDoc; + } }; } @@ -192,8 +218,17 @@ * reader is already sorted. *

* NOTE: deleted documents are expected to appear in the mapping as - * well, they will however be dropped when the index is actually sorted. + * well, they will however be marked as deleted in the sorted view. */ public abstract DocMap sort(AtomicReader reader) throws IOException; - + + /** + * Returns the identifier of this {@link Sorter}. + *

This identifier is similar to {@link Object#hashCode()} and should be + * chosen so that two instances of this class that sort documents likewise + * will have the same identifier. On the contrary, this identifier should be + * different on different {@link Sorter sorters}. + */ + public abstract String getID(); + } Index: lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java =================================================================== --- lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java (revision 1465881) +++ lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java (working copy) @@ -633,7 +633,10 @@ // the reader is already sorter return reader; } - assert Sorter.isConsistent(docMap, reader.maxDoc()); + if (reader.maxDoc() != docMap.size()) { + throw new IllegalArgumentException("reader.maxDoc() should be equal to docMap.size(), got" + reader.maxDoc() + " != " + docMap.size()); + } + assert Sorter.isConsistent(docMap); return new SortingAtomicReader(reader, docMap); } Index: lucene/misc/src/java/org/apache/lucene/index/sorter/SortingMergePolicy.java =================================================================== --- lucene/misc/src/java/org/apache/lucene/index/sorter/SortingMergePolicy.java (revision 1465881) +++ lucene/misc/src/java/org/apache/lucene/index/sorter/SortingMergePolicy.java (working copy) @@ -28,8 +28,10 @@ import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.MergeState; import org.apache.lucene.index.MultiReader; +import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentInfoPerCommit; import org.apache.lucene.index.SegmentInfos; +import org.apache.lucene.index.SegmentReader; import org.apache.lucene.index.SlowCompositeReaderWrapper; import org.apache.lucene.store.Directory; import org.apache.lucene.util.Bits; @@ -39,12 +41,23 @@ * before merging them. As a consequence, all segments resulting from a merge * will be sorted while segments resulting from a flush will be in the order * in which documents have been added. - *

Never use this {@link MergePolicy} if you rely on + *

NOTE: Never use this {@link MergePolicy} if you rely on * {@link IndexWriter#addDocuments(Iterable, org.apache.lucene.analysis.Analyzer)} * to have sequentially-assigned doc IDs, this policy will scatter doc IDs. + *

NOTE: This {@link MergePolicy} should only be used with idempotent + * {@link Sorter}s so that the order of segments is predictable. For example, + * using {@link SortingMergePolicy} with {@link Sorter#REVERSE_DOCS} (which is + * not idempotent) will make the order of documents in a segment depend on the + * number of times the segment has been merged. * @lucene.experimental */ public final class SortingMergePolicy extends MergePolicy { + /** + * Put in the {@link SegmentInfo#getDiagnostics() diagnostics} to denote that + * this segment is sorted. + */ + public static final String SORTER_ID_PROP = "sorter"; + class SortingOneMerge extends OneMerge { List unsortedReaders; @@ -72,6 +85,13 @@ // a null doc map means that the readers are already sorted return docMap == null ? unsortedReaders : Collections.singletonList(sortedView); } + + @Override + public void setInfo(SegmentInfoPerCommit info) { + Map diagnostics = info.info.getDiagnostics(); + diagnostics.put(SORTER_ID_PROP, sorter.getID()); + super.setInfo(info); + } private MonotonicAppendingLongBuffer getDeletes(List readers) { MonotonicAppendingLongBuffer deletes = new MonotonicAppendingLongBuffer(); @@ -126,6 +146,18 @@ } + /** Returns true if the given reader is sorted by the given sorter. */ + public static boolean isSorted(AtomicReader reader, Sorter sorter) { + if (reader instanceof SegmentReader) { + final SegmentReader segReader = (SegmentReader) reader; + final Map diagnostics = segReader.getSegmentInfo().info.getDiagnostics(); + if (diagnostics != null && sorter.getID().equals(diagnostics.get(SORTER_ID_PROP))) { + return true; + } + } + return false; + } + private MergeSpecification sortedMergeSpecification(MergeSpecification specification) { if (specification == null) { return null; Index: lucene/misc/src/java/org/apache/lucene/index/sorter/package.html =================================================================== --- lucene/misc/src/java/org/apache/lucene/index/sorter/package.html (revision 1465881) +++ lucene/misc/src/java/org/apache/lucene/index/sorter/package.html (working copy) @@ -17,11 +17,25 @@ --> -Provides index sorting capablities. The application can use one of the -pre-existing Sorter implementations, e.g. to sort by a numeric -DocValues or reverse the order of the documents. Additionally, the -application can implement a Sorter which returns a permutation on -a source Directory's document IDs, to sort the input documents by additional -values. +

Provides index sorting capablities. The application can use one of the +pre-existing Sorter implementations, e.g. to sort by a +{@link org.apache.lucene.index.sorter.NumericDocValuesSorter} +or {@link org.apache.lucene.index.sorter.Sorter#REVERSE_DOCS reverse} the order +of the documents. Additionally, the application can implement a custom +{@link org.apache.lucene.index.sorter.Sorter} which returns a permutation on +a source {@link org.apache.lucene.index.AtomicReader}'s document IDs, to sort +the input documents by additional criteria. + +

{@link org.apache.lucene.index.sorter.SortingMergePolicy} can be used to +make Lucene sort segments before merging them. This will ensure that every +segment resulting from a merge will be sorted according to the provided +{@link org.apache.lucene.index.sorter.Sorter}. This however makes merging and +thus indexing slower. + +

Sorted segments allow for early query termination when the sort order +matches index order. This makes query execution faster since not all documents +need to be visited. Please note that this is an expert feature and should not +be used without a deep understanding of Lucene merging and document collection. + Index: lucene/misc/src/test/org/apache/lucene/index/sorter/SortingAtomicReaderTest.java =================================================================== --- lucene/misc/src/test/org/apache/lucene/index/sorter/SortingAtomicReaderTest.java (revision 1465881) +++ lucene/misc/src/test/org/apache/lucene/index/sorter/SortingAtomicReaderTest.java (working copy) @@ -61,6 +61,10 @@ public Sorter.DocMap sort(AtomicReader reader) throws IOException { return docMap; } + @Override + public String getID() { + return ID_FIELD; + } }); if (VERBOSE) { Index: lucene/misc/src/test/org/apache/lucene/index/sorter/TestEarlyTermination.java =================================================================== --- lucene/misc/src/test/org/apache/lucene/index/sorter/TestEarlyTermination.java (revision 0) +++ lucene/misc/src/test/org/apache/lucene/index/sorter/TestEarlyTermination.java (working copy) @@ -0,0 +1,169 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Random; +import java.util.Set; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopFieldCollector; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; + +import com.carrotsearch.randomizedtesting.generators.RandomPicks; + +public class TestEarlyTermination extends LuceneTestCase { + + private int numDocs; + private List terms; + private Directory dir; + private Sorter sorter; + private RandomIndexWriter iw; + private IndexReader reader; + + @Override + public void setUp() throws Exception { + super.setUp(); + sorter = new NumericDocValuesSorter("ndv1"); + } + + private Document randomDocument() { + final Document doc = new Document(); + doc.add(new NumericDocValuesField("ndv1", random().nextInt(10))); + doc.add(new NumericDocValuesField("ndv2", random().nextInt(10))); + doc.add(new StringField("s", RandomPicks.randomFrom(random(), terms), Store.YES)); + return doc; + } + + private void createRandomIndexes(int maxSegments) throws IOException { + dir = newDirectory(); + numDocs = atLeast(150); + final int numTerms = _TestUtil.nextInt(random(), 1, numDocs / 5); + Set randomTerms = new HashSet(); + while (randomTerms.size() < numTerms) { + randomTerms.add(_TestUtil.randomSimpleString(random())); + } + terms = new ArrayList(randomTerms); + final long seed = random().nextLong(); + final IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random(seed))); + iwc.setMergePolicy(TestSortingMergePolicy.newSortingMergePolicy(sorter)); + iw = new RandomIndexWriter(new Random(seed), dir, iwc); + for (int i = 0; i < numDocs; ++i) { + final Document doc = randomDocument(); + iw.addDocument(doc); + if (i == numDocs / 2 || (i != numDocs - 1 && random().nextInt(8) == 0)) { + iw.commit(); + } + if (random().nextInt(15) == 0) { + final String term = RandomPicks.randomFrom(random(), terms); + iw.deleteDocuments(new Term("s", term)); + } + } + reader = iw.getReader(); + } + + @Override + public void tearDown() throws Exception { + reader.close(); + iw.close(); + dir.close(); + super.tearDown(); + } + + public void testEarlyTermination() throws IOException { + createRandomIndexes(5); + final int numHits = _TestUtil.nextInt(random(), 1, numDocs / 10); + final Sort sort = new Sort(new SortField("ndv1", SortField.Type.LONG, false)); + final boolean fillFields = random().nextBoolean(); + final boolean trackDocScores = random().nextBoolean(); + final boolean trackMaxScore = random().nextBoolean(); + final boolean inOrder = random().nextBoolean(); + final TopFieldCollector collector1 = TopFieldCollector.create(sort, numHits, fillFields, trackDocScores, trackMaxScore, inOrder); + final TopFieldCollector collector2 = TopFieldCollector.create(sort, numHits, fillFields, trackDocScores, trackMaxScore, inOrder); + + final IndexSearcher searcher = newSearcher(reader); + final int iters = atLeast(5); + for (int i = 0; i < iters; ++i) { + final TermQuery query = new TermQuery(new Term("s", RandomPicks.randomFrom(random(), terms))); + searcher.search(query, collector1); + searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sorter, numHits)); + } + assertTrue(collector1.getTotalHits() >= collector2.getTotalHits()); + assertTopDocsEquals(collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs); + } + + public void testEarlyTerminationDifferentSorter() throws IOException { + // test that the collector works correctly when the index was sorted by a + // different sorter than the one specified in the ctor. + createRandomIndexes(5); + final int numHits = _TestUtil.nextInt(random(), 1, numDocs / 10); + final Sort sort = new Sort(new SortField("ndv2", SortField.Type.LONG, false)); + final boolean fillFields = random().nextBoolean(); + final boolean trackDocScores = random().nextBoolean(); + final boolean trackMaxScore = random().nextBoolean(); + final boolean inOrder = random().nextBoolean(); + final TopFieldCollector collector1 = TopFieldCollector.create(sort, numHits, fillFields, trackDocScores, trackMaxScore, inOrder); + final TopFieldCollector collector2 = TopFieldCollector.create(sort, numHits, fillFields, trackDocScores, trackMaxScore, inOrder); + + final IndexSearcher searcher = newSearcher(reader); + final int iters = atLeast(5); + for (int i = 0; i < iters; ++i) { + final TermQuery query = new TermQuery(new Term("s", RandomPicks.randomFrom(random(), terms))); + searcher.search(query, collector1); + searcher.search(query, new EarlyTerminatingSortingCollector(collector2, new NumericDocValuesSorter("ndv2"), numHits) { + @Override + public void setNextReader(AtomicReaderContext context) throws IOException { + super.setNextReader(context); + assertFalse("segment should not be recognized as sorted as different sorter was used", segmentSorted); + } + }); + } + assertTrue(collector1.getTotalHits() >= collector2.getTotalHits()); + assertTopDocsEquals(collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs); + } + + private static void assertTopDocsEquals(ScoreDoc[] scoreDocs1, ScoreDoc[] scoreDocs2) { + assertEquals(scoreDocs1.length, scoreDocs2.length); + for (int i = 0; i < scoreDocs1.length; ++i) { + final ScoreDoc scoreDoc1 = scoreDocs1[i]; + final ScoreDoc scoreDoc2 = scoreDocs2[i]; + assertEquals(scoreDoc1.doc, scoreDoc2.doc); + assertEquals(scoreDoc1.score, scoreDoc2.score, 0.001f); + } + } + +} Property changes on: lucene/misc/src/test/org/apache/lucene/index/sorter/TestEarlyTermination.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/test/org/apache/lucene/index/sorter/TestSortingMergePolicy.java =================================================================== --- lucene/misc/src/test/org/apache/lucene/index/sorter/TestSortingMergePolicy.java (revision 1465881) +++ lucene/misc/src/test/org/apache/lucene/index/sorter/TestSortingMergePolicy.java (working copy) @@ -68,7 +68,7 @@ return doc; } - private static MergePolicy newSortingMergePolicy(Sorter sorter) { + static MergePolicy newSortingMergePolicy(Sorter sorter) { // create a MP with a low merge factor so that many merges happen MergePolicy mp; if (random().nextBoolean()) {