Property changes on: .
___________________________________________________________________
Modified: svn:mergeinfo
   Merged /lucene/dev/trunk:r1136080

Property changes on: solr
___________________________________________________________________
Modified: svn:mergeinfo
   Merged /lucene/dev/trunk/solr:r1136080

Index: solr/src/java/org/apache/solr/schema/RandomSortField.java
===================================================================
--- solr/src/java/org/apache/solr/schema/RandomSortField.java	(revision 1136080)
+++ solr/src/java/org/apache/solr/schema/RandomSortField.java	(working copy)
@@ -110,8 +110,8 @@
   private static FieldComparatorSource randomComparatorSource = new FieldComparatorSource() {
     @Override
-    public FieldComparator newComparator(final String fieldname, final int numHits, int sortPos, boolean reversed) throws IOException {
-      return new FieldComparator() {
+    public FieldComparator newComparator(final String fieldname, final int numHits, int sortPos, boolean reversed) throws IOException {
+      return new FieldComparator<Integer>() {
         int seed;
         private final int[] values = new int[numHits];
         int bottomVal;
@@ -142,7 +142,7 @@
         }

         @Override
-        public Comparable value(int slot) {
+        public Integer value(int slot) {
           return values[slot];
         }
       };

Index: solr/src/java/org/apache/solr/search/function/ValueSource.java
===================================================================
--- solr/src/java/org/apache/solr/search/function/ValueSource.java	(revision 1136080)
+++ solr/src/java/org/apache/solr/search/function/ValueSource.java	(working copy)
@@ -138,7 +138,7 @@
    * off of the {@link org.apache.solr.search.function.DocValues} for a ValueSource
    * instead of the normal Lucene FieldComparator that works off of a FieldCache.
    */
-  class ValueSourceComparator extends FieldComparator {
+  class ValueSourceComparator extends FieldComparator<Double> {
     private final double[] values;
     private DocValues docVals;
     private double bottom;
@@ -191,7 +191,7 @@
     }

     @Override
-    public Comparable value(int slot) {
+    public Double value(int slot) {
       return values[slot];
     }
   }

Index: solr/src/java/org/apache/solr/search/MissingStringLastComparatorSource.java
===================================================================
--- solr/src/java/org/apache/solr/search/MissingStringLastComparatorSource.java	(revision 1136080)
+++ solr/src/java/org/apache/solr/search/MissingStringLastComparatorSource.java	(working copy)
@@ -49,10 +49,9 @@
   }

-
   // Copied from Lucene and modified since the Lucene version couldn't
   // be extended or have it's values accessed.
-  class MissingLastOrdComparator extends FieldComparator {
+  class MissingLastOrdComparator extends FieldComparator<String> {
     private static final int NULL_ORD = Integer.MAX_VALUE;

     private final String nullVal;
@@ -187,8 +186,8 @@
     }

     @Override
-    public Comparable value(int slot) {
-      Comparable v = values[slot];
+    public String value(int slot) {
+      String v = values[slot];
       return v==null ?
nullVal : v; } Index: solr/src/java/org/apache/solr/handler/component/QueryElevationComponent.java =================================================================== --- solr/src/java/org/apache/solr/handler/component/QueryElevationComponent.java (revision 1136080) +++ solr/src/java/org/apache/solr/handler/component/QueryElevationComponent.java (working copy) @@ -460,8 +460,8 @@ } @Override - public FieldComparator newComparator(final String fieldname, final int numHits, int sortPos, boolean reversed) throws IOException { - return new FieldComparator() { + public FieldComparator newComparator(final String fieldname, final int numHits, int sortPos, boolean reversed) throws IOException { + return new FieldComparator() { FieldCache.StringIndex idIndex; private final int[] values = new int[numHits]; @@ -499,7 +499,7 @@ } @Override - public Comparable value(int slot) { + public Integer value(int slot) { return values[slot]; } }; Property changes on: lucene ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene:r1136080 Index: lucene/contrib/CHANGES.txt =================================================================== --- lucene/contrib/CHANGES.txt (revision 1136080) +++ lucene/contrib/CHANGES.txt (working copy) @@ -25,6 +25,10 @@ allow an app to control which indexing changes must be visible to which search requests. (Mike McCandless) + * LUCENE-3191: Added SearchGroup.merge and TopGroups.merge, to + facilitate doing grouping in a distributed environment (Uwe + Schindler, Mike McCandless) + API Changes * LUCENE-3141: add getter method to access fragInfos in FieldFragList. Index: lucene/contrib/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java =================================================================== --- lucene/contrib/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java (revision 1136080) +++ lucene/contrib/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java (working copy) @@ -17,6 +17,9 @@ package org.apache.lucene.search.grouping; +import java.io.IOException; +import java.util.*; + import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -27,11 +30,9 @@ import org.apache.lucene.search.*; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util._TestUtil; -import java.io.IOException; -import java.util.*; - // TODO // - should test relevance sort too // - test null @@ -243,15 +244,13 @@ return fields; } - /* - private String groupToString(BytesRef b) { + private String groupToString(String b) { if (b == null) { return "null"; } else { - return b.utf8ToString(); + return b; } } - */ private TopGroups slowGrouping(GroupDoc[] groupDocs, String searchTerm, @@ -418,6 +417,31 @@ return r; } + private static class ShardState { + + public final ShardSearcher[] subSearchers; + public final int[] docStarts; + + public ShardState(IndexSearcher s) { + IndexReader[] subReaders = s.getIndexReader().getSequentialSubReaders(); + if (subReaders == null) { + subReaders = new IndexReader[] {s.getIndexReader()}; + } + subSearchers = new ShardSearcher[subReaders.length]; + for(int searcherIDX=0;searcherIDX> topGroups = c1.getTopGroups(groupOffset, fillFields); final TopGroups groupsResult; + if (VERBOSE) { + System.out.println("TEST: topGroups:"); + if (topGroups == null) { + System.out.println(" 
null"); + } else { + for(SearchGroup groupx : topGroups) { + System.out.println(" " + groupToString(groupx.groupValue) + " sort=" + Arrays.toString(groupx.sortValues)); + } + } + } + + final TopGroups topGroupsShards = searchShards(s, shards, q, groupSort, docSort, groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores); if (topGroups != null) { @@ -736,8 +776,14 @@ } } } - assertEquals(docIDToID, expectedGroups, groupsResult, true, getScores); + assertEquals(docIDToID, expectedGroups, groupsResult, true, true, true, getScores); + // Confirm merged shards match: + assertEquals(docIDToID, expectedGroups, topGroupsShards, true, false, fillFields, getScores); + if (topGroupsShards != null) { + verifyShards(shards.docStarts, topGroupsShards); + } + final boolean needsScores = getScores || getMaxScores || docSort == null; final BlockGroupingCollector c3 = new BlockGroupingCollector(groupSort, groupOffset+topNGroups, needsScores, lastDocInBlock); final TermAllGroupsCollector allGroupsCollector2; @@ -760,6 +806,8 @@ groupsResult2 = tempTopGroups2; } + final TopGroups topGroupsBlockShards = searchShards(s2, shards2, q, groupSort, docSort, groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores); + if (expectedGroups != null) { // Fixup scores for reader2 for (GroupDocs groupDocsHits : expectedGroups.groups) { @@ -801,8 +849,11 @@ } } - assertEquals(docIDToID2, expectedGroups, groupsResult2, false, getScores); + assertEquals(docIDToID2, expectedGroups, groupsResult2, false, true, true, getScores); + assertEquals(docIDToID2, expectedGroups, topGroupsBlockShards, false, false, fillFields, getScores); } + s.close(); + s2.close(); } finally { FieldCache.DEFAULT.purge(r); if (r2 != null) { @@ -818,7 +869,135 @@ } } - private void assertEquals(int[] docIDtoID, TopGroups expected, TopGroups actual, boolean verifyGroupValues, boolean testScores) { + private void verifyShards(int[] docStarts, TopGroups topGroups) { + for(GroupDocs group : topGroups.groups) { + assertTrue(group instanceof GroupDocsAndShards); + GroupDocsAndShards withShards = (GroupDocsAndShards) group; + for(int hitIDX=0;hitIDX> groups1, Collection> groups2, boolean doSortValues) { + assertEquals(groups1.size(), groups2.size()); + final Iterator> iter1 = groups1.iterator(); + final Iterator> iter2 = groups2.iterator(); + + while(iter1.hasNext()) { + assertTrue(iter2.hasNext()); + + SearchGroup group1 = iter1.next(); + SearchGroup group2 = iter2.next(); + + assertEquals(group1.groupValue, group2.groupValue); + if (doSortValues) { + assertEquals(group1.sortValues, group2.sortValues); + } + } + assertFalse(iter2.hasNext()); + } + + private TopGroups searchShards(IndexSearcher topSearcher, ShardState shardState, Query query, Sort groupSort, Sort docSort, int groupOffset, int topNGroups, int docOffset, + int topNDocs, boolean getScores, boolean getMaxScores) throws Exception { + + // TODO: swap in caching, all groups collector here + // too... 
+ if (VERBOSE) { + System.out.println("TEST: " + shardState.subSearchers.length + " shards: " + Arrays.toString(shardState.subSearchers)); + } + // Run 1st pass collector to get top groups per shard + final Weight w = query.weight(topSearcher); + final List>> shardGroups = new ArrayList>>(); + for(int shardIDX=0;shardIDX> topGroups = c.getTopGroups(0, true); + if (topGroups != null) { + if (VERBOSE) { + System.out.println(" shard " + shardIDX + " s=" + shardState.subSearchers[shardIDX] + " " + topGroups.size() + " groups:"); + for(SearchGroup group : topGroups) { + System.out.println(" " + groupToString(group.groupValue) + " sort=" + Arrays.toString(group.sortValues)); + } + } + shardGroups.add(topGroups); + } + } + + final Collection> mergedTopGroups = SearchGroup.merge(shardGroups, groupOffset, topNGroups, groupSort); + if (VERBOSE) { + System.out.println(" merged:"); + if (mergedTopGroups == null) { + System.out.println(" null"); + } else { + for(SearchGroup group : mergedTopGroups) { + System.out.println(" " + groupToString(group.groupValue) + " sort=" + Arrays.toString(group.sortValues)); + } + } + } + + if (mergedTopGroups != null) { + + // Now 2nd pass: + @SuppressWarnings("unchecked") + final TopGroups[] shardTopGroups = new TopGroups[shardState.subSearchers.length]; + for(int shardIDX=0;shardIDX getDocIDSortLocs(Sort sort) { + List docFieldLocs = new ArrayList(); + SortField[] docFields = sort.getSort(); + for(int fieldIDX=0;fieldIDX groups) { + + List docFieldLocs = getDocIDSortLocs(docSort); + List docGroupFieldLocs = getDocIDSortLocs(groupSort); + + for(GroupDocs group : groups.groups) { + if (group.groupSortValues != null) { + for(int idx : docGroupFieldLocs) { + group.groupSortValues[idx] = Integer.valueOf(((Integer) group.groupSortValues[idx]).intValue() + docBase); + } + } + + for(int hitIDX=0;hitIDXnull. */ - public Comparable[] sortValues; + /** The sort values used during sorting. These are the + * groupSort field values of the highest rank document + * (by the groupSort) within the group. Can be + * null if fillFields=false had + * been passed to {@link AbstractFirstPassGroupingCollector#getTopGroups} */ + public Object[] sortValues; + + @Override + public String toString() { + return("SearchGroup(groupValue=" + groupValue + " sortValues=" + Arrays.toString(sortValues) + ")"); + } + + private static class ShardIter { + public final Iterator> iter; + public final int shardIndex; + + public ShardIter(Collection> shard, int shardIndex) { + this.shardIndex = shardIndex; + iter = shard.iterator(); + assert iter.hasNext(); + } + + public SearchGroup next() { + assert iter.hasNext(); + final SearchGroup group = iter.next(); + if (group.sortValues == null) { + throw new IllegalArgumentException("group.sortValues is null; you must pass fillFields=true to the first pass collector"); + } + return group; + } + + @Override + public String toString() { + return "ShardIter(shard=" + shardIndex + ")"; + } + } + + // Holds all shards currently on the same group + private static class MergedGroup { + + // groupValue may be null! 
+ public final T groupValue; + + public Object[] topValues; + public final List> shards = new ArrayList>(); + public int minShardIndex; + public boolean processed; + public boolean inQueue; + + public MergedGroup(T groupValue) { + this.groupValue = groupValue; + } + + // Only for assert + private boolean neverEquals(Object _other) { + if (_other instanceof MergedGroup) { + MergedGroup other = (MergedGroup) _other; + if (groupValue == null) { + assert other.groupValue != null; + } else { + assert !groupValue.equals(other.groupValue); + } + } + return true; + } + + @Override + public boolean equals(Object _other) { + // We never have another MergedGroup instance with + // same groupValue + assert neverEquals(_other); + + if (_other instanceof MergedGroup) { + MergedGroup other = (MergedGroup) _other; + if (groupValue == null) { + return other == null; + } else { + return groupValue.equals(other); + } + } else { + return false; + } + } + + @Override + public int hashCode() { + if (groupValue == null) { + return 0; + } else { + return groupValue.hashCode(); + } + } + } + + private static class GroupComparator implements Comparator> { + + public final FieldComparator[] comparators; + public final int[] reversed; + + public GroupComparator(Sort groupSort) throws IOException { + final SortField[] sortFields = groupSort.getSort(); + comparators = new FieldComparator[sortFields.length]; + reversed = new int[sortFields.length]; + for (int compIDX = 0; compIDX < sortFields.length; compIDX++) { + final SortField sortField = sortFields[compIDX]; + comparators[compIDX] = sortField.getComparator(1, compIDX); + reversed[compIDX] = sortField.getReverse() ? -1 : 1; + } + } + + @SuppressWarnings("unchecked") + public int compare(MergedGroup group, MergedGroup other) { + if (group == other) { + return 0; + } + //System.out.println("compare group=" + group + " other=" + other); + final Object[] groupValues = group.topValues; + final Object[] otherValues = other.topValues; + //System.out.println(" groupValues=" + groupValues + " otherValues=" + otherValues); + for (int compIDX = 0;compIDX < comparators.length; compIDX++) { + final int c = reversed[compIDX] * comparators[compIDX].compareValues(groupValues[compIDX], + otherValues[compIDX]); + if (c != 0) { + return c; + } + } + + // Tie break by min shard index: + assert group.minShardIndex != other.minShardIndex; + return group.minShardIndex - other.minShardIndex; + } + } + + private static class GroupMerger { + + private final GroupComparator groupComp; + private final SortedSet> queue; + private final Map> groupsSeen; + + public GroupMerger(Sort groupSort) throws IOException { + groupComp = new GroupComparator(groupSort); + queue = new TreeSet>(groupComp); + groupsSeen = new HashMap>(); + } + + @SuppressWarnings("unchecked") + private void updateNextGroup(int topN, ShardIter shard) { + while(shard.iter.hasNext()) { + final SearchGroup group = shard.next(); + MergedGroup mergedGroup = groupsSeen.get(group.groupValue); + final boolean isNew = mergedGroup == null; + //System.out.println(" next group=" + (group.groupValue == null ? 
"null" : ((BytesRef) group.groupValue).utf8ToString()) + " sort=" + Arrays.toString(group.sortValues)); + + if (isNew) { + // Start a new group: + //System.out.println(" new"); + mergedGroup = new MergedGroup(group.groupValue); + mergedGroup.minShardIndex = shard.shardIndex; + assert group.sortValues != null; + mergedGroup.topValues = group.sortValues; + groupsSeen.put(group.groupValue, mergedGroup); + mergedGroup.inQueue = true; + queue.add(mergedGroup); + } else if (mergedGroup.processed) { + // This shard produced a group that we already + // processed; move on to next group... + continue; + } else { + //System.out.println(" old"); + boolean competes = false; + for(int compIDX=0;compIDX 0) { + // Definitely does not compete + break; + } else if (compIDX == groupComp.comparators.length-1) { + if (shard.shardIndex < mergedGroup.minShardIndex) { + competes = true; + } + } + } + + //System.out.println(" competes=" + competes); + + if (competes) { + // Group's sort changed -- remove & re-insert + if (mergedGroup.inQueue) { + queue.remove(mergedGroup); + } + mergedGroup.topValues = group.sortValues; + mergedGroup.minShardIndex = shard.shardIndex; + queue.add(mergedGroup); + mergedGroup.inQueue = true; + } + } + + mergedGroup.shards.add(shard); + break; + } + + // Prune un-competitive groups: + while(queue.size() > topN) { + // TODO java 1.6: .pollLast + final MergedGroup group = queue.last(); + //System.out.println("PRUNE: " + group); + queue.remove(group); + group.inQueue = false; + } + } + + public Collection> merge(List>> shards, int offset, int topN) { + + final int maxQueueSize = offset + topN; + + //System.out.println("merge"); + // Init queue: + for(int shardIDX=0;shardIDX> shard = shards.get(shardIDX); + if (!shard.isEmpty()) { + //System.out.println(" insert shard=" + shardIDX); + updateNextGroup(maxQueueSize, new ShardIter(shard, shardIDX)); + } + } + + // Pull merged topN groups: + final List> newTopGroups = new ArrayList>(); + + int count = 0; + + while(queue.size() != 0) { + // TODO Java 1.6: pollFirst() + final MergedGroup group = queue.first(); + queue.remove(group); + group.processed = true; + //System.out.println(" pop: shards=" + group.shards + " group=" + (group.groupValue == null ? "null" : (((BytesRef) group.groupValue).utf8ToString())) + " sortValues=" + Arrays.toString(group.topValues)); + if (count++ >= offset) { + final SearchGroup newGroup = new SearchGroup(); + newGroup.groupValue = group.groupValue; + newGroup.sortValues = group.topValues; + newTopGroups.add(newGroup); + if (newTopGroups.size() == topN) { + break; + } + //} else { + // System.out.println(" skip < offset"); + } + + // Advance all iters in this group: + for(ShardIter shardIter : group.shards) { + updateNextGroup(maxQueueSize, shardIter); + } + } + + if (newTopGroups.size() == 0) { + return null; + } else { + return newTopGroups; + } + } + } + + /** Merges multiple collections of top groups, for example + * obtained from separate index shards. The provided + * groupSort must match how the groups were sorted, and + * the provided SearchGroups must have been computed + * with fillFields=true passed to {@link + * AbstractFirstPassGroupingCollector#getTopGroups}. + * + *

NOTE: this returns null if the topGroups is empty. + */ + public static Collection> merge(List>> topGroups, int offset, int topN, Sort groupSort) + throws IOException { + if (topGroups.size() == 0) { + return null; + } else { + return new GroupMerger(groupSort).merge(topGroups, offset, topN); + } + } } Index: lucene/contrib/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java =================================================================== --- lucene/contrib/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java (revision 1136080) +++ lucene/contrib/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java (working copy) @@ -1,7 +1,5 @@ package org.apache.lucene.search.grouping; -import org.apache.lucene.search.SortField; - /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -19,6 +17,13 @@ * limitations under the License. */ +import java.io.IOException; + +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TopDocs; + /** Represents result returned by a grouping search. * * @lucene.experimental */ @@ -59,4 +64,112 @@ this.totalGroupCount = totalGroupCount; } + /** Merges an array of TopGroups, for example obtained + * from the second-pass collector across multiple + * shards. Each TopGroups must have been sorted by the + * same groupSort and docSort, and the top groups passed + * to all second-pass collectors must be the same. + * + * NOTE: this cannot merge totalGroupCount; ie the + * returned TopGroups will have null totalGroupCount. + * + * NOTE: the topDocs in each GroupDocs is actually + * an instance of TopDocsAndShards + */ + public static TopGroups merge(TopGroups[] shardGroups, Sort groupSort, Sort docSort, int docOffset, int docTopN) + throws IOException { + + //System.out.println("TopGroups.merge"); + + if (shardGroups.length == 0) { + return null; + } + + int totalHitCount = 0; + int totalGroupedHitCount = 0; + + final int numGroups = shardGroups[0].groups.length; + for(TopGroups shard : shardGroups) { + if (numGroups != shard.groups.length) { + throw new IllegalArgumentException("number of groups differs across shards; you must pass same top groups to all shards' second-pass collector"); + } + totalHitCount += shard.totalHitCount; + totalGroupedHitCount += shard.totalGroupedHitCount; + } + + @SuppressWarnings("unchecked") + final GroupDocs[] mergedGroupDocs = new GroupDocs[numGroups]; + + final TopDocs[] shardTopDocs = new TopDocs[shardGroups.length]; + + for(int groupIDX=0;groupIDX value(int slot) { - return Integer.valueOf(slotValues[slot]); - } - } - - static class MyFieldComparatorSource extends FieldComparatorSource { - @Override - public FieldComparator newComparator(String fieldname, int numHits, int sortPos, boolean reversed) { - return new MyFieldComparator(numHits); - } - } - - // Test sorting w/ custom FieldComparator - public void testNewCustomFieldParserSort() throws Exception { - sort.setSort (new SortField ("parser", new MyFieldComparatorSource())); - assertMatches (full, queryA, sort, "JIHGFEDCBA"); - } - // test sorts in reverse public void testReverseSort() throws Exception { sort.setSort (new SortField (null, SortField.SCORE, true), SortField.FIELD_DOC ); Property changes on: lucene/backwards/src/test-framework ___________________________________________________________________ Modified: svn:mergeinfo Merged 
/lucene/dev/trunk/lucene/backwards/src/test-framework:r1136080

Index: lucene/CHANGES.txt
===================================================================
--- lucene/CHANGES.txt	(revision 1136080)
+++ lucene/CHANGES.txt	(working copy)
@@ -473,6 +473,10 @@
   of IndexInput) as its first argument.  (Robert Muir, Dawid Weiss,
   Mike McCandless)

+* LUCENE-3191: FieldComparator.value now returns an Object not
+  Comparable; FieldDoc.fields also changed from Comparable[] to
+  Object[] (Uwe Schindler, Mike McCandless)
+
 Changes in runtime behavior

 * LUCENE-2834: the hash used to compute the lock file name when the
@@ -520,6 +524,9 @@
   algorithm over objects that implement the new TwoPhaseCommit
   interface (such as IndexWriter). (Shai Erera)

+* LUCENE-3191: Added TopDocs.merge, to facilitate merging results from
+  different shards (Uwe Schindler, Mike McCandless)
+
 Build

 * LUCENE-1344: Create OSGi bundle using dev-tools/maven.

Index: lucene/src/test/org/apache/lucene/search/TestSort.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestSort.java	(revision 1136080)
+++ lucene/src/test/org/apache/lucene/search/TestSort.java	(working copy)
@@ -422,7 +422,7 @@
     assertMatches (empty, queryX, sort, "");
   }

-  static class MyFieldComparator extends FieldComparator {
+  static class MyFieldComparator extends FieldComparator<Integer> {
     int[] docValues;
     int[] slotValues;
     int bottomValue;
@@ -438,6 +438,7 @@

     @Override
     public int compare(int slot1, int slot2) {
+      // values are small enough that overflow won't happen
      return slotValues[slot1] - slotValues[slot2];
     }

@@ -463,7 +464,7 @@
     }

     @Override
-    public Comparable value(int slot) {
+    public Integer value(int slot) {
      return Integer.valueOf(slotValues[slot]);
     }
   }

Index: lucene/src/test/org/apache/lucene/search/TestTopDocsMerge.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestTopDocsMerge.java	(revision 1136080)
+++ lucene/src/test/org/apache/lucene/search/TestTopDocsMerge.java	(working copy)
@@ -34,25 +34,24 @@

 public class TestTopDocsMerge extends LuceneTestCase {

-  private static class ShardSearcher extends IndexSearcher {
-    private final IndexReader.AtomicReaderContext[] ctx;
+  private static class ShardSearcher {
+    private final IndexSearcher subSearcher;

-    public ShardSearcher(IndexReader.AtomicReaderContext ctx, IndexReader.ReaderContext parent) {
-      super(parent);
-      this.ctx = new IndexReader.AtomicReaderContext[] {ctx};
+    public ShardSearcher(IndexReader subReader) {
+      this.subSearcher = new IndexSearcher(subReader);
     }

     public void search(Weight weight, Collector collector) throws IOException {
-      search(ctx, weight, null, collector);
+      subSearcher.search(weight, null, collector);
     }

     public TopDocs search(Weight weight, int topN) throws IOException {
-      return search(ctx, weight, null, topN);
+      return subSearcher.search(weight, null, topN);
     }

     @Override
     public String toString() {
-      return "ShardSearcher(" + ctx[0] + ")";
+      return "ShardSearcher(" + subSearcher + ")";
     }
   }

@@ -120,16 +119,9 @@
       subReaders = new IndexReader[] {searcher.getIndexReader()};
     }
     final ShardSearcher[] subSearchers = new ShardSearcher[subReaders.length];
-    final IndexReader.ReaderContext ctx = searcher.getTopReaderContext();
-    if (ctx instanceof IndexReader.AtomicReaderContext) {
-      assert subSearchers.length == 1;
-      subSearchers[0] = new ShardSearcher((IndexReader.AtomicReaderContext) ctx, ctx);
-    } else {
-      final IndexReader.CompositeReaderContext compCTX =
(IndexReader.CompositeReaderContext) ctx; - for(int searcherIDX=0;searcherIDX sortFields = new ArrayList(); @@ -149,7 +141,9 @@ for(int subIDX=0;subIDX { + final ScoreDoc[][] shardHits; + + public ScoreMergeSortQueue(TopDocs[] shardHits) { + initialize(shardHits.length); + this.shardHits = new ScoreDoc[shardHits.length][]; + for(int shardIDX=0;shardIDX secondScore) { + return true; + } else { + // Tie break: earlier shard wins + if (first.shardIndex < second.shardIndex) { + return true; + } else if (first.shardIndex > second.shardIndex) { + return false; + } else { + // Tie break in same shard: resolve however the + // shard had resolved it: + assert first.hitIndex != second.hitIndex; + return first.hitIndex < second.hitIndex; + } + } + } + } + + private static class MergeSortQueue extends PriorityQueue { + // These are really FieldDoc instances: + final ScoreDoc[][] shardHits; + final FieldComparator[] comparators; + final int[] reverseMul; + + public MergeSortQueue(Sort sort, TopDocs[] shardHits) throws IOException { + initialize(shardHits.length); + this.shardHits = new ScoreDoc[shardHits.length][]; + for(int shardIDX=0;shardIDX second.shardIndex) { + //System.out.println(" return tb false"); + return false; + } else { + // Tie break in same shard: resolve however the + // shard had resolved it: + //System.out.println(" return tb " + (first.hitIndex < second.hitIndex)); + assert first.hitIndex != second.hitIndex; + return first.hitIndex < second.hitIndex; + } + } + } + + /** Returned from {@link #merge}, to include the merged + * TopDocs as well as the reference to which original + * TopDocs shard each hit came from. + * + * @lucene.experimental */ + public static class TopDocsAndShards extends TopDocs { + + /** Parallel array matching hits.scoreDocs */ + public final int[] shardIndex; + + public TopDocsAndShards(int totalHits, ScoreDoc[] scoreDocs, float maxScore, int[] shardIndex) { + super(totalHits, scoreDocs, maxScore); + this.shardIndex = shardIndex; + } + } + + /** Returns a new TopDocs, containing topN results across + * the provided TopDocs, sorting by the specified {@link + * Sort}. Each of the TopDocs must have been sorted by + * the same Sort, and sort field values must have been + * filled (ie, fillFields=true must be + * passed to {@link + * TopFieldCollector#create}. + * + *

Pass sort=null to merge sort by score descending. + * + * @lucene.experimental */ + public static TopDocsAndShards merge(Sort sort, int topN, TopDocs[] shardHits) throws IOException { + + final PriorityQueue queue; + if (sort == null) { + queue = new ScoreMergeSortQueue(shardHits); + } else { + queue = new MergeSortQueue(sort, shardHits); + } + + int totalHitCount = 0; + float maxScore = Float.MIN_VALUE; + for(int shardIDX=0;shardIDX 0) { + totalHitCount += shard.totalHits; + queue.add(new ShardRef(shardIDX)); + maxScore = Math.max(maxScore, shard.getMaxScore()); + //System.out.println(" maxScore now " + maxScore + " vs " + shard.getMaxScore()); + } + } + + final ScoreDoc[] hits = new ScoreDoc[Math.min(topN, totalHitCount)]; + final int[] shardIndex = new int[hits.length]; + + int hitUpto = 0; + while(hitUpto < hits.length) { + assert queue.size() > 0; + ShardRef ref = queue.pop(); + hits[hitUpto] = shardHits[ref.shardIndex].scoreDocs[ref.hitIndex++]; + shardIndex[hitUpto] = ref.shardIndex; + + //System.out.println(" hitUpto=" + hitUpto); + //System.out.println(" doc=" + hits[hitUpto].doc + " score=" + hits[hitUpto].score); + + hitUpto++; + + if (ref.hitIndex < shardHits[ref.shardIndex].scoreDocs.length) { + // Not done with this these TopDocs yet: + queue.add(ref); + } + } + + return new TopDocsAndShards(totalHitCount, hits, maxScore, shardIndex); + } } Index: lucene/src/java/org/apache/lucene/search/FieldComparator.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FieldComparator.java (revision 1136080) +++ lucene/src/java/org/apache/lucene/search/FieldComparator.java (working copy) @@ -81,7 +81,7 @@ * * @lucene.experimental */ -public abstract class FieldComparator { +public abstract class FieldComparator { /** * Compare hit at slot1 with hit at slot2. @@ -158,13 +158,23 @@ * Return the actual value in the slot. * * @param slot the value - * @return value in this slot upgraded to Comparable + * @return value in this slot */ - public abstract Comparable value(int slot); + public abstract T value(int slot); + /** Returns -1 if first is less than second. 
Default + * impl to assume the type implements Comparable and + * invoke .compareTo; be sure to override this method if + * your FieldComparator's type isn't a Comparable or + * if your values may sometimes be null */ + @SuppressWarnings("unchecked") + public int compareValues(T first, T second) { + return ((Comparable) first).compareTo(second); + } + /** Parses field's values as byte (using {@link * FieldCache#getBytes} and sorts by ascending value */ - public static final class ByteComparator extends FieldComparator { + public static final class ByteComparator extends FieldComparator { private final byte[] values; private byte[] currentReaderValues; private final String field; @@ -203,13 +213,13 @@ } @Override - public Comparable value(int slot) { + public Byte value(int slot) { return Byte.valueOf(values[slot]); } } /** Sorts by ascending docID */ - public static final class DocComparator extends FieldComparator { + public static final class DocComparator extends FieldComparator { private final int[] docIDs; private int docBase; private int bottom; @@ -249,14 +259,14 @@ } @Override - public Comparable value(int slot) { + public Integer value(int slot) { return Integer.valueOf(docIDs[slot]); } } /** Parses field's values as double (using {@link * FieldCache#getDoubles} and sorts by ascending value */ - public static final class DoubleComparator extends FieldComparator { + public static final class DoubleComparator extends FieldComparator { private final double[] values; private double[] currentReaderValues; private final String field; @@ -310,14 +320,14 @@ } @Override - public Comparable value(int slot) { + public Double value(int slot) { return Double.valueOf(values[slot]); } } /** Parses field's values as float (using {@link * FieldCache#getFloats} and sorts by ascending value */ - public static final class FloatComparator extends FieldComparator { + public static final class FloatComparator extends FieldComparator { private final float[] values; private float[] currentReaderValues; private final String field; @@ -375,14 +385,14 @@ } @Override - public Comparable value(int slot) { + public Float value(int slot) { return Float.valueOf(values[slot]); } } /** Parses field's values as int (using {@link * FieldCache#getInts} and sorts by ascending value */ - public static final class IntComparator extends FieldComparator { + public static final class IntComparator extends FieldComparator { private final int[] values; private int[] currentReaderValues; private final String field; @@ -444,14 +454,14 @@ } @Override - public Comparable value(int slot) { + public Integer value(int slot) { return Integer.valueOf(values[slot]); } } /** Parses field's values as long (using {@link * FieldCache#getLongs} and sorts by ascending value */ - public static final class LongComparator extends FieldComparator { + public static final class LongComparator extends FieldComparator { private final long[] values; private long[] currentReaderValues; private final String field; @@ -509,7 +519,7 @@ } @Override - public Comparable value(int slot) { + public Long value(int slot) { return Long.valueOf(values[slot]); } } @@ -520,7 +530,7 @@ * using {@link TopScoreDocCollector} directly (which {@link * IndexSearcher#search} uses when no {@link Sort} is * specified). 
*/ - public static final class RelevanceComparator extends FieldComparator { + public static final class RelevanceComparator extends FieldComparator { private final float[] scores; private float bottom; private Scorer scorer; @@ -564,14 +574,22 @@ } @Override - public Comparable value(int slot) { + public Float value(int slot) { return Float.valueOf(scores[slot]); } + + // Override because we sort reverse of natural Float order: + @Override + public int compareValues(Float first, Float second) { + // Reversed intentionally because relevance by default + // sorts descending: + return second.compareTo(first); + } } /** Parses field's values as short (using {@link * FieldCache#getShorts} and sorts by ascending value */ - public static final class ShortComparator extends FieldComparator { + public static final class ShortComparator extends FieldComparator { private final short[] values; private short[] currentReaderValues; private final String field; @@ -610,14 +628,14 @@ } @Override - public Comparable value(int slot) { + public Short value(int slot) { return Short.valueOf(values[slot]); } } /** Sorts by a field's value using the Collator for a * given Locale.*/ - public static final class StringComparatorLocale extends FieldComparator { + public static final class StringComparatorLocale extends FieldComparator { private final String[] values; private String[] currentReaderValues; @@ -676,9 +694,22 @@ } @Override - public Comparable value(int slot) { + public String value(int slot) { return values[slot]; } + + @Override + public int compareValues(String val1, String val2) { + if (val1 == null) { + if (val2 == null) { + return 0; + } + return -1; + } else if (val2 == null) { + return 1; + } + return collator.compare(val1, val2); + } } /** Sorts by field's natural String sort order, using @@ -690,7 +721,7 @@ * to large results, this comparator will be much faster * than {@link StringValComparator}. For very small * result sets it may be slower. */ - public static final class StringOrdValComparator extends FieldComparator { + public static final class StringOrdValComparator extends FieldComparator { private final int[] ords; private final String[] values; @@ -815,9 +846,22 @@ } @Override - public Comparable value(int slot) { + public String value(int slot) { return values[slot]; } + + @Override + public int compareValues(String val1, String val2) { + if (val1 == null) { + if (val2 == null) { + return 0; + } + return -1; + } else if (val2 == null) { + return 1; + } + return val1.compareTo(val2); + } public String[] getValues() { return values; @@ -836,7 +880,7 @@ * comparisons are done using String.compareTo, which is * slow for medium to large result sets but possibly * very fast for very small results sets. 
*/ - public static final class StringValComparator extends FieldComparator { + public static final class StringValComparator extends FieldComparator { private String[] values; private String[] currentReaderValues; @@ -894,9 +938,23 @@ } @Override - public Comparable value(int slot) { + public String value(int slot) { return values[slot]; } + + @Override + public int compareValues(String val1, String val2) { + if (val1 == null) { + if (val2 == null) { + return 0; + } + return -1; + } else if (val2 == null) { + return 1; + } else { + return val1.compareTo(val2); + } + } } final protected static int binarySearch(String[] a, String key) { Index: lucene/src/java/org/apache/lucene/search/SortField.java =================================================================== --- lucene/src/java/org/apache/lucene/search/SortField.java (revision 1136080) +++ lucene/src/java/org/apache/lucene/search/SortField.java (working copy) @@ -88,10 +88,10 @@ // as FieldCache.STRING_INDEX. /** Represents sorting by document score (relevance). */ - public static final SortField FIELD_SCORE = new SortField (null, SCORE); + public static final SortField FIELD_SCORE = new SortField(null, SCORE); /** Represents sorting by document number (index order). */ - public static final SortField FIELD_DOC = new SortField (null, DOC); + public static final SortField FIELD_DOC = new SortField(null, DOC); private String field; private int type; // defaults to determining type dynamically @@ -108,7 +108,7 @@ * type is SCORE or DOC. * @param type Type of values in the terms. */ - public SortField (String field, int type) { + public SortField(String field, int type) { initFieldType(field, type); } @@ -119,7 +119,7 @@ * @param type Type of values in the terms. * @param reverse True if natural order should be reversed. */ - public SortField (String field, int type, boolean reverse) { + public SortField(String field, int type, boolean reverse) { initFieldType(field, type); this.reverse = reverse; } @@ -134,7 +134,7 @@ * @throws IllegalArgumentException if the parser fails to * subclass an existing numeric parser, or field is null */ - public SortField (String field, FieldCache.Parser parser) { + public SortField(String field, FieldCache.Parser parser) { this(field, parser, false); } @@ -149,7 +149,7 @@ * @throws IllegalArgumentException if the parser fails to * subclass an existing numeric parser, or field is null */ - public SortField (String field, FieldCache.Parser parser, boolean reverse) { + public SortField(String field, FieldCache.Parser parser, boolean reverse) { if (parser instanceof FieldCache.IntParser) initFieldType(field, INT); else if (parser instanceof FieldCache.FloatParser) initFieldType(field, FLOAT); else if (parser instanceof FieldCache.ShortParser) initFieldType(field, SHORT); @@ -188,7 +188,7 @@ * @param field Name of field to sort by; cannot be null. * @param comparator Returns a comparator for sorting hits. */ - public SortField (String field, FieldComparatorSource comparator) { + public SortField(String field, FieldComparatorSource comparator) { initFieldType(field, CUSTOM); this.comparatorSource = comparator; } @@ -198,7 +198,7 @@ * @param comparator Returns a comparator for sorting hits. * @param reverse True if natural order should be reversed. 
*/ - public SortField (String field, FieldComparatorSource comparator, boolean reverse) { + public SortField(String field, FieldComparatorSource comparator, boolean reverse) { initFieldType(field, CUSTOM); this.reverse = reverse; this.comparatorSource = comparator; Index: lucene/src/java/org/apache/lucene/search/IndexSearcher.java =================================================================== --- lucene/src/java/org/apache/lucene/search/IndexSearcher.java (revision 1136080) +++ lucene/src/java/org/apache/lucene/search/IndexSearcher.java (working copy) @@ -448,7 +448,7 @@ * Collector)}.

*/ protected TopFieldDocs search(Weight weight, Filter filter, int nDocs, - Sort sort, boolean fillFields) + Sort sort, boolean fillFields) throws IOException { if (sort == null) throw new NullPointerException(); Index: lucene/src/test-framework/org/apache/lucene/index/SlowMultiReaderWrapper.java =================================================================== --- lucene/src/test-framework/org/apache/lucene/index/SlowMultiReaderWrapper.java (revision 1136080) +++ lucene/src/test-framework/org/apache/lucene/index/SlowMultiReaderWrapper.java (working copy) @@ -41,4 +41,9 @@ public IndexReader[] getSequentialSubReaders() { return null; } + + @Override + public String toString() { + return "SlowMultiReaderWrapper(" + super.toString() + ")"; + } } Index: lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java =================================================================== --- lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java (revision 1136080) +++ lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java (working copy) @@ -21,7 +21,6 @@ import java.io.IOException; import java.util.Collection; import java.util.Collections; -import java.util.List; import java.util.Random; import org.apache.lucene.analysis.Analyzer; @@ -172,16 +171,24 @@ w.deleteAll(); } + private boolean doRandomOptimize = true; + + public void setDoRandomOptimize(boolean v) { + doRandomOptimize = v; + } + private void doRandomOptimize() throws IOException { - final int segCount = w.getSegmentCount(); - if (r.nextBoolean() || segCount == 0) { - // full optimize - w.optimize(); - } else { - // partial optimize - final int limit = _TestUtil.nextInt(r, 1, segCount); - w.optimize(limit); - assert w.getSegmentCount() <= limit: "limit=" + limit + " actual=" + w.getSegmentCount(); + if (doRandomOptimize) { + final int segCount = w.getSegmentCount(); + if (r.nextBoolean() || segCount == 0) { + // full optimize + w.optimize(); + } else { + // partial optimize + final int limit = _TestUtil.nextInt(r, 1, segCount); + w.optimize(limit); + assert w.getSegmentCount() <= limit: "limit=" + limit + " actual=" + w.getSegmentCount(); + } } } Index: lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java =================================================================== --- lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java (revision 1136080) +++ lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java (working copy) @@ -27,9 +27,9 @@ import java.io.PrintStream; import java.lang.reflect.Method; import java.util.Enumeration; +import java.util.HashMap; +import java.util.Map; import java.util.Random; -import java.util.Map; -import java.util.HashMap; import java.util.zip.ZipEntry; import java.util.zip.ZipFile; @@ -42,6 +42,9 @@ import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.MergeScheduler; import org.apache.lucene.index.TieredMergePolicy; +import org.apache.lucene.search.FieldDoc; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; public class _TestUtil { @@ -417,4 +420,24 @@ newName.append(suffix); return new File(directory, newName.toString()); } + + public static void assertEquals(TopDocs expected, TopDocs actual) { + Assert.assertEquals("wrong total hits", expected.totalHits, actual.totalHits); + Assert.assertEquals("wrong maxScore", expected.getMaxScore(), actual.getMaxScore(), 0.0); + Assert.assertEquals("wrong hit count", expected.scoreDocs.length, 
actual.scoreDocs.length); + for(int hitIDX=0;hitIDX
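
Example (not part of the patch): a minimal sketch of how a caller might use the new TopDocs.merge API for sharded, non-grouped search. The ShardedSearchSketch class, the per-shard IndexSearcher array and the "title" sort field are hypothetical; only the merge(Sort, int, TopDocs[]) call and the TopDocsAndShards.shardIndex array follow the code added to lucene/src/java/org/apache/lucene/search/TopDocs.java above.

// Hypothetical usage sketch for TopDocs.merge; the shard setup is assumed,
// only the merge call and TopDocsAndShards come from this patch.
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;

public class ShardedSearchSketch {

  /** Searches every shard with the same query and sort, then merges. */
  public static TopDocs.TopDocsAndShards searchAllShards(IndexSearcher[] shardSearchers,
                                                         Query query, int topN) throws Exception {
    // The Sort must be identical on every shard; searching with a Sort fills
    // the per-hit sort values (fillFields=true), which merge() compares.
    final Sort sort = new Sort(new SortField("title", SortField.STRING)); // hypothetical field

    final TopDocs[] shardHits = new TopDocs[shardSearchers.length];
    for (int shardIDX = 0; shardIDX < shardSearchers.length; shardIDX++) {
      // Each shard is searched independently (possibly on another machine).
      shardHits[shardIDX] = shardSearchers[shardIDX].search(query, null, topN, sort);
    }

    // Keep the best topN hits overall; the returned TopDocsAndShards records
    // which shard each merged hit came from in the parallel shardIndex array.
    return TopDocs.merge(sort, topN, shardHits);
  }

  public static void printHits(TopDocs.TopDocsAndShards merged) {
    for (int i = 0; i < merged.scoreDocs.length; i++) {
      final ScoreDoc sd = merged.scoreDocs[i];
      System.out.println("rank=" + i + " shard=" + merged.shardIndex[i] + " doc=" + sd.doc);
    }
  }
}

Passing sort=null would instead merge by descending score, as noted in the javadoc added above.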
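Example (not part of the patch): a sketch of the two-pass distributed grouping flow that SearchGroup.merge and TopGroups.merge are meant to support (compare searchShards in TestGrouping above). DistributedGroupingSketch, runFirstPass and runSecondPass are hypothetical placeholders for running the per-shard first- and second-pass grouping collectors; T stands for the group value type, and the generic signatures used for the two merge calls are assumptions.

// Hypothetical sketch: per-shard collection is abstracted away; only the two
// merge calls reflect the APIs added by this patch.
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.grouping.SearchGroup;
import org.apache.lucene.search.grouping.TopGroups;

public abstract class DistributedGroupingSketch<T> {

  /** First pass on one shard: its top groups, collected with fillFields=true
   *  so the merged groups carry comparable sort values. */
  protected abstract Collection<SearchGroup<T>> runFirstPass(int shard, Query query,
      Sort groupSort, int topNGroups) throws IOException;

  /** Second pass on one shard: top documents for the given (merged) groups. */
  protected abstract TopGroups<T> runSecondPass(int shard, Query query,
      Collection<SearchGroup<T>> topGroups, Sort groupSort, Sort docSort,
      int maxDocsPerGroup) throws IOException;

  public TopGroups<T> search(int numShards, Query query, Sort groupSort, Sort docSort,
      int groupOffset, int topNGroups, int docOffset, int docsPerGroup) throws IOException {

    // Pass 1: every shard reports its own top groups.
    final List<Collection<SearchGroup<T>>> shardGroups = new ArrayList<Collection<SearchGroup<T>>>();
    for (int shard = 0; shard < numShards; shard++) {
      final Collection<SearchGroup<T>> groups =
          runFirstPass(shard, query, groupSort, groupOffset + topNGroups);
      if (groups != null) {
        shardGroups.add(groups);
      }
    }

    // Merge the per-shard group lists into one global top-N set of groups.
    final Collection<SearchGroup<T>> mergedGroups =
        SearchGroup.merge(shardGroups, groupOffset, topNGroups, groupSort);
    if (mergedGroups == null) {
      return null; // no groups matched on any shard
    }

    // Pass 2: every shard must collect docs for the *same* merged groups.
    @SuppressWarnings("unchecked")
    final TopGroups<T>[] shardTopGroups = new TopGroups[numShards];
    for (int shard = 0; shard < numShards; shard++) {
      shardTopGroups[shard] = runSecondPass(shard, query, mergedGroups,
          groupSort, docSort, docOffset + docsPerGroup);
    }

    // Merge the per-shard TopGroups into the final, globally ordered result.
    return TopGroups.merge(shardTopGroups, groupSort, docSort, docOffset, docsPerGroup);
  }
}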