Index: lucene/core/src/test/org/apache/lucene/search/TestSortRandom.java =================================================================== --- lucene/core/src/test/org/apache/lucene/search/TestSortRandom.java (revision 1558111) +++ lucene/core/src/test/org/apache/lucene/search/TestSortRandom.java (working copy) @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collections; +import java.util.Comparator; import java.util.HashSet; import java.util.List; import java.util.Random; @@ -29,6 +30,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.document.StoredField; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; @@ -60,30 +62,44 @@ final List docValues = new ArrayList(); // TODO: deletions while (numDocs < NUM_DOCS) { - final String s; - if (random.nextBoolean()) { - s = _TestUtil.randomSimpleString(random, maxLength); - } else { - s = _TestUtil.randomUnicodeString(random, maxLength); - } - final BytesRef br = new BytesRef(s); + final Document doc = new Document(); - if (!allowDups) { - if (seen.contains(s)) { - continue; + // 10% of the time, the document is missing the value: + final BytesRef br; + if (random().nextInt(10) != 7) { + final String s; + if (random.nextBoolean()) { + s = _TestUtil.randomSimpleString(random, maxLength); + } else { + s = _TestUtil.randomUnicodeString(random, maxLength); } - seen.add(s); + + if (!allowDups) { + if (seen.contains(s)) { + continue; + } + seen.add(s); + } + + if (VERBOSE) { + System.out.println(" " + numDocs + ": s=" + s); + } + + br = new BytesRef(s); + doc.add(new SortedDocValuesField("stringdv", br)); + doc.add(newStringField("string", s, Field.Store.NO)); + docValues.add(br); + + } else { + br = null; + if (VERBOSE) { + System.out.println(" " + numDocs + ": "); + } + docValues.add(null); } - if (VERBOSE) { - System.out.println(" " + numDocs + ": s=" + s); - } - - final Document doc = new Document(); - doc.add(new SortedDocValuesField("stringdv", br)); - doc.add(newStringField("string", s, Field.Store.NO)); doc.add(new NumericDocValuesField("id", numDocs)); - docValues.add(br); + doc.add(new StoredField("id", numDocs)); writer.addDocument(doc); numDocs++; @@ -103,13 +119,26 @@ final int ITERS = atLeast(100); for(int iter=0;iter" : br.utf8ToString())); if (idx == hitCount-1) { break; } @@ -161,12 +217,30 @@ System.out.println(" actual:"); for(int hitIDX=0;hitIDX" : br.utf8ToString()) + " id=" + s.doc(fd.doc).get("id")); } } for(int hitIDX=0;hitIDX setNextReader(AtomicReaderContext context) { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } @@ -156,7 +161,7 @@ } @Override - public int compareDocToValue(int doc, Object value) { + public int compareTop(int doc) { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } } Index: lucene/core/src/java/org/apache/lucene/search/SortField.java =================================================================== --- lucene/core/src/java/org/apache/lucene/search/SortField.java (revision 1558111) +++ lucene/core/src/java/org/apache/lucene/search/SortField.java (working copy) @@ -106,6 +106,9 @@ // Used for 'sortMissingFirst/Last' public Object missingValue = null; + // Only used with type=STRING + public boolean sortMissingLast; + /** Creates a sort by terms in the given field with the type of term * values explicitly given. * @param field Name of field to sort by. Can be null if @@ -165,13 +168,24 @@ this.reverse = reverse; this.parser = parser; } + + /** Pass this to {@link #setMissingValue} to have missing + * string values sort first. */ + public final static Object STRING_FIRST = new Object(); - public SortField setMissingValue(Object missingValue) { - if (type != Type.INT && type != Type.FLOAT && type != Type.LONG && type != Type.DOUBLE) { - throw new IllegalArgumentException( "Missing value only works for numeric types" ); + /** Pass this to {@link #setMissingValue} to have missing + * string values sort last. */ + public final static Object STRING_LAST = new Object(); + + public void setMissingValue(Object missingValue) { + if (type == Type.STRING) { + if (missingValue != STRING_FIRST && missingValue != STRING_LAST) { + throw new IllegalArgumentException("For STRING type, missing value must be either STRING_FIRST or STRING_LAST"); + } + } else if (type != Type.INT && type != Type.FLOAT && type != Type.LONG && type != Type.DOUBLE) { + throw new IllegalArgumentException("Missing value only works for numeric or STRING types"); } this.missingValue = missingValue; - return this; } /** Creates a sort with a custom comparison function. @@ -376,9 +390,10 @@ return comparatorSource.newComparator(field, numHits, sortPos, reverse); case STRING: - return new FieldComparator.TermOrdValComparator(numHits, field); + return new FieldComparator.TermOrdValComparator(numHits, field, missingValue == STRING_LAST); case STRING_VAL: + // TODO: should we remove this? who really uses it? return new FieldComparator.TermValComparator(numHits, field); case REWRITEABLE: Index: lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java =================================================================== --- lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java (revision 1558111) +++ lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java (working copy) @@ -869,6 +869,11 @@ // Must set maxScore to NEG_INF, or otherwise Math.max always returns NaN. maxScore = Float.NEGATIVE_INFINITY; + + // Tell all comparators their top value: + for(int i=0;i 0) { - // Not yet collected - sameValues = false; - //System.out.println(" keep: after"); - break; - } - } - - // Tie-break by docID: - if (sameValues && doc <= afterDoc) { - // Already collected on a previous page - //System.out.println(" skip: tie-break"); - return; - } - - collectedHits++; - float score = Float.NaN; if (trackMaxScore) { score = scorer.score(); @@ -921,7 +896,8 @@ } if (queueFull) { - // Fastmatch: return if this hit is not competitive + // Fastmatch: return if this hit is no better than + // the worst hit currently in the queue: for (int i = 0;; i++) { final int c = reverseMul[i] * comparators[i].compareBottom(doc); if (c < 0) { @@ -939,7 +915,37 @@ break; } } + } + //System.out.println(" collect doc=" + doc); + + // Check if this hit was already collected on a + // previous page: + boolean sameValues = true; + for(int compIDX=0;compIDX 0) { + // Already collected on a previous page + //System.out.println(" skip: before"); + return; + } else if (cmp < 0) { + // Not yet collected + sameValues = false; + //System.out.println(" keep: after"); + break; + } + } + + // Tie-break by docID: + if (sameValues && doc <= afterDoc) { + // Already collected on a previous page + //System.out.println(" skip: tie-break"); + return; + } + + if (queueFull) { // This hit is competitive - replace bottom element in queue & adjustTop for (int i = 0; i < comparators.length; i++) { comparators[i].copy(bottom.slot, doc); @@ -955,6 +961,8 @@ comparators[i].setBottom(bottom.slot); } } else { + collectedHits++; + // Startup transient: queue hasn't gathered numHits yet final int slot = collectedHits - 1; //System.out.println(" slot=" + slot); Index: lucene/core/src/java/org/apache/lucene/search/FieldComparator.java =================================================================== --- lucene/core/src/java/org/apache/lucene/search/FieldComparator.java (revision 1558111) +++ lucene/core/src/java/org/apache/lucene/search/FieldComparator.java (working copy) @@ -62,6 +62,18 @@ *
  • {@link #compareBottom} Compare a new hit (docID) * against the "weakest" (bottom) entry in the queue. * + *
  • {@link #setTopValue} This method is called by + * {@link TopFieldCollector} to notify the + * FieldComparator of the top most value, which is + * used by future calls to {@link #compareTop}. + * + *
  • {@link #compareBottom} Compare a new hit (docID) + * against the "weakest" (bottom) entry in the queue. + * + *
  • {@link #compareTop} Compare a new hit (docID) + * against the top value previously set by a call to + * {@link #setTopValue}. + * *
  • {@link #copy} Installs a new hit into the * priority queue. The {@link FieldValueHitQueue} * calls this method when a new hit is competitive. @@ -104,7 +116,15 @@ public abstract void setBottom(final int slot); /** - * Compare the bottom of the queue with doc. This will + * Record the top value, for future calls to {@link + * #compareTop}. This is only called for searches that + * use searchAfter (deep paging), and is called before any + * calls to {@link #setNextReader}. + */ + public abstract void setTopValue(Object value); + + /** + * Compare the bottom of the queue with this doc. This will * only invoked after setBottom has been called. This * should return the same result as {@link * #compare(int,int)}} as if bottom were slot1 and the new @@ -123,6 +143,22 @@ public abstract int compareBottom(int doc) throws IOException; /** + * Compare the top value with this doc. This will + * only invoked after setTopValue has been called. This + * should return the same result as {@link + * #compare(int,int)}} as if topValue were slot1 and the new + * document were slot 2. This is only called for searches that + * use searchAfter (deep paging). + * + * @param doc that was hit + * @return any N < 0 if the doc's value is sorted after + * the bottom entry (not competitive), any N > 0 if the + * doc's value is sorted before the bottom entry and 0 if + * they are equal. + */ + public abstract int compareTop(int doc) throws IOException; + + /** * This method is called when a new hit is competitive. * You should copy any state associated with this document * that will be required for future comparisons, into the @@ -184,10 +220,6 @@ } } - /** Returns negative result if the doc's value is less - * than the provided value. */ - public abstract int compareDocToValue(int doc, T value) throws IOException; - /** * Base FieldComparator class for numeric types */ @@ -223,6 +255,7 @@ private final DoubleParser parser; private FieldCache.Doubles currentReaderValues; private double bottom; + private double topValue; DoubleComparator(int numHits, String field, FieldCache.Parser parser, Double missingValue) { super(field, missingValue); @@ -273,20 +306,27 @@ } @Override + public void setTopValue(Object value) { + if ((value instanceof Double) == false) { + throw new IllegalArgumentException("top value must be a Double; got: " + value.getClass()); + } + topValue = ((Double) value).doubleValue(); + } + + @Override public Double value(int slot) { return Double.valueOf(values[slot]); } @Override - public int compareDocToValue(int doc, Double valueObj) { - final double value = valueObj.doubleValue(); + public int compareTop(int doc) { double docValue = currentReaderValues.get(doc); // Test for docValue == 0 to save Bits.get method call for // the common case (doc has value and value is non-zero): if (docsWithField != null && docValue == 0 && !docsWithField.get(doc)) { docValue = missingValue; } - return Double.compare(docValue, value); + return Double.compare(topValue, docValue); } } @@ -297,6 +337,7 @@ private final FloatParser parser; private FieldCache.Floats currentReaderValues; private float bottom; + private float topValue; FloatComparator(int numHits, String field, FieldCache.Parser parser, Float missingValue) { super(field, missingValue); @@ -348,20 +389,27 @@ } @Override + public void setTopValue(Object value) { + if ((value instanceof Float) == false) { + throw new IllegalArgumentException("top value must be a Float; got: " + value.getClass()); + } + topValue = ((Float) value).floatValue(); + } + + @Override public Float value(int slot) { return Float.valueOf(values[slot]); } @Override - public int compareDocToValue(int doc, Float valueObj) { - final float value = valueObj.floatValue(); + public int compareTop(int doc) { float docValue = currentReaderValues.get(doc); // Test for docValue == 0 to save Bits.get method call for // the common case (doc has value and value is non-zero): if (docsWithField != null && docValue == 0 && !docsWithField.get(doc)) { docValue = missingValue; } - return Float.compare(docValue, value); + return Float.compare(topValue, docValue); } } @@ -372,6 +420,7 @@ private final IntParser parser; private FieldCache.Ints currentReaderValues; private int bottom; // Value of bottom of queue + private int topValue; IntComparator(int numHits, String field, FieldCache.Parser parser, Integer missingValue) { super(field, missingValue); @@ -422,20 +471,27 @@ } @Override + public void setTopValue(Object value) { + if ((value instanceof Integer) == false) { + throw new IllegalArgumentException("top value must be an Integer; got: " + value.getClass()); + } + topValue = ((Integer) value).intValue(); + } + + @Override public Integer value(int slot) { return Integer.valueOf(values[slot]); } @Override - public int compareDocToValue(int doc, Integer valueObj) { - final int value = valueObj.intValue(); + public int compareTop(int doc) { int docValue = currentReaderValues.get(doc); // Test for docValue == 0 to save Bits.get method call for // the common case (doc has value and value is non-zero): if (docsWithField != null && docValue == 0 && !docsWithField.get(doc)) { docValue = missingValue; } - return Integer.compare(docValue, value); + return Integer.compare(topValue, docValue); } } @@ -446,6 +502,7 @@ private final LongParser parser; private FieldCache.Longs currentReaderValues; private long bottom; + private long topValue; LongComparator(int numHits, String field, FieldCache.Parser parser, Long missingValue) { super(field, missingValue); @@ -498,20 +555,27 @@ } @Override + public void setTopValue(Object value) { + if ((value instanceof Long) == false) { + throw new IllegalArgumentException("top value must be a Long; got: " + value.getClass()); + } + topValue = ((Long) value).longValue(); + } + + @Override public Long value(int slot) { return Long.valueOf(values[slot]); } @Override - public int compareDocToValue(int doc, Long valueObj) { - final long value = valueObj.longValue(); + public int compareTop(int doc) { long docValue = currentReaderValues.get(doc); // Test for docValue == 0 to save Bits.get method call for // the common case (doc has value and value is non-zero): if (docsWithField != null && docValue == 0 && !docsWithField.get(doc)) { docValue = missingValue; } - return Long.compare(docValue, value); + return Long.compare(topValue, docValue); } } @@ -525,7 +589,8 @@ private final float[] scores; private float bottom; private Scorer scorer; - + private float topValue; + RelevanceComparator(int numHits) { scores = new float[numHits]; } @@ -559,6 +624,14 @@ } @Override + public void setTopValue(Object value) { + if ((value instanceof Float) == false) { + throw new IllegalArgumentException("top value must be a Float; got: " + value.getClass()); + } + topValue = ((Float) value).floatValue(); + } + + @Override public void setScorer(Scorer scorer) { // wrap with a ScoreCachingWrappingScorer so that successive calls to // score() will not incur score computation over and @@ -584,11 +657,10 @@ } @Override - public int compareDocToValue(int doc, Float valueObj) throws IOException { - final float value = valueObj.floatValue(); + public int compareTop(int doc) throws IOException { float docValue = scorer.score(); assert !Float.isNaN(docValue); - return Float.compare(value, docValue); + return Float.compare(docValue, topValue); } } @@ -597,6 +669,7 @@ private final int[] docIDs; private int docBase; private int bottom; + private int topValue; DocComparator(int numHits) { docIDs = new int[numHits]; @@ -634,15 +707,22 @@ } @Override + public void setTopValue(Object value) { + if ((value instanceof Integer) == false) { + throw new IllegalArgumentException("top value must be an Integer; got: " + value.getClass()); + } + topValue = ((Integer) value).intValue(); + } + + @Override public Integer value(int slot) { return Integer.valueOf(docIDs[slot]); } @Override - public int compareDocToValue(int doc, Integer valueObj) { - final int value = valueObj.intValue(); + public int compareTop(int doc) { int docValue = docBase + doc; - return Integer.compare(docValue, value); + return Integer.compare(topValue, docValue); } } @@ -700,13 +780,42 @@ @lucene.internal */ BytesRef bottomValue; + /** Set by setTopValue. */ + BytesRef topValue; + boolean topSameReader; + int topOrd; + + private int docBase; + final BytesRef tempBR = new BytesRef(); + /** -1 if missing values are sorted first, 1 if they are + * sorted last */ + final int missingSortCmp; + + /** Which ordinal to use for a missing value. */ + final int missingOrd; + + /** Creates this, sorting missing values first. */ public TermOrdValComparator(int numHits, String field) { + this(numHits, field, false); + } + + /** Creates this, with control over how missing values + * are sorted. Pass sortMissingLast=true to put + * missing values at the end. */ + public TermOrdValComparator(int numHits, String field, boolean sortMissingLast) { ords = new int[numHits]; values = new BytesRef[numHits]; readerGen = new int[numHits]; this.field = field; + if (sortMissingLast) { + missingSortCmp = 1; + missingOrd = Integer.MAX_VALUE; + } else { + missingSortCmp = -1; + missingOrd = -1; + } } @Override @@ -721,140 +830,74 @@ if (val2 == null) { return 0; } - return -1; + return missingSortCmp; } else if (val2 == null) { - return 1; + return -missingSortCmp; } return val1.compareTo(val2); } @Override public int compareBottom(int doc) { - throw new UnsupportedOperationException(); + assert bottomSlot != -1; + int docOrd = termsIndex.getOrd(doc); + if (docOrd == -1) { + docOrd = missingOrd; + } + if (bottomSameReader) { + // ord is precisely comparable, even in the equal case + return bottomOrd - docOrd; + } else if (bottomOrd >= docOrd) { + // the equals case always means bottom is > doc + // (because we set bottomOrd to the lower bound in + // setBottom): + return 1; + } else { + return -1; + } } @Override public void copy(int slot, int doc) { - throw new UnsupportedOperationException(); - } - - @Override - public int compareDocToValue(int doc, BytesRef value) { int ord = termsIndex.getOrd(doc); if (ord == -1) { - if (value == null) { - return 0; + ord = missingOrd; + values[slot] = null; + } else { + assert ord >= 0; + if (values[slot] == null) { + values[slot] = new BytesRef(); } - return -1; - } else if (value == null) { - return 1; + termsIndex.lookupOrd(ord, values[slot]); } - termsIndex.lookupOrd(ord, tempBR); - return tempBR.compareTo(value); + ords[slot] = ord; + readerGen[slot] = currentReaderGen; } + + @Override + public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { + docBase = context.docBase; + termsIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), field); + currentReaderGen++; - /** Base class for specialized (per bit width of the - * ords) per-segment comparator. NOTE: this is messy; - * we do this only because hotspot can't reliably inline - * the underlying array access when looking up doc->ord - * @lucene.internal - */ - abstract class PerSegmentComparator extends FieldComparator { - - @Override - public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { - return TermOrdValComparator.this.setNextReader(context); - } - - @Override - public int compare(int slot1, int slot2) { - return TermOrdValComparator.this.compare(slot1, slot2); - } - - @Override - public void setBottom(final int bottom) { - TermOrdValComparator.this.setBottom(bottom); - } - - @Override - public BytesRef value(int slot) { - return TermOrdValComparator.this.value(slot); - } - - @Override - public int compareValues(BytesRef val1, BytesRef val2) { - if (val1 == null) { - if (val2 == null) { - return 0; - } - return -1; - } else if (val2 == null) { - return 1; - } - return val1.compareTo(val2); - } - - @Override - public int compareDocToValue(int doc, BytesRef value) { - return TermOrdValComparator.this.compareDocToValue(doc, value); - } - } - - // Used per-segment when docToOrd is null: - private final class AnyOrdComparator extends PerSegmentComparator { - private final SortedDocValues termsIndex; - private final int docBase; - - public AnyOrdComparator(SortedDocValues termsIndex, int docBase) { - this.termsIndex = termsIndex; - this.docBase = docBase; - } - - @Override - public int compareBottom(int doc) { - assert bottomSlot != -1; - final int docOrd = termsIndex.getOrd(doc); - if (bottomSameReader) { - // ord is precisely comparable, even in the equal case - return bottomOrd - docOrd; - } else if (bottomOrd >= docOrd) { - // the equals case always means bottom is > doc - // (because we set bottomOrd to the lower bound in - // setBottom): - return 1; + if (topValue != null) { + // Recompute topOrd/SameReader + int ord = termsIndex.lookupTerm(topValue); + if (ord >= 0) { + topSameReader = true; + topOrd = ord; } else { - return -1; + topSameReader = false; + topOrd = -ord-2; } } - @Override - public void copy(int slot, int doc) { - final int ord = termsIndex.getOrd(doc); - ords[slot] = ord; - if (ord == -1) { - values[slot] = null; - } else { - assert ord >= 0; - if (values[slot] == null) { - values[slot] = new BytesRef(); - } - termsIndex.lookupOrd(ord, values[slot]); - } - readerGen[slot] = currentReaderGen; - } - } - - @Override - public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { - final int docBase = context.docBase; - termsIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), field); - FieldComparator perSegComp = new AnyOrdComparator(termsIndex, docBase); - currentReaderGen++; if (bottomSlot != -1) { - perSegComp.setBottom(bottomSlot); + // Recompute bottomOrd/SameReader + setBottom(bottomSlot); } - return perSegComp; + return this; } @Override @@ -867,18 +910,18 @@ bottomSameReader = true; } else { if (bottomValue == null) { - // -1 ord is null for all segments - assert ords[bottomSlot] == -1; - bottomOrd = -1; + // missingOrd is null for all segments + assert ords[bottomSlot] == missingOrd; + bottomOrd = missingOrd; bottomSameReader = true; readerGen[bottomSlot] = currentReaderGen; } else { - final int index = termsIndex.lookupTerm(bottomValue); - if (index < 0) { - bottomOrd = -index - 2; + final int ord = termsIndex.lookupTerm(bottomValue); + if (ord < 0) { + bottomOrd = -ord - 2; bottomSameReader = false; } else { - bottomOrd = index; + bottomOrd = ord; // exact value match bottomSameReader = true; readerGen[bottomSlot] = currentReaderGen; @@ -889,27 +932,71 @@ } @Override + public void setTopValue(Object value) { + // nocommit what if it's null? need test + if ((value instanceof BytesRef) == false) { + throw new IllegalArgumentException("top value must be a BytesRef; got: " + value.getClass()); + } + topValue = BytesRef.deepCopyOf((BytesRef) value); + } + + @Override public BytesRef value(int slot) { return values[slot]; } + + @Override + public int compareTop(int doc) { + + int ord = termsIndex.getOrd(doc); + + if (topSameReader) { + // ord is precisely comparable, even in the equal case + return topOrd - ord; + } else if (ord <= topOrd) { + // the equals case always means doc is < value + // (because we set lastOrd to the lower bound) + return 1; + } else { + return -1; + } + } + + @Override + public int compareValues(BytesRef val1, BytesRef val2) { + if (val1 == null) { + if (val2 == null) { + return 0; + } + return missingSortCmp; + } else if (val2 == null) { + return -missingSortCmp; + } + return val1.compareTo(val2); + } } - // just used internally in this comparator - private static final byte[] MISSING_BYTES = new byte[0]; - /** Sorts by field's natural Term sort order. All * comparisons are done using BytesRef.compareTo, which is * slow for medium to large result sets but possibly * very fast for very small results sets. */ + // TODO: should we remove this? who really uses it? public static final class TermValComparator extends FieldComparator { + // sentinel, just used internally in this comparator + private static final byte[] MISSING_BYTES = new byte[0]; + private BytesRef[] values; private BinaryDocValues docTerms; private Bits docsWithField; private final String field; private BytesRef bottom; + private BytesRef topValue; private final BytesRef tempBR = new BytesRef(); + // nocommit add missing last support here? + + /** Sole constructor. */ TermValComparator(int numHits, String field) { values = new BytesRef[numHits]; this.field = field; @@ -972,6 +1059,14 @@ } @Override + public void setTopValue(Object value) { + if ((value instanceof BytesRef) == false) { + throw new IllegalArgumentException("top value must be a BytesRef; got: " + value.getClass()); + } + topValue = BytesRef.deepCopyOf((BytesRef) value); + } + + @Override public BytesRef value(int slot) { return values[slot]; } @@ -990,12 +1085,12 @@ } @Override - public int compareDocToValue(int doc, BytesRef value) { + public int compareTop(int doc) { docTerms.get(doc, tempBR); if (tempBR.length == 0 && docsWithField.get(doc) == false) { tempBR.bytes = MISSING_BYTES; } - return tempBR.compareTo(value); + return topValue.compareTo(tempBR); } } } Index: lucene/core/src/java/org/apache/lucene/util/UnicodeUtil.java =================================================================== --- lucene/core/src/java/org/apache/lucene/util/UnicodeUtil.java (revision 1558111) +++ lucene/core/src/java/org/apache/lucene/util/UnicodeUtil.java (working copy) @@ -96,7 +96,7 @@ public final class UnicodeUtil { /** A binary term consisting of a number of 0xff bytes, likely to be bigger than other terms - * one would normally encounter, and definitely bigger than any UTF-8 terms. + * (e.g. collation keys) one would normally encounter, and definitely bigger than any UTF-8 terms. *

    * WARNING: This is not a valid UTF8 Term **/ Index: lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinFieldComparator.java =================================================================== --- lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinFieldComparator.java (revision 1558111) +++ lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinFieldComparator.java (working copy) @@ -60,6 +60,11 @@ } @Override + public void setTopValue(Object value) { + wrappedComparator.setTopValue(value); + } + + @Override public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { DocIdSet innerDocuments = childFilter.getDocIdSet(context, null); if (isEmpty(innerDocuments)) { @@ -193,7 +198,7 @@ @Override @SuppressWarnings("unchecked") - public int compareDocToValue(int parentDoc, Object value) throws IOException { + public int compareTop(int parentDoc) throws IOException { if (parentDoc == 0 || parentDocuments == null || childDocuments == null) { return 0; } @@ -216,7 +221,7 @@ if (childDoc >= parentDoc || childDoc == -1) { return cmp; } - int cmp1 = wrappedComparator.compareDocToValue(childDoc, value); + int cmp1 = wrappedComparator.compareTop(childDoc); if (cmp1 > 0) { return cmp1; } else { @@ -309,7 +314,7 @@ @Override @SuppressWarnings("unchecked") - public int compareDocToValue(int parentDoc, Object value) throws IOException { + public int compareTop(int parentDoc) throws IOException { if (parentDoc == 0 || parentDocuments == null || childDocuments == null) { return 0; } @@ -330,7 +335,7 @@ if (childDoc >= parentDoc || childDoc == -1) { return cmp; } - int cmp1 = wrappedComparator.compareDocToValue(childDoc, value); + int cmp1 = wrappedComparator.compareTop(childDoc); if (cmp1 < 0) { return cmp1; } else { Index: lucene/queries/src/java/org/apache/lucene/queries/function/ValueSource.java =================================================================== --- lucene/queries/src/java/org/apache/lucene/queries/function/ValueSource.java (revision 1558111) +++ lucene/queries/src/java/org/apache/lucene/queries/function/ValueSource.java (working copy) @@ -131,6 +131,7 @@ private FunctionValues docVals; private double bottom; private final Map fcontext; + private double topValue; ValueSourceComparator(Map fcontext, int numHits) { this.fcontext = fcontext; @@ -164,15 +165,22 @@ } @Override + public void setTopValue(final Object value) { + if ((value instanceof Double) == false) { + throw new IllegalArgumentException("value must be a Double; got: " + value.getClass()); + } + this.topValue = ((Double) value).doubleValue(); + } + + @Override public Double value(int slot) { return values[slot]; } @Override - public int compareDocToValue(int doc, Double valueObj) { - final double value = valueObj; + public int compareTop(int doc) { final double docValue = docVals.doubleVal(doc); - return Double.compare(docValue, value); + return Double.compare(topValue, docValue); } } } Index: lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/SlowCollatedStringComparator.java =================================================================== --- lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/SlowCollatedStringComparator.java (revision 1558111) +++ lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/SlowCollatedStringComparator.java (working copy) @@ -44,6 +44,7 @@ private final String field; final Collator collator; private String bottom; + private String topValue; private final BytesRef tempBR = new BytesRef(); public SlowCollatedStringComparator(int numHits, String field, Collator collator) { @@ -105,6 +106,14 @@ } @Override + public void setTopValue(final Object value) { + if ((value instanceof String) == false) { + throw new IllegalArgumentException("value must be a String; got: " + value.getClass()); + } + this.topValue = (String) value; + } + + @Override public String value(int slot) { return values[slot]; } @@ -124,7 +133,7 @@ } @Override - public int compareDocToValue(int doc, String value) { + public int compareTop(int doc) { currentDocTerms.get(doc, tempBR); final String docValue; if (tempBR.length == 0 && docsWithField.get(doc) == false) { @@ -132,6 +141,6 @@ } else { docValue = tempBR.utf8ToString(); } - return compareValues(docValue, value); + return compareValues(topValue, docValue); } } Index: lucene/expressions/src/java/org/apache/lucene/expressions/ExpressionComparator.java =================================================================== --- lucene/expressions/src/java/org/apache/lucene/expressions/ExpressionComparator.java (revision 1558111) +++ lucene/expressions/src/java/org/apache/lucene/expressions/ExpressionComparator.java (working copy) @@ -30,6 +30,7 @@ class ExpressionComparator extends FieldComparator { private final double[] values; private double bottom; + private double topValue; private ValueSource source; private FunctionValues scores; @@ -67,6 +68,14 @@ } @Override + public void setTopValue(Object value) { + if ((value instanceof Double) == false) { + throw new IllegalArgumentException("value must be a Double; got: " + value.getClass()); + } + topValue = ((Double) value).doubleValue(); + } + + @Override public int compareBottom(int doc) throws IOException { return Double.compare(bottom, scores.doubleVal(doc)); } @@ -88,7 +97,7 @@ } @Override - public int compareDocToValue(int doc, Double valueObj) throws IOException { - return Double.compare(scores.doubleVal(doc), valueObj.doubleValue()); + public int compareTop(int doc) throws IOException { + return Double.compare(topValue, scores.doubleVal(doc)); } } Index: solr/core/src/java/org/apache/solr/search/MissingStringLastComparatorSource.java =================================================================== --- solr/core/src/java/org/apache/solr/search/MissingStringLastComparatorSource.java (revision 1558111) +++ solr/core/src/java/org/apache/solr/search/MissingStringLastComparatorSource.java (working copy) @@ -88,6 +88,11 @@ } @Override + public void setTopValue(Object value) { + throw new UnsupportedOperationException(); + } + + @Override public int compareBottom(int doc) { throw new UnsupportedOperationException(); } @@ -123,7 +128,7 @@ } @Override - public int compareDocToValue(int doc, Comparable docValue) { + public int compareTop(int doc) { throw new UnsupportedOperationException(); } @@ -169,6 +174,11 @@ } @Override + public void setTopValue(Object value) { + throw new UnsupportedOperationException(); + } + + @Override public int compare(int slot1, int slot2) { if (readerGen[slot1] == readerGen[slot2]) { return ords[slot1] - ords[slot2]; @@ -223,19 +233,16 @@ return values==null ? parent.NULL_VAL : values[slot]; } + // this stuff caches the ordinal of the last 'value' from compareDocToValue + BytesRef lastValue = new BytesRef(); // sentinel + int lastOrd; + int lastReaderGen = -1; + boolean lastSameReader = false; + + // nocommit, broken: nuke all of this @Override - public int compareDocToValue(int doc, BytesRef value) { - int docOrd = termsIndex.getOrd(doc); - if (docOrd == -1) { - if (value == null) { - return 0; - } - return 1; - } else if (value == null) { - return -1; - } - termsIndex.lookupOrd(docOrd, tempBR); - return tempBR.compareTo(value); + public int compareTop(int doc) { + throw new UnsupportedOperationException(); } } @@ -247,34 +254,18 @@ @Override public int compareBottom(int doc) { assert bottomSlot != -1; - int order = termsIndex.getOrd(doc); - if (order == -1) order = NULL_ORD; + int docOrd = termsIndex.getOrd(doc); + if (docOrd == -1) docOrd = NULL_ORD; if (bottomSameReader) { - // ord is precisely comparable, even in the equal - // case - return bottomOrd - order; + // ord is precisely comparable, even in the equal case + return bottomOrd - docOrd; + } else if (bottomOrd >= docOrd) { + // the equals case always means bottom is > doc + // (because we set bottomOrd to the lower bound in + // setBottom): + return 1; } else { - // ord is only approx comparable: if they are not - // equal, we can use that; if they are equal, we - // must fallback to compare by value - - final int cmp = bottomOrd - order; - if (cmp != 0) { - return cmp; - } - - // take care of the case where both vals are null - if (order == NULL_ORD) { - return 0; - } - - // and at this point we know that neither value is null, so safe to compare - if (order == NULL_ORD) { - return bottomValue.compareTo(parent.NULL_VAL); - } else { - termsIndex.lookupOrd(order, tempBR); - return bottomValue.compareTo(tempBR); - } + return -1; } } Index: solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java =================================================================== --- solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java (revision 1558111) +++ solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java (working copy) @@ -576,6 +576,7 @@ return new FieldComparator() { private final int[] values = new int[numHits]; private int bottomVal; + private int topVal; private TermsEnum termsEnum; private DocsEnum docsEnum; Set seen = new HashSet(elevations.ids.size()); @@ -590,6 +591,14 @@ bottomVal = values[slot]; } + @Override + public void setTopValue(Object value) { + if ((value instanceof Integer) == false) { + throw new UnsupportedOperationException("value must be Integer; got: " + value.getClass()); + } + topVal = ((Integer) value).intValue(); + } + private int docVal(int doc) { if (ordSet.size() > 0) { int slot = ordSet.find(doc); @@ -646,10 +655,9 @@ } @Override - public int compareDocToValue(int doc, Integer valueObj) { - final int value = valueObj.intValue(); + public int compareTop(int doc) { final int docValue = docVal(doc); - return docValue - value; // values will be small enough that there is no overflow concern + return topVal - docValue; // values will be small enough that there is no overflow concern } }; } Index: solr/core/src/java/org/apache/solr/schema/RandomSortField.java =================================================================== --- solr/core/src/java/org/apache/solr/schema/RandomSortField.java (revision 1558111) +++ solr/core/src/java/org/apache/solr/schema/RandomSortField.java (working copy) @@ -109,6 +109,7 @@ int seed; private final int[] values = new int[numHits]; int bottomVal; + int topVal; @Override public int compare(int slot1, int slot2) { @@ -121,6 +122,14 @@ } @Override + public void setTopValue(Object value) { + if ((value instanceof Integer) == false) { + throw new UnsupportedOperationException("value must be Integer; got: " + value.getClass()); + } + topVal = ((Integer) value).intValue(); + } + + @Override public int compareBottom(int doc) { return bottomVal - hash(doc+seed); } @@ -142,9 +151,9 @@ } @Override - public int compareDocToValue(int doc, Integer valueObj) { + public int compareTop(int doc) { // values will be positive... no overflow possible. - return hash(doc+seed) - valueObj.intValue(); + return topVal - hash(doc+seed); } }; } Index: solr/core/src/java/org/apache/solr/schema/EnumField.java =================================================================== --- solr/core/src/java/org/apache/solr/schema/EnumField.java (revision 1558111) +++ solr/core/src/java/org/apache/solr/schema/EnumField.java (working copy) @@ -178,7 +178,9 @@ public SortField getSortField(SchemaField field, boolean top) { field.checkSortability(); final Object missingValue = Integer.MIN_VALUE; - return new SortField(field.getName(), FieldCache.NUMERIC_UTILS_INT_PARSER, top).setMissingValue(missingValue); + SortField sf = new SortField(field.getName(), FieldCache.NUMERIC_UTILS_INT_PARSER, top); + sf.setMissingValue(missingValue); + return sf; } /** Index: solr/core/src/java/org/apache/solr/schema/TrieField.java =================================================================== --- solr/core/src/java/org/apache/solr/schema/TrieField.java (revision 1558111) +++ solr/core/src/java/org/apache/solr/schema/TrieField.java (working copy) @@ -142,7 +142,9 @@ Object missingValue = null; boolean sortMissingLast = field.sortMissingLast(); boolean sortMissingFirst = field.sortMissingFirst(); - + + SortField sf; + switch (type) { case INTEGER: if( sortMissingLast ) { @@ -151,7 +153,9 @@ else if( sortMissingFirst ) { missingValue = top ? Integer.MAX_VALUE : Integer.MIN_VALUE; } - return new SortField( field.getName(), FieldCache.NUMERIC_UTILS_INT_PARSER, top).setMissingValue(missingValue); + sf = new SortField( field.getName(), FieldCache.NUMERIC_UTILS_INT_PARSER, top); + sf.setMissingValue(missingValue); + return sf; case FLOAT: if( sortMissingLast ) { @@ -160,7 +164,9 @@ else if( sortMissingFirst ) { missingValue = top ? Float.POSITIVE_INFINITY : Float.NEGATIVE_INFINITY; } - return new SortField( field.getName(), FieldCache.NUMERIC_UTILS_FLOAT_PARSER, top).setMissingValue(missingValue); + sf = new SortField( field.getName(), FieldCache.NUMERIC_UTILS_FLOAT_PARSER, top); + sf.setMissingValue(missingValue); + return sf; case DATE: // fallthrough case LONG: @@ -170,7 +176,9 @@ else if( sortMissingFirst ) { missingValue = top ? Long.MAX_VALUE : Long.MIN_VALUE; } - return new SortField( field.getName(), FieldCache.NUMERIC_UTILS_LONG_PARSER, top).setMissingValue(missingValue); + sf = new SortField( field.getName(), FieldCache.NUMERIC_UTILS_LONG_PARSER, top); + sf.setMissingValue(missingValue); + return sf; case DOUBLE: if( sortMissingLast ) { @@ -179,7 +187,9 @@ else if( sortMissingFirst ) { missingValue = top ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY; } - return new SortField( field.getName(), FieldCache.NUMERIC_UTILS_DOUBLE_PARSER, top).setMissingValue(missingValue); + sf = new SortField( field.getName(), FieldCache.NUMERIC_UTILS_DOUBLE_PARSER, top); + sf.setMissingValue(missingValue); + return sf; default: throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + field.name);