Index: lucene/src/test/org/apache/lucene/search/TestSort.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestSort.java (revision 1186760) +++ lucene/src/test/org/apache/lucene/search/TestSort.java (working copy) @@ -87,6 +87,7 @@ public static void beforeClass() throws Exception { NUM_STRINGS = atLeast(6000); } + // document data: // the tracer field is used to determine which document was hit // the contents field is used to search and sort by relevance @@ -117,7 +118,7 @@ { "c", "m", "5", "5.0", "5", null, null, "5", "5", "5", "5", null}, { "d", "m", null, null, null, null, null, null, null, null, null, null} }; - + // create an index of all the documents, or just the x, or just the y documents private IndexSearcher getIndex (boolean even, boolean odd) throws IOException { @@ -125,6 +126,21 @@ dirs.add(indexStore); RandomIndexWriter writer = new RandomIndexWriter(random, indexStore, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); + final ValueType stringDVType; + if (dvStringSorted) { + // Index sorted + stringDVType = random.nextBoolean() ? ValueType.BYTES_VAR_SORTED : ValueType.BYTES_FIXED_SORTED; + } else { + // Index non-sorted + if (random.nextBoolean()) { + // Fixed + stringDVType = random.nextBoolean() ? ValueType.BYTES_FIXED_STRAIGHT : ValueType.BYTES_FIXED_DEREF; + } else { + // Var + stringDVType = random.nextBoolean() ? 
ValueType.BYTES_VAR_STRAIGHT : ValueType.BYTES_VAR_DEREF; + } + } + FieldType ft1 = new FieldType(); ft1.setStored(true); FieldType ft2 = new FieldType(); @@ -138,7 +154,7 @@ Field f = new StringField ("int", data[i][2]); if (supportsDocValues) { f = IndexDocValuesField.build(f, ValueType.VAR_INTS); - }; + } doc.add(f); } if (data[i][3] != null) { @@ -148,7 +164,13 @@ } doc.add(f); } - if (data[i][4] != null) doc.add (new StringField ("string", data[i][4])); + if (data[i][4] != null) { + Field f = new StringField ("string", data[i][4]); + if (supportsDocValues) { + f = IndexDocValuesField.build(f, stringDVType); + } + doc.add(f); + } if (data[i][5] != null) doc.add (new StringField ("custom", data[i][5])); if (data[i][6] != null) doc.add (new StringField ("i18n", data[i][6])); if (data[i][7] != null) doc.add (new StringField ("long", data[i][7])); @@ -191,23 +213,55 @@ setMaxBufferedDocs(4). setMergePolicy(newLogMergePolicy(97)) ); - FieldType customType = new FieldType(); - customType.setStored(true); + FieldType onlyStored = new FieldType(); + onlyStored.setStored(true); + final int fixedLen = getRandomNumber(2, 8); + final int fixedLen2 = getRandomNumber(1, 4); for (int i=0; i creator; @@ -387,42 +460,71 @@ /** * Test String sorting: small queue to many matches, multi field sort, reverse sort */ - public void testStringSort() throws IOException { - ScoreDoc[] result = null; - IndexSearcher searcher = getFullStrings(); + public void testStringSort() throws Exception { + // Normal string field, var length sort.setSort( new SortField("string", SortField.Type.STRING), new SortField("string2", SortField.Type.STRING, true), SortField.FIELD_DOC); + verifyStringSort(sort); - result = searcher.search(new MatchAllDocsQuery(), null, 500, sort).scoreDocs; + // Normal string field, fixed length + sort.setSort( + new SortField("string_fixed", SortField.Type.STRING), + new SortField("string2_fixed", SortField.Type.STRING, true), + SortField.FIELD_DOC); + 
verifyStringSort(sort); + // Doc values field, var length + assumeFalse("cannot work with preflex codec", CodecProvider.getDefault().getDefaultFieldCodec().equals("PreFlex")); + sort.setSort( + useDocValues(new SortField("string", getDVStringSortType())), + useDocValues(new SortField("string2", getDVStringSortType(), true)), + SortField.FIELD_DOC); + verifyStringSort(sort); + + // Doc values field, fixed length + sort.setSort( + useDocValues(new SortField("string_fixed", getDVStringSortType())), + useDocValues(new SortField("string2_fixed", getDVStringSortType(), true)), + SortField.FIELD_DOC); + verifyStringSort(sort); + } + + private void verifyStringSort(Sort sort) throws Exception { + final IndexSearcher searcher = getFullStrings(); + final ScoreDoc[] result = searcher.search(new MatchAllDocsQuery(), null, _TestUtil.nextInt(random, 500, searcher.maxDoc()), sort).scoreDocs; StringBuilder buff = new StringBuilder(); int n = result.length; String last = null; String lastSub = null; int lastDocId = 0; boolean fail = false; + final String fieldSuffix = sort.getSort()[0].getField().endsWith("_fixed") ? 
"_fixed" : ""; for (int x = 0; x < n; ++x) { Document doc2 = searcher.doc(result[x].doc); - IndexableField[] v = doc2.getFields("tracer"); - IndexableField[] v2 = doc2.getFields("tracer2"); + IndexableField[] v = doc2.getFields("tracer" + fieldSuffix); + IndexableField[] v2 = doc2.getFields("tracer2" + fieldSuffix); for (int j = 0; j < v.length; ++j) { + buff.append(v[j] + "(" + v2[j] + ")(" + result[x].doc+")\n"); if (last != null) { int cmp = v[j].stringValue().compareTo(last); if (!(cmp >= 0)) { // ensure first field is in order fail = true; System.out.println("fail:" + v[j] + " < " + last); + buff.append(" WRONG tracer\n"); } if (cmp == 0) { // ensure second field is in reverse order cmp = v2[j].stringValue().compareTo(lastSub); if (cmp > 0) { fail = true; System.out.println("rev field fail:" + v2[j] + " > " + lastSub); + buff.append(" WRONG tracer2\n"); } else if(cmp == 0) { // ensure docid is in order if (result[x].doc < lastDocId) { fail = true; System.out.println("doc fail:" + result[x].doc + " > " + lastDocId); + buff.append(" WRONG docID\n"); } } } @@ -430,11 +532,10 @@ last = v[j].stringValue(); lastSub = v2[j].stringValue(); lastDocId = result[x].doc; - buff.append(v[j] + "(" + v2[j] + ")(" + result[x].doc+") "); } } - if(fail) { - System.out.println("topn field1(field2)(docID):" + buff); + if (fail) { + System.out.println("topn field1(field2)(docID):\n" + buff); } assertFalse("Found sort results out of order", fail); searcher.close(); @@ -531,6 +632,16 @@ sort.setSort (useDocValues(new SortField ("float", SortField.Type.FLOAT)), new SortField ("string", SortField.Type.STRING) ); assertMatches (empty, queryX, sort, ""); + + sort.setSort (useDocValues(new SortField ("string", getDVStringSortType(), true)), SortField.FIELD_DOC ); + assertMatches (empty, queryX, sort, ""); + + sort.setSort (useDocValues(new SortField ("float", SortField.Type.FLOAT)), + useDocValues(new SortField ("string", getDVStringSortType())) ); + assertMatches (empty, queryX, sort, 
""); + + sort.setSort (useDocValues(new SortField ("float", SortField.Type.FLOAT)), useDocValues(new SortField ("string", getDVStringSortType())) ); + assertMatches (empty, queryX, sort, ""); } static class MyFieldComparator extends FieldComparator { @@ -624,11 +735,18 @@ sort.setSort (useDocValues(new SortField ("float", SortField.Type.FLOAT, true)) ); assertMatches (full, queryX, sort, "AECIG"); assertMatches (full, queryY, sort, "BFJHD"); + + sort.setSort (useDocValues(new SortField ("string", getDVStringSortType(), true)) ); + assertMatches (full, queryX, sort, "CEGIA"); + assertMatches (full, queryY, sort, "BFHJD"); } } // test sorting when the sort field is empty (undefined) for some of the documents public void testEmptyFieldSort() throws Exception { + + // NOTE: do not test DocValues fields here, since you + // can't sort when some documents don't have the field sort.setSort (new SortField ("string", SortField.Type.STRING) ); assertMatches (full, queryF, sort, "ZJI"); @@ -644,14 +762,6 @@ sort.setSort (new SortField ("float", SortField.Type.FLOAT) ); assertMatches (full, queryF, sort, "ZJI"); - if (supportsDocValues) { - sort.setSort (useDocValues(new SortField ("int", SortField.Type.INT)) ); - assertMatches (full, queryF, sort, "IZJ"); - - sort.setSort (useDocValues(new SortField ("float", SortField.Type.FLOAT)) ); - assertMatches (full, queryF, sort, "ZJI"); - } - // using a nonexisting field as first sort key shouldn't make a difference: sort.setSort (new SortField ("nosuchfield", SortField.Type.STRING), new SortField ("float", SortField.Type.FLOAT) ); @@ -661,7 +771,6 @@ assertMatches (full, queryF, sort, "IJZ"); // When a field is null for both documents, the next SortField should be used. 
- // Works for sort.setSort (new SortField ("int", SortField.Type.INT), new SortField ("string", SortField.Type.STRING), new SortField ("float", SortField.Type.FLOAT) ); @@ -670,7 +779,7 @@ // Reverse the last criterium to make sure the test didn't pass by chance sort.setSort (new SortField ("int", SortField.Type.INT), new SortField ("string", SortField.Type.STRING), - new SortField ("float", SortField.Type.FLOAT, true) ); + new SortField ("float", SortField.Type.FLOAT, true) ); assertMatches (full, queryG, sort, "ZYXW"); // Do the same for a ParallelMultiSearcher @@ -678,13 +787,13 @@ IndexSearcher parallelSearcher=new IndexSearcher (full.getIndexReader(), exec); sort.setSort (new SortField ("int", SortField.Type.INT), - new SortField ("string", SortField.Type.STRING), - new SortField ("float", SortField.Type.FLOAT) ); + new SortField ("string", SortField.Type.STRING), + new SortField ("float", SortField.Type.FLOAT) ); assertMatches (parallelSearcher, queryG, sort, "ZWXY"); sort.setSort (new SortField ("int", SortField.Type.INT), - new SortField ("string", SortField.Type.STRING), - new SortField ("float", SortField.Type.FLOAT, true) ); + new SortField ("string", SortField.Type.STRING), + new SortField ("float", SortField.Type.FLOAT, true) ); assertMatches (parallelSearcher, queryG, sort, "ZYXW"); parallelSearcher.close(); exec.shutdown(); @@ -701,6 +810,20 @@ sort.setSort (new SortField ("float", SortField.Type.FLOAT), new SortField ("string", SortField.Type.STRING) ); assertMatches (full, queryX, sort, "GICEA"); + + if (supportsDocValues) { + sort.setSort (useDocValues(new SortField ("int", SortField.Type.INT)), + useDocValues(new SortField ("float", SortField.Type.FLOAT))); + assertMatches (full, queryX, sort, "IGEAC"); + + sort.setSort (useDocValues(new SortField ("int", SortField.Type.INT, true)), + useDocValues(new SortField (null, SortField.Type.DOC, true))); + assertMatches (full, queryX, sort, "CEAGI"); + + sort.setSort (useDocValues(new SortField 
("float", SortField.Type.FLOAT)), + useDocValues(new SortField ("string", getDVStringSortType()))); + assertMatches (full, queryX, sort, "GICEA"); + } } // test a variety of sorts using a parallel multisearcher @@ -1045,6 +1168,21 @@ sort.setSort(useDocValues(new SortField ("int", SortField.Type.INT, true))); assertMatches(multi, queryF, sort, "JZI"); + + sort.setSort(useDocValues(new SortField("string", getDVStringSortType()))); + assertMatches(multi, queryA, sort, "DJAIHGFEBC"); + + sort.setSort(useDocValues(new SortField("string", getDVStringSortType(), true))); + assertMatches(multi, queryA, sort, "CBEFGHIAJD"); + + sort.setSort(useDocValues(new SortField("float", SortField.Type.FLOAT)),useDocValues(new SortField("string", getDVStringSortType()))); + assertMatches(multi, queryA, sort, "GDHJICEFAB"); + + sort.setSort(useDocValues(new SortField ("string", getDVStringSortType()))); + assertMatches(multi, queryF, sort, "ZJI"); + + sort.setSort(useDocValues(new SortField ("string", getDVStringSortType(), true))); + assertMatches(multi, queryF, sort, "IJZ"); } // up to this point, all of the searches should have "sane" Index: lucene/src/java/org/apache/lucene/search/FieldComparator.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FieldComparator.java (revision 1186760) +++ lucene/src/java/org/apache/lucene/search/FieldComparator.java (working copy) @@ -17,9 +17,14 @@ * limitations under the License. 
*/ +import java.io.IOException; +import java.util.Comparator; + import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.index.values.IndexDocValues.SortedSource; +import org.apache.lucene.index.values.IndexDocValues.Source; import org.apache.lucene.index.values.IndexDocValues; -import org.apache.lucene.index.values.IndexDocValues.Source; +import org.apache.lucene.index.values.ValueType; import org.apache.lucene.search.FieldCache.DocTerms; import org.apache.lucene.search.FieldCache.DocTermsIndex; import org.apache.lucene.search.cache.*; @@ -28,8 +33,6 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.packed.PackedInts; -import java.io.IOException; - /** * Expert: a FieldComparator compares hits so as to determine their * sort order when collecting the top results with {@link @@ -378,6 +381,8 @@ final IndexDocValues docValues = context.reader.docValues(field); if (docValues != null) { currentReaderValues = docValues.getSource(); + } else { + currentReaderValues = IndexDocValues.getDefaultSource(ValueType.FLOAT_64); } return this; } @@ -652,6 +657,8 @@ IndexDocValues docValues = context.reader.docValues(field); if (docValues != null) { currentReaderValues = docValues.getSource(); + } else { + currentReaderValues = IndexDocValues.getDefaultSource(ValueType.FIXED_INTS_64); } return this; } @@ -891,7 +898,7 @@ /** @lucene.internal */ final BytesRef tempBR = new BytesRef(); - public TermOrdValComparator(int numHits, String field, int sortPos, boolean reversed) { + public TermOrdValComparator(int numHits, String field) { ords = new int[numHits]; values = new BytesRef[numHits]; readerGen = new int[numHits]; @@ -1282,6 +1289,369 @@ } } + /** Sorts by field's natural Term sort order, using + * ordinals; this is just like {@link + * TermOrdValComparator} except it uses DocValues to + * retrieve the sort ords saved during indexing. 
*/ + public static final class TermOrdValDocValuesComparator extends FieldComparator { + /** @lucene.internal */ + final int[] ords; + /** @lucene.internal */ + final BytesRef[] values; + /** @lucene.internal */ + final int[] readerGen; + + /** @lucene.internal */ + int currentReaderGen = -1; + private SortedSource termsIndex; + private Comparator comp; + private final String field; + + /** @lucene.internal */ + int bottomSlot = -1; + /** @lucene.internal */ + int bottomOrd; + /** @lucene.internal */ + boolean bottomSameReader; + /** @lucene.internal */ + BytesRef bottomValue; + /** @lucene.internal */ + final BytesRef tempBR = new BytesRef(); + + public TermOrdValDocValuesComparator(int numHits, String field) { + ords = new int[numHits]; + values = new BytesRef[numHits]; + readerGen = new int[numHits]; + this.field = field; + } + + @Override + public int compare(int slot1, int slot2) { + if (readerGen[slot1] == readerGen[slot2]) { + return ords[slot1] - ords[slot2]; + } + + final BytesRef val1 = values[slot1]; + final BytesRef val2 = values[slot2]; + if (val1 == null) { + if (val2 == null) { + return 0; + } + return -1; + } else if (val2 == null) { + return 1; + } + return comp.compare(val1, val2); + } + + @Override + public int compareBottom(int doc) { + throw new UnsupportedOperationException(); + } + + @Override + public void copy(int slot, int doc) { + throw new UnsupportedOperationException(); + } + + // TODO: would be nice to share these specialized impls + // w/ TermOrdValComparator + + /** Base class for specialized (per bit width of the + * ords) per-segment comparator. 
NOTE: this is messy; + * we do this only because hotspot can't reliably inline + * the underlying array access when looking up doc->ord + * @lucene.internal + */ + abstract class PerSegmentComparator extends FieldComparator { + + @Override + public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { + return TermOrdValDocValuesComparator.this.setNextReader(context); + } + + @Override + public int compare(int slot1, int slot2) { + return TermOrdValDocValuesComparator.this.compare(slot1, slot2); + } + + @Override + public void setBottom(final int bottom) { + TermOrdValDocValuesComparator.this.setBottom(bottom); + } + + @Override + public BytesRef value(int slot) { + return TermOrdValDocValuesComparator.this.value(slot); + } + + @Override + public int compareValues(BytesRef val1, BytesRef val2) { + assert val1 != null; + assert val2 != null; + return comp.compare(val1, val2); + } + } + + // Used per-segment when bit width of doc->ord is 8: + private final class ByteOrdComparator extends PerSegmentComparator { + private final byte[] readerOrds; + private final SortedSource termsIndex; + private final int docBase; + + public ByteOrdComparator(byte[] readerOrds, SortedSource termsIndex, int docBase) { + this.readerOrds = readerOrds; + this.termsIndex = termsIndex; + this.docBase = docBase; + } + + @Override + public int compareBottom(int doc) { + assert bottomSlot != -1; + if (bottomSameReader) { + // ord is precisely comparable, even in the equal case + return bottomOrd - (readerOrds[doc]&0xFF); + } else { + // ord is only approx comparable: if they are not + // equal, we can use that; if they are equal, we + // must fallback to compare by value + final int order = readerOrds[doc]&0xFF; + final int cmp = bottomOrd - order; + if (cmp != 0) { + return cmp; + } + + termsIndex.getByOrd(order, tempBR); + return comp.compare(bottomValue, tempBR); + } + } + + @Override + public void copy(int slot, int doc) { + final int ord = readerOrds[doc]&0xFF; + 
ords[slot] = ord; + if (values[slot] == null) { + values[slot] = new BytesRef(); + } + termsIndex.getByOrd(ord, values[slot]); + readerGen[slot] = currentReaderGen; + } + } + + // Used per-segment when bit width of doc->ord is 16: + private final class ShortOrdComparator extends PerSegmentComparator { + private final short[] readerOrds; + private final SortedSource termsIndex; + private final int docBase; + + public ShortOrdComparator(short[] readerOrds, SortedSource termsIndex, int docBase) { + this.readerOrds = readerOrds; + this.termsIndex = termsIndex; + this.docBase = docBase; + } + + @Override + public int compareBottom(int doc) { + assert bottomSlot != -1; + if (bottomSameReader) { + // ord is precisely comparable, even in the equal case + return bottomOrd - (readerOrds[doc]&0xFFFF); + } else { + // ord is only approx comparable: if they are not + // equal, we can use that; if they are equal, we + // must fallback to compare by value + final int order = readerOrds[doc]&0xFFFF; + final int cmp = bottomOrd - order; + if (cmp != 0) { + return cmp; + } + + termsIndex.getByOrd(order, tempBR); + return comp.compare(bottomValue, tempBR); + } + } + + @Override + public void copy(int slot, int doc) { + final int ord = readerOrds[doc]&0xFFFF; + ords[slot] = ord; + if (values[slot] == null) { + values[slot] = new BytesRef(); + } + termsIndex.getByOrd(ord, values[slot]); + readerGen[slot] = currentReaderGen; + } + } + + // Used per-segment when bit width of doc->ord is 32: + private final class IntOrdComparator extends PerSegmentComparator { + private final int[] readerOrds; + private final SortedSource termsIndex; + private final int docBase; + + public IntOrdComparator(int[] readerOrds, SortedSource termsIndex, int docBase) { + this.readerOrds = readerOrds; + this.termsIndex = termsIndex; + this.docBase = docBase; + } + + @Override + public int compareBottom(int doc) { + assert bottomSlot != -1; + if (bottomSameReader) { + // ord is precisely comparable, even in the 
equal case + return bottomOrd - readerOrds[doc]; + } else { + // ord is only approx comparable: if they are not + // equal, we can use that; if they are equal, we + // must fallback to compare by value + final int order = readerOrds[doc]; + final int cmp = bottomOrd - order; + if (cmp != 0) { + return cmp; + } + termsIndex.getByOrd(order, tempBR); + return comp.compare(bottomValue, tempBR); + } + } + + @Override + public void copy(int slot, int doc) { + final int ord = readerOrds[doc]; + ords[slot] = ord; + if (values[slot] == null) { + values[slot] = new BytesRef(); + } + termsIndex.getByOrd(ord, values[slot]); + readerGen[slot] = currentReaderGen; + } + } + + // Used per-segment when bit width is not a native array + // size (8, 16, 32): + private final class AnyOrdComparator extends PerSegmentComparator { + private final PackedInts.Reader readerOrds; + private final int docBase; + + public AnyOrdComparator(PackedInts.Reader readerOrds, int docBase) { + this.readerOrds = readerOrds; + this.docBase = docBase; + } + + @Override + public int compareBottom(int doc) { + assert bottomSlot != -1; + if (bottomSameReader) { + // ord is precisely comparable, even in the equal case + return bottomOrd - (int) readerOrds.get(doc); + } else { + // ord is only approx comparable: if they are not + // equal, we can use that; if they are equal, we + // must fallback to compare by value + final int order = (int) readerOrds.get(doc); + final int cmp = bottomOrd - order; + if (cmp != 0) { + return cmp; + } + termsIndex.getByOrd(order, tempBR); + return comp.compare(bottomValue, tempBR); + } + } + + @Override + public void copy(int slot, int doc) { + final int ord = (int) readerOrds.get(doc); + ords[slot] = ord; + if (values[slot] == null) { + values[slot] = new BytesRef(); + } + termsIndex.getByOrd(ord, values[slot]); + readerGen[slot] = currentReaderGen; + } + } + + @Override + public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { + final int docBase 
= context.docBase; + + final IndexDocValues dv = context.reader.docValues(field); + if (dv == null) { + termsIndex = IndexDocValues.getDefaultSortedSource(ValueType.BYTES_VAR_SORTED, context.reader.maxDoc()); + } else { + termsIndex = dv.getSource().asSortedSource(); + if (termsIndex == null) { + termsIndex = IndexDocValues.getDefaultSortedSource(ValueType.BYTES_VAR_SORTED, context.reader.maxDoc()); + } + } + + comp = termsIndex.getComparator(); + + FieldComparator perSegComp = null; + final PackedInts.Reader docToOrd = termsIndex.getDocToOrd(); + if (docToOrd.hasArray()) { + final Object arr = docToOrd.getArray(); + assert arr != null; + if (arr instanceof byte[]) { + // 8 bit packed + perSegComp = new ByteOrdComparator((byte[]) arr, termsIndex, docBase); + } else if (arr instanceof short[]) { + // 16 bit packed + perSegComp = new ShortOrdComparator((short[]) arr, termsIndex, docBase); + } else if (arr instanceof int[]) { + // 32 bit packed + perSegComp = new IntOrdComparator((int[]) arr, termsIndex, docBase); + } + } + + if (perSegComp == null) { + perSegComp = new AnyOrdComparator(docToOrd, docBase); + } + + currentReaderGen++; + if (bottomSlot != -1) { + perSegComp.setBottom(bottomSlot); + } + + return perSegComp; + } + + @Override + public void setBottom(final int bottom) { + bottomSlot = bottom; + + bottomValue = values[bottomSlot]; + if (currentReaderGen == readerGen[bottomSlot]) { + bottomOrd = ords[bottomSlot]; + bottomSameReader = true; + } else { + if (bottomValue == null) { + // 0 ord is null for all segments + assert ords[bottomSlot] == 0; + bottomOrd = 0; + bottomSameReader = true; + readerGen[bottomSlot] = currentReaderGen; + } else { + final int index = termsIndex.getByValue(bottomValue, tempBR); + if (index < 0) { + bottomOrd = -index - 2; + bottomSameReader = false; + } else { + bottomOrd = index; + // exact value match + bottomSameReader = true; + readerGen[bottomSlot] = currentReaderGen; + ords[bottomSlot] = bottomOrd; + } + } + } + } + + 
@Override + public BytesRef value(int slot) { + return values[slot]; + } + } + /** Sorts by field's natural Term sort order. All * comparisons are done using BytesRef.compareTo, which is * slow for medium to large result sets but possibly @@ -1367,6 +1737,76 @@ } } + // nocommit test + + /** Sorts by field's natural Term sort order. All + * comparisons are done using BytesRef.compareTo, which is + * slow for medium to large result sets but possibly + * very fast for very small result sets. The BytesRef + * values are obtained using {@link IndexReader#docValues}. */ + public static final class TermValDocValuesComparator extends FieldComparator { + + private BytesRef[] values; + private Source docTerms; + private final String field; + private BytesRef bottom; + private final BytesRef tempBR = new BytesRef(); + + TermValDocValuesComparator(int numHits, String field) { + values = new BytesRef[numHits]; + this.field = field; + } + + @Override + public int compare(int slot1, int slot2) { + assert values[slot1] != null; + assert values[slot2] != null; + return values[slot1].compareTo(values[slot2]); + } + + @Override + public int compareBottom(int doc) { + assert bottom != null; + return bottom.compareTo(docTerms.getBytes(doc, tempBR)); + } + + @Override + public void copy(int slot, int doc) { + if (values[slot] == null) { + values[slot] = new BytesRef(); + } + docTerms.getBytes(doc, values[slot]); + } + + @Override + public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { + final IndexDocValues dv = context.reader.docValues(field); + if (dv != null) { + docTerms = dv.getSource(); + } else { + docTerms = IndexDocValues.getDefaultSource(ValueType.BYTES_VAR_DEREF); + } + return this; + } + + @Override + public void setBottom(final int bottom) { + this.bottom = values[bottom]; + } + + @Override + public BytesRef value(int slot) { + return values[slot]; + } + + @Override + public int compareValues(BytesRef val1, BytesRef val2) { + assert val1 != 
null; + assert val2 != null; + return val1.compareTo(val2); + } + } + final protected static int binarySearch(BytesRef br, DocTermsIndex a, BytesRef key) { return binarySearch(br, a, key, 1, a.numOrd()-1); } Index: lucene/src/java/org/apache/lucene/search/SortField.java =================================================================== --- lucene/src/java/org/apache/lucene/search/SortField.java (revision 1186760) +++ lucene/src/java/org/apache/lucene/search/SortField.java (working copy) @@ -311,6 +311,7 @@ @Override public String toString() { StringBuilder buffer = new StringBuilder(); + String dv = useIndexValues ? " [dv]" : ""; switch (type) { case SCORE: buffer.append(""); @@ -321,11 +322,11 @@ break; case STRING: - buffer.append(""); + buffer.append(""); break; case STRING_VAL: - buffer.append(""); + buffer.append(""); break; case BYTE: @@ -337,7 +338,7 @@ break; case INT: - buffer.append(""); + buffer.append(""); break; case LONG: @@ -345,11 +346,11 @@ break; case FLOAT: - buffer.append(""); + buffer.append(""); break; case DOUBLE: - buffer.append(""); + buffer.append(""); break; case CUSTOM: @@ -475,10 +476,18 @@ return comparatorSource.newComparator(field, numHits, sortPos, reverse); case STRING: - return new FieldComparator.TermOrdValComparator(numHits, field, sortPos, reverse); + if (useIndexValues) { + return new FieldComparator.TermOrdValDocValuesComparator(numHits, field); + } else { + return new FieldComparator.TermOrdValComparator(numHits, field); + } case STRING_VAL: - return new FieldComparator.TermValComparator(numHits, field); + if (useIndexValues) { + return new FieldComparator.TermValDocValuesComparator(numHits, field); + } else { + return new FieldComparator.TermValComparator(numHits, field); + } case REWRITEABLE: throw new IllegalStateException("SortField needs to be rewritten through Sort.rewrite(..) 
and SortField.rewrite(..)"); Index: lucene/src/java/org/apache/lucene/index/values/IndexDocValues.java =================================================================== --- lucene/src/java/org/apache/lucene/index/values/IndexDocValues.java (revision 1186760) +++ lucene/src/java/org/apache/lucene/index/values/IndexDocValues.java (working copy) @@ -27,6 +27,7 @@ import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.packed.PackedInts; /** * {@link IndexDocValues} provides a dense per-document typed storage for fast @@ -226,7 +227,7 @@ return null; } } - + /** * A sorted variant of {@link Source} for byte[] values per document. *

@@ -261,6 +262,20 @@ public abstract BytesRef getByOrd(int ord, BytesRef bytesRef); /** + * Returns the PackedInts.Reader impl that maps document to ord; this base implementation returns null, so callers must use a subclass that overrides it. + */ + public PackedInts.Reader getDocToOrd() { + return null; + } + + /** + * Returns the comparator used to order the BytesRefs. + */ + public Comparator getComparator() { + return comparator; + } + + /** * Performs a lookup by value. * * @param value @@ -307,4 +322,98 @@ */ public abstract int getValueCount(); } + + /** Returns a Source that always returns default (missing) + * values for all documents. */ + public static Source getDefaultSource(final ValueType type) { + return new Source(type) { + @Override + public long getInt(int docID) { + return 0; + } + + @Override + public double getFloat(int docID) { + return 0.0; + } + + @Override + public BytesRef getBytes(int docID, BytesRef ref) { + ref.length = 0; + return ref; + } + }; + } + + /** Returns a SortedSource that always returns default (missing) + * values for all documents. 
*/ + public static SortedSource getDefaultSortedSource(final ValueType type, final int size) { + + final PackedInts.Reader docToOrd = new PackedInts.Reader() { + @Override + public long get(int index) { + return 0; + } + + @Override + public int getBitsPerValue() { + return 0; + } + + @Override + public int size() { + return size; + } + + @Override + public boolean hasArray() { + return false; + } + + @Override + public Object getArray() { + return null; + } + }; + + return new SortedSource(type, BytesRef.getUTF8SortedAsUnicodeComparator()) { + + @Override + public BytesRef getBytes(int docID, BytesRef ref) { + ref.length = 0; + return ref; + } + + @Override + public int ord(int docID) { + return 0; + } + + @Override + public BytesRef getByOrd(int ord, BytesRef bytesRef) { + assert ord == 0; + bytesRef.length = 0; + return bytesRef; + } + + @Override + public PackedInts.Reader getDocToOrd() { + return docToOrd; + } + + @Override + public int getByValue(BytesRef value, BytesRef spare) { + if (value.length == 0) { + return 0; + } else { + return -1; + } + } + + @Override + public int getValueCount() { + return 1; + } + }; + } } Index: lucene/src/java/org/apache/lucene/index/values/Bytes.java =================================================================== --- lucene/src/java/org/apache/lucene/index/values/Bytes.java (revision 1186760) +++ lucene/src/java/org/apache/lucene/index/values/Bytes.java (working copy) @@ -32,17 +32,17 @@ import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.ByteBlockPool.Allocator; +import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator; import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray; import org.apache.lucene.util.BytesRefHash; import org.apache.lucene.util.CodecUtil; import org.apache.lucene.util.Counter; import 
org.apache.lucene.util.IOUtils; import org.apache.lucene.util.PagedBytes; import org.apache.lucene.util.RamUsageEstimator; -import org.apache.lucene.util.ByteBlockPool.Allocator; -import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator; -import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray; import org.apache.lucene.util.packed.PackedInts; /** @@ -584,7 +584,11 @@ this.idxIn = idxIn; ordToOffsetIndex = hasOffsets ? PackedInts.getReader(idxIn) : null; docToOrdIndex = PackedInts.getReader(idxIn); + } + @Override + public PackedInts.Reader getDocToOrd() { + return docToOrdIndex; } @Override Index: lucene/src/java/org/apache/lucene/document/FieldType.java =================================================================== --- lucene/src/java/org/apache/lucene/document/FieldType.java (revision 1186760) +++ lucene/src/java/org/apache/lucene/document/FieldType.java (working copy) @@ -145,34 +145,34 @@ if (result.length() > 0) result.append(","); result.append("indexed"); + if (tokenized()) { + if (result.length() > 0) + result.append(","); + result.append("tokenized"); + } + if (storeTermVectors()) { + if (result.length() > 0) + result.append(","); + result.append("termVector"); + } + if (storeTermVectorOffsets()) { + if (result.length() > 0) + result.append(","); + result.append("termVectorOffsets"); + } + if (storeTermVectorPositions()) { + if (result.length() > 0) + result.append(","); + result.append("termVectorPosition"); + } + if (omitNorms()) { + result.append(",omitNorms"); + } + if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { + result.append(",indexOptions="); + result.append(indexOptions); + } } - if (tokenized()) { - if (result.length() > 0) - result.append(","); - result.append("tokenized"); - } - if (storeTermVectors()) { - if (result.length() > 0) - result.append(","); - result.append("termVector"); - } - if (storeTermVectorOffsets()) { - if (result.length() > 0) - result.append(","); - 
result.append("termVectorOffsets"); - } - if (storeTermVectorPositions()) { - if (result.length() > 0) - result.append(","); - result.append("termVectorPosition"); - } - if (omitNorms()) { - result.append(",omitNorms"); - } - if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { - result.append(",indexOptions="); - result.append(indexOptions); - } return result.toString(); }