Index: contrib/queries/src/java/org/apache/lucene/search/trie/TrieUtils.java =================================================================== --- contrib/queries/src/java/org/apache/lucene/search/trie/TrieUtils.java (revision 723704) +++ contrib/queries/src/java/org/apache/lucene/search/trie/TrieUtils.java (working copy) @@ -21,6 +21,8 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.ExtendedFieldCache; /** *This is a helper class to construct the trie-based index entries for numerical values. @@ -68,6 +70,26 @@ /** Character used as lower end */ public static final char TRIE_CODED_SYMBOL_MIN=(char)0x100; + /** + * A parser instance for filling a {@link ExtendedFieldCache}, that parses trie encoded fields as longs, + * auto detecting the trie encoding variant using the String length. + */ + public static final ExtendedFieldCache.LongParser FIELD_CACHE_LONG_PARSER_AUTO=new ExtendedFieldCache.LongParser(){ + public final long parseLong(String val) { + return trieCodedToLongAuto(val); + } + }; + + /** + * A parser instance for filling a {@link ExtendedFieldCache}, that parses trie encoded fields as doubles, + * auto detecting the trie encoding variant using the String length. + */ + public static final ExtendedFieldCache.DoubleParser FIELD_CACHE_DOUBLE_PARSER_AUTO=new ExtendedFieldCache.DoubleParser(){ + public final double parseDouble(String val) { + return trieCodedToDoubleAuto(val); + } + }; + private static TrieUtils defaultTrieVariant=TrieUtils.VARIANT_8BIT; /** @@ -130,6 +152,22 @@ return autoDetectVariant(s).trieCodedToDate(s); } + /** + * A factory method, that generates a {@link SortField} instance for sorting trie encoded values, + * automatically detecting the trie encoding variant using the String length. 
+ */ + public static final SortField getSortFieldAuto(final String field) { + return new SortField(field, SortField.LONG, FIELD_CACHE_LONG_PARSER_AUTO); + } + + /** + * A factory method, that generates a {@link SortField} instance for sorting trie encoded values, + * automatically detecting the trie encoding variant using the String length. + */ + public static final SortField getSortFieldAuto(final String field, boolean reverse) { + return new SortField(field, SortField.LONG, FIELD_CACHE_LONG_PARSER_AUTO, reverse); + } + // TrieUtils instance's part private TrieUtils(int bits) { @@ -338,6 +376,30 @@ addConvertedTrieCodedDocumentField(ldoc, fieldname, longToTrieCoded(val), index, store); } + /** A factory method, that generates a {@link SortField} instance for sorting trie encoded values. */ + public SortField getSortField(final String field) { + return new SortField(field, SortField.LONG, FIELD_CACHE_LONG_PARSER); + } + + /** A factory method, that generates a {@link SortField} instance for sorting trie encoded values. */ + public SortField getSortField(final String field, boolean reverse) { + return new SortField(field, SortField.LONG, FIELD_CACHE_LONG_PARSER, reverse); + } + + /** A parser instance for filling a {@link ExtendedFieldCache}, that parses trie encoded fields as longs. */ + public final ExtendedFieldCache.LongParser FIELD_CACHE_LONG_PARSER=new ExtendedFieldCache.LongParser(){ + public final long parseLong(String val) { + return trieCodedToLong(val); + } + }; + + /** A parser instance for filling a {@link ExtendedFieldCache}, that parses trie encoded fields as doubles. 
*/ + public final ExtendedFieldCache.DoubleParser FIELD_CACHE_DOUBLE_PARSER=new ExtendedFieldCache.DoubleParser(){ + public final double parseDouble(String val) { + return trieCodedToDouble(val); + } + }; + private final long mask; /** Number of bits used in this trie variant (2, 4, or 8) */ Index: contrib/queries/src/test/org/apache/lucene/search/trie/TestTrieRangeQuery.java =================================================================== --- contrib/queries/src/test/org/apache/lucene/search/trie/TestTrieRangeQuery.java (revision 723704) +++ contrib/queries/src/test/org/apache/lucene/search/trie/TestTrieRangeQuery.java (working copy) @@ -163,7 +163,7 @@ private void testRangeSplit(final TrieUtils variant) throws Exception { String field="ascfield"+variant.TRIE_BITS; - // 50 random tests, the tests may also return 0 results, if min>max, but this is ok + // 50 random tests for (int i=0; i<50; i++) { long lower=(long)(rnd.nextDouble()*10000L); long upper=(long)(rnd.nextDouble()*10000L); @@ -188,4 +188,40 @@ testRangeSplit(TrieUtils.VARIANT_2BIT); } + private void testSorting(final TrieUtils variant) throws Exception { + String field="field"+variant.TRIE_BITS; + // 10 random tests, the index order is ascending, + // so using a reverse sort field should return descending documents + for (int i=0; i<10; i++) { + long lower=(long)(rnd.nextDouble()*10000L*distance); + long upper=(long)(rnd.nextDouble()*10000L*distance); + if (lower>upper) { + long a=lower; lower=upper; upper=a; + } + TrieRangeQuery tq=new TrieRangeQuery(field, new Long(lower), new Long(upper), variant); + TopDocs topDocs = searcher.search(tq, null, 10000, new Sort(variant.getSortField(field, true))); + if (topDocs.totalHits==0) continue; + ScoreDoc[] sd = topDocs.scoreDocs; + assertNotNull(sd); + long last=variant.trieCodedToLong(searcher.doc(sd[0].doc).get(field)); + for (int j=1; jact ); + last=act; + } + } + } + + public void testSorting_8bit() throws Exception { + 
testSorting(TrieUtils.VARIANT_8BIT); + } + + public void testSorting_4bit() throws Exception { + testSorting(TrieUtils.VARIANT_4BIT); + } + + public void testSorting_2bit() throws Exception { + testSorting(TrieUtils.VARIANT_2BIT); + } + } Index: src/java/org/apache/lucene/search/ExtendedFieldCache.java =================================================================== --- src/java/org/apache/lucene/search/ExtendedFieldCache.java (revision 723704) +++ src/java/org/apache/lucene/search/ExtendedFieldCache.java (working copy) @@ -27,14 +27,14 @@ * **/ public interface ExtendedFieldCache extends FieldCache { - public interface LongParser { + public interface LongParser extends Parser { /** * Return an long representation of this field's value. */ public long parseLong(String string); } - public interface DoubleParser { + public interface DoubleParser extends Parser { /** * Return an long representation of this field's value. */ Index: src/java/org/apache/lucene/search/FieldCache.java =================================================================== --- src/java/org/apache/lucene/search/FieldCache.java (revision 723704) +++ src/java/org/apache/lucene/search/FieldCache.java (working copy) @@ -74,10 +74,18 @@ } } + /** + * Marker interface as super-interface to all parsers. It is used + * to provide the possibility, e.g. in {@link SortField}, to supply any + * field parser. + */ + public interface Parser { + } + /** Interface to parse bytes from document fields. * @see FieldCache#getBytes(IndexReader, String, FieldCache.ByteParser) */ - public interface ByteParser { + public interface ByteParser extends Parser { /** Return a single Byte representation of this field's value. */ public byte parseByte(String string); } @@ -85,7 +93,7 @@ /** Interface to parse shorts from document fields. 
* @see FieldCache#getShorts(IndexReader, String, FieldCache.ShortParser) */ - public interface ShortParser { + public interface ShortParser extends Parser { /** Return a short representation of this field's value. */ public short parseShort(String string); } @@ -93,7 +101,7 @@ /** Interface to parse ints from document fields. * @see FieldCache#getInts(IndexReader, String, FieldCache.IntParser) */ - public interface IntParser { + public interface IntParser extends Parser { /** Return an integer representation of this field's value. */ public int parseInt(String string); } @@ -101,7 +109,7 @@ /** Interface to parse floats from document fields. * @see FieldCache#getFloats(IndexReader, String, FieldCache.FloatParser) */ - public interface FloatParser { + public interface FloatParser extends Parser { /** Return an float representation of this field's value. */ public float parseFloat(String string); } Index: src/java/org/apache/lucene/search/FieldCacheImpl.java =================================================================== --- src/java/org/apache/lucene/search/FieldCacheImpl.java (revision 723704) +++ src/java/org/apache/lucene/search/FieldCacheImpl.java (working copy) @@ -88,7 +88,7 @@ static class Entry { final String field; // which Fieldable final int type; // which SortField type - final Object custom; // which custom comparator + final Object custom; // which custom comparator or parser final Locale locale; // the locale we're sorting (if string) /** Creates one of these objects. */ @@ -99,7 +99,7 @@ this.locale = locale; } - /** Creates one of these objects for a custom comparator. */ + /** Creates one of these objects for a custom comparator/parser. */ Entry (String field, Object custom) { this.field = field.intern(); this.type = SortField.CUSTOM; @@ -107,6 +107,14 @@ this.locale = null; } + /** Creates one of these objects for a custom type with parser, needed by FieldSortedHitQueue. 
*/ + Entry (String field, int type, Parser parser) { + this.field = field.intern(); + this.type = type; + this.custom = parser; + this.locale = null; + } + /** Two of these are equal iff they reference the same field and type. */ public boolean equals (Object o) { if (o instanceof Entry) { Index: src/java/org/apache/lucene/search/FieldSortedHitQueue.java =================================================================== --- src/java/org/apache/lucene/search/FieldSortedHitQueue.java (revision 723704) +++ src/java/org/apache/lucene/search/FieldSortedHitQueue.java (working copy) @@ -52,12 +52,12 @@ this.fields = new SortField[n]; for (int i=0; inull if + * type is SCORE or DOC. + * @param type Type of values in the terms. + * @param parser Instance of a {@link FieldCache} parser that fits to the given type. + * @throws IllegalArgumentException if the parser does not fit to the type + */ + public SortField (String field, int type, FieldCache.Parser parser) { + this(field, type, parser, false); + } + + /** Creates a sort, possibly in reverse, by terms in the given field with the + * type of term values explicitly given. + * @param field Name of field to sort by. Can be null if + * type is SCORE or DOC. + * @param type Type of values in the terms. + * @param parser Instance of a {@link FieldCache} parser that fits to the given type. + * @param reverse True if natural order should be reversed. 
+ * @throws IllegalArgumentException if the parser does not fit to the type + */ + public SortField (String field, int type, FieldCache.Parser parser, boolean reverse) { + if (parser!=null) { + if (!( + (type == INT && parser instanceof FieldCache.IntParser) || + (type == FLOAT && parser instanceof FieldCache.FloatParser) || + (type == LONG && parser instanceof ExtendedFieldCache.LongParser) || + (type == DOUBLE && parser instanceof ExtendedFieldCache.DoubleParser) || + (type == SHORT && parser instanceof FieldCache.ShortParser) || + (type == BYTE && parser instanceof FieldCache.ByteParser) + )) throw new IllegalArgumentException("The type does not correspond to given parser instance."); + } + this.field = (field != null) ? field.intern() : field; + this.type = type; + this.reverse = reverse; + this.parser = parser; + } + /** Creates a sort by terms in the given field sorted * according to the given locale. * @param field Name of field to sort by, cannot be null. @@ -210,6 +249,14 @@ return locale; } + /** Returns the instance of a {@link FieldCache} parser that fits to the given sort type. + * May return null if no parser was specified. Sorting is using the default parser then. + * @return An instance of a {@link FieldCache} parser, or null. + */ + public FieldCache.Parser getParser() { + return parser; + } + /** Returns whether the sort should be reversed. * @return True if natural order should be reversed. 
*/ @@ -240,6 +287,7 @@ } if (locale != null) buffer.append('(').append(locale).append(')'); + if (parser != null) buffer.append('(').append(parser).append(')'); if (reverse) buffer.append('!'); return buffer.toString(); Index: src/test/org/apache/lucene/search/TestSort.java =================================================================== --- src/test/org/apache/lucene/search/TestSort.java (revision 723704) +++ src/test/org/apache/lucene/search/TestSort.java (working copy) @@ -98,21 +98,21 @@ // the string field to sort by string // the i18n field includes accented characters for testing locale-specific sorting private String[][] data = new String[][] { - // tracer contents int float string custom i18n long double, 'short', byte - { "A", "x a", "5", "4f", "c", "A-3", "p\u00EAche", "10", "-4.0", "3", "126"},//A, x - { "B", "y a", "5", "3.4028235E38", "i", "B-10", "HAT", "1000000000", "40.0", "24", "1"},//B, y - { "C", "x a b c", "2147483647", "1.0", "j", "A-2", "p\u00E9ch\u00E9", "99999999", "40.00002343", "125", "15"},//C, x - { "D", "y a b c", "-1", "0.0f", "a", "C-0", "HUT", String.valueOf(Long.MAX_VALUE), String.valueOf(Double.MIN_VALUE), String.valueOf(Short.MIN_VALUE), String.valueOf(Byte.MIN_VALUE)},//D, y - { "E", "x a b c d", "5", "2f", "h", "B-8", "peach", String.valueOf(Long.MIN_VALUE), String.valueOf(Double.MAX_VALUE), String.valueOf(Short.MAX_VALUE), String.valueOf(Byte.MAX_VALUE)},//E,x - { "F", "y a b c d", "2", "3.14159f", "g", "B-1", "H\u00C5T", "-44", "343.034435444", "-3", "0"},//F,y - { "G", "x a b c d", "3", "-1.0", "f", "C-100", "sin", "323254543543", "4.043544", "5", "100"},//G,x - { "H", "y a b c d", "0", "1.4E-45", "e", "C-88", "H\u00D8T", "1023423423005","4.043545", "10", "-50"},//H,y - { "I", "x a b c d e f", "-2147483648", "1.0e+0", "d", "A-10", "s\u00EDn", "332422459999", "4.043546", "-340", "51"},//I,x - { "J", "y a b c d e f", "4", ".5", "b", "C-7", "HOT", "34334543543", "4.0000220343", "300", "2"},//J,y - { "W", "g", "1", null, 
null, null, null, null, null, null, null}, - { "X", "g", "1", "0.1", null, null, null, null, null, null, null}, - { "Y", "g", "1", "0.2", null, null, null, null, null, null, null}, - { "Z", "f g", null, null, null, null, null, null, null, null, null} + // tracer contents int float string custom i18n long double, 'short', byte, 'custom parser encoding' + { "A", "x a", "5", "4f", "c", "A-3", "p\u00EAche", "10", "-4.0", "3", "126", "J"},//A, x + { "B", "y a", "5", "3.4028235E38", "i", "B-10", "HAT", "1000000000", "40.0", "24", "1", "I"},//B, y + { "C", "x a b c", "2147483647", "1.0", "j", "A-2", "p\u00E9ch\u00E9", "99999999", "40.00002343", "125", "15", "H"},//C, x + { "D", "y a b c", "-1", "0.0f", "a", "C-0", "HUT", String.valueOf(Long.MAX_VALUE), String.valueOf(Double.MIN_VALUE), String.valueOf(Short.MIN_VALUE), String.valueOf(Byte.MIN_VALUE), "G"},//D, y + { "E", "x a b c d", "5", "2f", "h", "B-8", "peach", String.valueOf(Long.MIN_VALUE), String.valueOf(Double.MAX_VALUE), String.valueOf(Short.MAX_VALUE), String.valueOf(Byte.MAX_VALUE), "F"},//E,x + { "F", "y a b c d", "2", "3.14159f", "g", "B-1", "H\u00C5T", "-44", "343.034435444", "-3", "0", "E"},//F,y + { "G", "x a b c d", "3", "-1.0", "f", "C-100", "sin", "323254543543", "4.043544", "5", "100", "D"},//G,x + { "H", "y a b c d", "0", "1.4E-45", "e", "C-88", "H\u00D8T", "1023423423005","4.043545", "10", "-50", "C"},//H,y + { "I", "x a b c d e f", "-2147483648", "1.0e+0", "d", "A-10", "s\u00EDn", "332422459999", "4.043546", "-340", "51", "B"},//I,x + { "J", "y a b c d e f", "4", ".5", "b", "C-7", "HOT", "34334543543", "4.0000220343", "300", "2", "A"},//J,y + { "W", "g", "1", null, null, null, null, null, null, null, null, null}, + { "X", "g", "1", "0.1", null, null, null, null, null, null, null, null}, + { "Y", "g", "1", "0.2", null, null, null, null, null, null, null, null}, + { "Z", "f g", null, null, null, null, null, null, null, null, null, null} }; // create an index of all the documents, or just the x, or just 
the y documents @@ -132,8 +132,9 @@ if (data[i][6] != null) doc.add (new Field ("i18n", data[i][6], Field.Store.NO, Field.Index.NOT_ANALYZED)); if (data[i][7] != null) doc.add (new Field ("long", data[i][7], Field.Store.NO, Field.Index.NOT_ANALYZED)); if (data[i][8] != null) doc.add (new Field ("double", data[i][8], Field.Store.NO, Field.Index.NOT_ANALYZED)); - if (data[i][8] != null) doc.add (new Field ("short", data[i][9], Field.Store.NO, Field.Index.NOT_ANALYZED)); - if (data[i][8] != null) doc.add (new Field ("byte", data[i][10], Field.Store.NO, Field.Index.NOT_ANALYZED)); + if (data[i][9] != null) doc.add (new Field ("short", data[i][9], Field.Store.NO, Field.Index.NOT_ANALYZED)); + if (data[i][10] != null) doc.add (new Field ("byte", data[i][10], Field.Store.NO, Field.Index.NOT_ANALYZED)); + if (data[i][11] != null) doc.add (new Field ("parser", data[i][11], Field.Store.NO, Field.Index.NOT_ANALYZED)); doc.setBoost(2); // produce some scores above 1.0 writer.addDocument (doc); } @@ -218,6 +219,53 @@ assertMatches (full, queryY, sort, "DJHFB"); } + // test sorts where the type of field is specified and a custom field parser is used, that + // uses a simple char encoding. The sorted string contains a character beginning from 'A' that + // is mapped to a numeric value using some "funny" algorithm to be different for each data type. 
+ public void testCustomFieldParserSort() throws Exception { + sort.setSort (new SortField[] { new SortField ("parser", SortField.INT, new FieldCache.IntParser(){ + public final int parseInt(final String val) { + return (int) (val.charAt(0)-'A') * 123456; + } + }), SortField.FIELD_DOC }); + assertMatches (full, queryA, sort, "JIHGFEDCBA"); + + sort.setSort (new SortField[] { new SortField ("parser", SortField.FLOAT, new FieldCache.FloatParser(){ + public final float parseFloat(final String val) { + return (float) Math.sqrt( (double) val.charAt(0) ); + } + }), SortField.FIELD_DOC }); + assertMatches (full, queryA, sort, "JIHGFEDCBA"); + + sort.setSort (new SortField[] { new SortField ("parser", SortField.LONG, new ExtendedFieldCache.LongParser(){ + public final long parseLong(final String val) { + return (long) (val.charAt(0)-'A') * 1234567890L; + } + }), SortField.FIELD_DOC }); + assertMatches (full, queryA, sort, "JIHGFEDCBA"); + + sort.setSort (new SortField[] { new SortField ("parser", SortField.DOUBLE, new ExtendedFieldCache.DoubleParser(){ + public final double parseDouble(final String val) { + return Math.pow( (double) val.charAt(0), (double) (val.charAt(0)-'A') ); + } + }), SortField.FIELD_DOC }); + assertMatches (full, queryA, sort, "JIHGFEDCBA"); + + sort.setSort (new SortField[] { new SortField ("parser", SortField.BYTE, new FieldCache.ByteParser(){ + public final byte parseByte(final String val) { + return (byte) (val.charAt(0)-'A'); + } + }), SortField.FIELD_DOC }); + assertMatches (full, queryA, sort, "JIHGFEDCBA"); + + sort.setSort (new SortField[] { new SortField ("parser", SortField.SHORT, new FieldCache.ShortParser(){ + public final short parseShort(final String val) { + return (short) (val.charAt(0)-'A'); + } + }), SortField.FIELD_DOC }); + assertMatches (full, queryA, sort, "JIHGFEDCBA"); + } + // test sorts when there's nothing in the index public void testEmptyIndex() throws Exception { Searcher empty = getEmptyIndex();