Index: contrib/queries/src/java/org/apache/lucene/search/trie/AbstractTrieRangeFilter.java =================================================================== --- contrib/queries/src/java/org/apache/lucene/search/trie/AbstractTrieRangeFilter.java (revision 761725) +++ contrib/queries/src/java/org/apache/lucene/search/trie/AbstractTrieRangeFilter.java (working copy) @@ -18,7 +18,6 @@ */ import java.io.IOException; -import java.util.Arrays; import org.apache.lucene.search.Filter; import org.apache.lucene.search.Query; @@ -33,10 +32,10 @@ abstract class AbstractTrieRangeFilter extends Filter { - AbstractTrieRangeFilter(final String[] fields, final int precisionStep, + AbstractTrieRangeFilter(final String field, final int precisionStep, Number min, Number max, final boolean minInclusive, final boolean maxInclusive ) { - this.fields=(String[])fields.clone(); + this.field=field.intern(); this.precisionStep=precisionStep; this.min=min; this.max=max; @@ -51,7 +50,7 @@ public String toString(final String field) { final StringBuffer sb=new StringBuffer(); - if (!this.fields[0].equals(field)) sb.append(this.fields[0]).append(':'); + if (!this.field.equals(field)) sb.append(this.field).append(':'); return sb.append(minInclusive ? '[' : '{') .append((min==null) ? "*" : min.toString()) .append(" TO ") @@ -66,7 +65,7 @@ if (this.getClass().equals(o.getClass())) { AbstractTrieRangeFilter q=(AbstractTrieRangeFilter)o; return ( - Arrays.equals(fields,q.fields) && + field==q.field && (q.min == null ? min == null : q.min.equals(min)) && (q.max == null ? 
max == null : q.max.equals(max)) && minInclusive==q.minInclusive && @@ -79,7 +78,7 @@ //@Override public final int hashCode() { - int hash=Arrays.asList(fields).hashCode()+(precisionStep^0x64365465); + int hash = field.hashCode() + (precisionStep^0x64365465); if (min!=null) hash += min.hashCode()^0x14fa55fb; if (max!=null) hash += max.hashCode()^0x733fa5fe; return hash+ @@ -123,12 +122,10 @@ void fillBits( final IndexReader reader, final OpenBitSet bits, final TermDocs termDocs, - String field, final String lowerTerm, final String upperTerm ) throws IOException { final int len=lowerTerm.length(); assert upperTerm.length()==len; - field=field.intern(); // find the docs final TermEnum enumerator = reader.terms(new Term(field, lowerTerm)); @@ -151,7 +148,7 @@ } // members - final String[] fields; + final String field; final int precisionStep; final Number min,max; final boolean minInclusive,maxInclusive; Index: contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieRangeFilter.java =================================================================== --- contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieRangeFilter.java (revision 761725) +++ contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieRangeFilter.java (working copy) @@ -30,7 +30,7 @@ /** * Implementation of a Lucene {@link Filter} that implements trie-based range filtering for ints/floats. * This filter depends on a specific structure of terms in the index that can only be created - * by {@link TrieUtils} methods. + * by indexing via {@link IntTrieTokenStream} methods. * For more information, how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}. */ public class IntTrieRangeFilter extends AbstractTrieRangeFilter { @@ -43,52 +43,14 @@ * You can leave the bounds open, by supplying null for min and/or * max. Inclusive/exclusive bounds can also be supplied. * To query float values use the converter {@link TrieUtils#floatToSortableInt}. - *

This is the counterpart to {@link TrieUtils#addIndexedFields(Document,String,String[])}. - *

This is the recommended usage of TrieUtils/IntTrieRangeFilter. */ public IntTrieRangeFilter(final String field, final int precisionStep, final Integer min, final Integer max, final boolean minInclusive, final boolean maxInclusive ) { - this( - new String[]{field, field+TrieUtils.LOWER_PRECISION_FIELD_NAME_SUFFIX}, - precisionStep,min,max,minInclusive,maxInclusive - ); + super(field,precisionStep,min,max,minInclusive,maxInclusive); } - - /** - * Expert: A trie filter for matching trie coded values using the given field names. - * You can specify the main and helper field name, that was used to idex the values. - * precisionStep must me equal or a multiple of the precisionStep - * used for indexing the values. - * You can leave the bounds open, by supplying null for min and/or - * max. Inclusive/exclusive bounds can also be supplied. - * To query float values use the converter {@link TrieUtils#floatToSortableInt}. - *

This is the counterpart to {@link TrieUtils#addIndexedFields(Document,String,String,String[])}. - */ - public IntTrieRangeFilter(final String field, final String lowerPrecisionField, final int precisionStep, - final Integer min, final Integer max, final boolean minInclusive, final boolean maxInclusive - ) { - this(new String[]{field, lowerPrecisionField},precisionStep,min,max,minInclusive,maxInclusive); - } /** - * Expert: A trie filter for matching trie coded values - * using the given field names. If the array of field names is shorter than the - * trieCoded one, all trieCoded values with higher index get the last field name. - * precisionStep must me equal or a multiple of the precisionStep - * used for indexing the values. - * You can leave the bounds open, by supplying null for min and/or - * max. Inclusive/exclusive bounds can also be supplied. - * To query float values use the converter {@link TrieUtils#floatToSortableInt}. - *

This is the counterpart to {@link TrieUtils#addIndexedFields(Document,String[],String[])}. - */ - public IntTrieRangeFilter(final String[] fields, final int precisionStep, - Integer min, Integer max, final boolean minInclusive, final boolean maxInclusive - ) { - super(fields, precisionStep, min, max, minInclusive, maxInclusive); - } - - /** * Returns a DocIdSet that provides the documents which should be permitted or prohibited in search results. */ //@Override @@ -112,11 +74,10 @@ TrieUtils.splitIntRange(new TrieUtils.IntRangeBuilder() { //@Override - public final void addRange(String minPrefixCoded, String maxPrefixCoded, int level) { + public final void addRange(String minPrefixCoded, String maxPrefixCoded) { try { fillBits( reader, bits, termDocs, - fields[Math.min(fields.length-1, level)], minPrefixCoded, maxPrefixCoded ); } catch (IOException ioe) { Index: contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieTokenStream.java =================================================================== --- contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieTokenStream.java (revision 0) +++ contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieTokenStream.java (revision 0) @@ -0,0 +1,168 @@ +package org.apache.lucene.search.trie; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.TypeAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; + +/** + * This class provides a {@link TokenStream} for indexing int values + * that can be queried by {@link IntTrieRangeFilter}. This stream is not intended + * to be used in analyzers, its more for iterating the different precisions during + * indexing a specific numeric value. + *

An int value is indexed as multiple string encoded terms, each reduced + * by zeroing bits from the right. Each value is also prefixed (in the first char) by the + * shift value (number of bits removed) used during encoding. + *

The number of bits removed from the right for each trie entry is called + * precisionStep in this API. For comparing the different step values, see the + * {@linkplain org.apache.lucene.search.trie package description}. + *

The usage pattern is (it is recommended to switch off norms and term frequencies + * for numeric fields; it does not make sense to have them): + *

+ *  Field field = new Field(name, new IntTrieTokenStream(value, precisionStep));
+ *  field.setOmitNorms(true);
+ *  field.setOmitTermFreqAndPositions(true);
+ *  document.add(field);
+ * 
+ *

For optimal performance, re-use the TokenStream and Field instance + * for more than one document: + *

+ *  // init
+ *  TokenStream stream = new IntTrieTokenStream(precisionStep);
+ *  Field field = new Field(name, stream);
+ *  field.setOmitNorms(true);
+ *  field.setOmitTermFreqAndPositions(true);
+ *  // use this code to index many documents:
+ *  stream.setValue(value1);
+ *  document.add(field);
+ *  writer.addDocument(document);
+ *  stream.setValue(value2);
+ *  document.add(field);
+ *  writer.addDocument(document);
+ *  ...
+ * 
+ *

Please note: Token streams are read when the document is added to the index. + * If you index more than one numeric field, use a separate instance for each. + *

For more information, how trie fields work, see the + * {@linkplain org.apache.lucene.search.trie package description}. + */ +public class IntTrieTokenStream extends TokenStream { + + /** The full precision field gets this token type assigned. */ + public static final String TOKEN_TYPE_FULL_PREC = "fullPrecTrieInt"; + + /** The lower precision fields get this token type assigned. */ + public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecTrieInt"; + + /** + * Creates a token stream for indexing value with the given + * precisionStep. As instance creation is a major cost, + * consider using a {@link #IntTrieTokenStream(int)} instance once for + * indexing a large number of documents and assign a value with + * {@link #setValue} for each document. + * To index float values use the converter {@link TrieUtils#floatToSortableInt}. + */ + public IntTrieTokenStream(final int value, final int precisionStep) { + if (precisionStep<1 || precisionStep>32) + throw new IllegalArgumentException("precisionStep may only be 1..32"); + this.value = value; + this.precisionStep = precisionStep; + termAtt = (TermAttribute) addAttribute(TermAttribute.class); + typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class); + posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class); + } + + /** + * Creates a token stream for indexing values with the given + * precisionStep. This stream is initially "empty" + * (using a numeric value of 0), assign a value before indexing + * each document using {@link #setValue}. + */ + public IntTrieTokenStream(final int precisionStep) { + this(0, precisionStep); + } + + /** + * Resets the token stream to deliver prefix encoded values + * for value. Use this method to index the same + * numeric field for a large number of documents and reuse the + * current stream instance. + * To index float values use the converter {@link TrieUtils#floatToSortableInt}. 
+ */ + public void setValue(final int value) { + this.value = value; + reset(); + } + + // @Override + public void reset() { + shift = 0; + } + + // @Override + public boolean incrementToken() { + if (shift>=32) return false; + final char[] buffer = termAtt.resizeTermBuffer(TrieUtils.INT_BUF_SIZE); + termAtt.setTermLength(TrieUtils.intToPrefixCoded(value, shift, buffer)); + if (shift==0) { + typeAtt.setType(TOKEN_TYPE_FULL_PREC); + posIncrAtt.setPositionIncrement(1); + } else { + typeAtt.setType(TOKEN_TYPE_LOWER_PREC); + posIncrAtt.setPositionIncrement(0); + } + shift += precisionStep; + return true; + } + + // @Override + /** @deprecated */ + public Token next(final Token reusableToken) { + if (shift>=32) return null; + final char[] buffer = reusableToken.resizeTermBuffer(TrieUtils.INT_BUF_SIZE); + reusableToken.setTermLength(TrieUtils.intToPrefixCoded(value, shift, buffer)); + if (shift==0) { + reusableToken.setType(TOKEN_TYPE_FULL_PREC); + reusableToken.setPositionIncrement(1); + } else { + reusableToken.setType(TOKEN_TYPE_LOWER_PREC); + reusableToken.setPositionIncrement(0); + } + shift += precisionStep; + return reusableToken; + } + + // @Override + public String toString() { + final StringBuffer sb = new StringBuffer("(trie-int,value=").append(value); + sb.append(",precisionStep=").append(precisionStep).append(')'); + return sb.toString(); + } + + // members + private final TermAttribute termAtt; + private final TypeAttribute typeAtt; + private final PositionIncrementAttribute posIncrAtt; + + private int shift = 0; + private int value; + private final int precisionStep; +} Index: contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieRangeFilter.java =================================================================== --- contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieRangeFilter.java (revision 761725) +++ contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieRangeFilter.java (working copy) @@ -30,7 +30,7 @@ /** * 
Implementation of a Lucene {@link Filter} that implements trie-based range filtering for longs/doubles. * This filter depends on a specific structure of terms in the index that can only be created - * by {@link TrieUtils} methods. + * by indexing via {@link LongTrieTokenStream} methods. * For more information, how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}. */ public class LongTrieRangeFilter extends AbstractTrieRangeFilter { @@ -43,52 +43,14 @@ * You can leave the bounds open, by supplying null for min and/or * max. Inclusive/exclusive bounds can also be supplied. * To query double values use the converter {@link TrieUtils#doubleToSortableLong}. - *

This is the counterpart to {@link TrieUtils#addIndexedFields(Document,String,String[])}. - *

This is the recommended usage of TrieUtils/LongTrieRangeFilter. */ public LongTrieRangeFilter(final String field, final int precisionStep, final Long min, final Long max, final boolean minInclusive, final boolean maxInclusive ) { - this( - new String[]{field, field+TrieUtils.LOWER_PRECISION_FIELD_NAME_SUFFIX}, - precisionStep,min,max,minInclusive,maxInclusive - ); + super(field,precisionStep,min,max,minInclusive,maxInclusive); } - - /** - * Expert: A trie filter for matching trie coded values using the given field names. - * You can specify the main and helper field name, that was used to idex the values. - * precisionStep must me equal or a multiple of the precisionStep - * used for indexing the values. - * You can leave the bounds open, by supplying null for min and/or - * max. Inclusive/exclusive bounds can also be supplied. - * To query double values use the converter {@link TrieUtils#doubleToSortableLong}. - *

This is the counterpart to {@link TrieUtils#addIndexedFields(Document,String,String,String[])}. - */ - public LongTrieRangeFilter(final String field, final String lowerPrecisionField, final int precisionStep, - final Long min, final Long max, final boolean minInclusive, final boolean maxInclusive - ) { - this(new String[]{field, lowerPrecisionField},precisionStep,min,max,minInclusive,maxInclusive); - } /** - * Expert: A trie filter for matching trie coded values - * using the given field names. If the array of field names is shorter than the - * trieCoded one, all trieCoded values with higher index get the last field name. - * precisionStep must me equal or a multiple of the precisionStep - * used for indexing the values. - * You can leave the bounds open, by supplying null for min and/or - * max. Inclusive/exclusive bounds can also be supplied. - * To query double values use the converter {@link TrieUtils#doubleToSortableLong}. - *

This is the counterpart to {@link TrieUtils#addIndexedFields(Document,String[],String[])}. - */ - public LongTrieRangeFilter(final String[] fields, final int precisionStep, - Long min, Long max, final boolean minInclusive, final boolean maxInclusive - ) { - super(fields, precisionStep, min, max, minInclusive, maxInclusive); - } - - /** * Returns a DocIdSet that provides the documents which should be permitted or prohibited in search results. */ //@Override @@ -112,11 +74,10 @@ TrieUtils.splitLongRange(new TrieUtils.LongRangeBuilder() { //@Override - public final void addRange(String minPrefixCoded, String maxPrefixCoded, int level) { + public final void addRange(String minPrefixCoded, String maxPrefixCoded) { try { fillBits( reader, bits, termDocs, - fields[Math.min(fields.length-1, level)], minPrefixCoded, maxPrefixCoded ); } catch (IOException ioe) { Index: contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieTokenStream.java =================================================================== --- contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieTokenStream.java (revision 0) +++ contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieTokenStream.java (revision 0) @@ -0,0 +1,168 @@ +package org.apache.lucene.search.trie; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.TypeAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; + +/** + * This class provides a {@link TokenStream} for indexing long values + * that can be queried by {@link LongTrieRangeFilter}. This stream is not intended + * to be used in analyzers, its more for iterating the different precisions during + * indexing a specific numeric value. + *

A long value is indexed as multiple string encoded terms, each reduced + * by zeroing bits from the right. Each value is also prefixed (in the first char) by the + * shift value (number of bits removed) used during encoding. + *

The number of bits removed from the right for each trie entry is called + * precisionStep in this API. For comparing the different step values, see the + * {@linkplain org.apache.lucene.search.trie package description}. + *

The usage pattern is (it is recommended to switch off norms and term frequencies + * for numeric fields; it does not make sense to have them): + *

+ *  Field field = new Field(name, new LongTrieTokenStream(value, precisionStep));
+ *  field.setOmitNorms(true);
+ *  field.setOmitTermFreqAndPositions(true);
+ *  document.add(field);
+ * 
+ *

For optimal performance, re-use the TokenStream and Field instance + * for more than one document: + *

+ *  // init
+ *  TokenStream stream = new LongTrieTokenStream(precisionStep);
+ *  Field field = new Field(name, stream);
+ *  field.setOmitNorms(true);
+ *  field.setOmitTermFreqAndPositions(true);
+ *  // use this code to index many documents:
+ *  stream.setValue(value1);
+ *  document.add(field);
+ *  writer.addDocument(document);
+ *  stream.setValue(value2);
+ *  document.add(field);
+ *  writer.addDocument(document);
+ *  ...
+ * 
+ *

Please note: Token streams are read when the document is added to the index. + * If you index more than one numeric field, use a separate instance for each. + *

For more information, how trie fields work, see the + * {@linkplain org.apache.lucene.search.trie package description}. + */ +public class LongTrieTokenStream extends TokenStream { + + /** The full precision field gets this token type assigned. */ + public static final String TOKEN_TYPE_FULL_PREC = "fullPrecTrieLong"; + + /** The lower precision fields get this token type assigned. */ + public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecTrieLong"; + + /** + * Creates a token stream for indexing value with the given + * precisionStep. As instance creation is a major cost, + * consider using a {@link #LongTrieTokenStream(int)} instance once for + * indexing a large number of documents and assign a value with + * {@link #setValue} for each document. + * To index double values use the converter {@link TrieUtils#doubleToSortableLong}. + */ + public LongTrieTokenStream(final long value, final int precisionStep) { + if (precisionStep<1 || precisionStep>64) + throw new IllegalArgumentException("precisionStep may only be 1..64"); + this.value = value; + this.precisionStep = precisionStep; + termAtt = (TermAttribute) addAttribute(TermAttribute.class); + typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class); + posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class); + } + + /** + * Creates a token stream for indexing values with the given + * precisionStep. This stream is initially "empty" + * (using a numeric value of 0), assign a value before indexing + * each document using {@link #setValue}. + */ + public LongTrieTokenStream(final int precisionStep) { + this(0L, precisionStep); + } + + /** + * Resets the token stream to deliver prefix encoded values + * for value. Use this method to index the same + * numeric field for a large number of documents and reuse the + * current stream instance. + * To index double values use the converter {@link TrieUtils#doubleToSortableLong}. 
+ */ + public void setValue(final long value) { + this.value = value; + reset(); + } + + // @Override + public void reset() { + shift = 0; + } + + // @Override + public boolean incrementToken() { + if (shift>=64) return false; + final char[] buffer = termAtt.resizeTermBuffer(TrieUtils.LONG_BUF_SIZE); + termAtt.setTermLength(TrieUtils.longToPrefixCoded(value, shift, buffer)); + if (shift==0) { + typeAtt.setType(TOKEN_TYPE_FULL_PREC); + posIncrAtt.setPositionIncrement(1); + } else { + typeAtt.setType(TOKEN_TYPE_LOWER_PREC); + posIncrAtt.setPositionIncrement(0); + } + shift += precisionStep; + return true; + } + + // @Override + /** @deprecated */ + public Token next(final Token reusableToken) { + if (shift>=64) return null; + final char[] buffer = reusableToken.resizeTermBuffer(TrieUtils.LONG_BUF_SIZE); + reusableToken.setTermLength(TrieUtils.longToPrefixCoded(value, shift, buffer)); + if (shift==0) { + reusableToken.setType(TOKEN_TYPE_FULL_PREC); + reusableToken.setPositionIncrement(1); + } else { + reusableToken.setType(TOKEN_TYPE_LOWER_PREC); + reusableToken.setPositionIncrement(0); + } + shift += precisionStep; + return reusableToken; + } + + // @Override + public String toString() { + final StringBuffer sb = new StringBuffer("(trie-long,value=").append(value); + sb.append(",precisionStep=").append(precisionStep).append(')'); + return sb.toString(); + } + + // members + private final TermAttribute termAtt; + private final TypeAttribute typeAtt; + private final PositionIncrementAttribute posIncrAtt; + + private int shift = 0; + private long value; + private final int precisionStep; +} Index: contrib/queries/src/java/org/apache/lucene/search/trie/package.html =================================================================== --- contrib/queries/src/java/org/apache/lucene/search/trie/package.html (revision 761725) +++ contrib/queries/src/java/org/apache/lucene/search/trie/package.html (working copy) @@ -50,10 +50,14 @@ are no longer dependent on the index size and 
the number of distinct values because there is an upper limit unrelated to either of these properties.

-

Usage

+

Indexing Usage

To use the new query types the numerical values, which may belong, double, int, float, or Date, the values must be indexed in a special prefix encoded format -(using {@link org.apache.lucene.search.trie.TrieUtils}). This can be done like this:

+using {@link org.apache.lucene.search.trie.LongTrieTokenStream} or +{@link org.apache.lucene.search.trie.IntTrieTokenStream}, which generate the necessary tokens. +Use {@link org.apache.lucene.search.trie.TrieUtils} to convert floating point values to integers. +Example code for indexing (it is recommended to disable norms and term frequencies during indexing +trie encoded fields):

   // chose a step value, 8 is a general good value for large indexes:
@@ -67,15 +71,25 @@
   
   // add some numerical fields:
   long lvalue = 121345L;
-  TrieUtils.addIndexedFields(doc, "exampleLong", TrieUtils.trieCodeLong(lvalue, precisionStep));
+  Field f = new Field("exampleLong", new LongTrieTokenStream(lvalue, precisionStep));
+  f.setOmitNorms(true); f.setOmitTermFreqAndPositions(true);
+  doc.add(f);
   double dvalue = 1.057E17;
-  TrieUtils.addIndexedFields(doc, "exampleDouble", TrieUtils.trieCodeLong(TrieUtils.doubleToSortableLong(dvalue), precisionStep));
+  f = new Field("exampleDouble", new LongTrieTokenStream(TrieUtils.doubleToSortableLong(dvalue), precisionStep));
+  f.setOmitNorms(true); f.setOmitTermFreqAndPositions(true);
+  doc.add(f);
   int ivalue = 121345;
-  TrieUtils.addIndexedFields(doc, "exampleInt", TrieUtils.trieCodeInt(ivalue, precisionStep));
+  f = new Field("exampleInt", new IntTrieTokenStream(ivalue, precisionStep));
+  f.setOmitNorms(true); f.setOmitTermFreqAndPositions(true);
+  doc.add(f);
   float fvalue = 1.057E17f;
-  TrieUtils.addIndexedFields(doc, "exampleFloat", TrieUtils.trieCodeInt(TrieUtils.floatToSortableInt(fvalue), precisionStep));
+  f = new Field("exampleFloat", new IntTrieTokenStream(TrieUtils.floatToSortableInt(fvalue), precisionStep));
+  f.setOmitNorms(true); f.setOmitTermFreqAndPositions(true);
+  doc.add(f);
   Date datevalue = new Date(); // actual time
-  TrieUtils.addIndexedFields(doc, "exampleDate", TrieUtils.trieCodeLong(datevalue.getTime(), precisionStep));
+  f = new Field("exampleDate", new LongTrieTokenStream(datevalue.getTime(), precisionStep));
+  f.setOmitNorms(true); f.setOmitTermFreqAndPositions(true);
+  doc.add(f);
   
   // if you want to also store one of the values:
   doc.add(new Field("exampleLong", Long.toString(lvalue), Field.Store.YES, Field.Index.NO));
@@ -86,6 +100,11 @@
   // now add document to IndexWriter, as usual
 
+

(for higher indexing performance, you can reuse the TokenStreams – +more info about this in the stream documentation)

+ +

Searching

+

The numeric index fields you prepared in this way can be searched by {@link org.apache.lucene.search.trie.LongTrieRangeFilter} or {@link org.apache.lucene.search.trie.IntTrieRangeFilter}:

Index: contrib/queries/src/java/org/apache/lucene/search/trie/TrieUtils.java =================================================================== --- contrib/queries/src/java/org/apache/lucene/search/trie/TrieUtils.java (revision 761725) +++ contrib/queries/src/java/org/apache/lucene/search/trie/TrieUtils.java (working copy) @@ -17,17 +17,13 @@ * limitations under the License. */ -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; import org.apache.lucene.search.SortField; import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.ExtendedFieldCache; /** - * This is a helper class to construct the trie-based index entries for numerical values. - * For more information on how the algorithm works, see the - * {@linkplain org.apache.lucene.search.trie package description}. - *

The trie format using prefix encoded numerical values

+ * This is a helper class to generate prefix-encoded representations for numerical values + * and supplies converters to represent float/double values as sortable integers/longs. *

To quickly execute range queries in Apache Lucene, a range is divided recursively * into multiple intervals for searching: The center of the range is searched only with * the lowest possible precision in the trie, while the boundaries are matched @@ -35,54 +31,48 @@ *

This class generates terms to achive this: First the numerical integer values need to * be converted to strings. For that integer values (32 bit or 64 bit) are made unsigned * and the bits are converted to ASCII chars with each 7 bit. The resulting string is - * sortable like the original integer value. + * sortable like the original integer value. Each value is also prefixed + * (in the first char) by the shift value (number of bits removed) used + * during encoding. *

To also index floating point numbers, this class supplies two methods to convert them * to integer values by changing their bit layout: {@link #doubleToSortableLong}, * {@link #floatToSortableInt}. You will have no precision loss by * converting floating point numbers to integers and back (only that the integer form * is not usable). Other data types like dates can easily converted to longs or ints (e.g. * date to long: {@link java.util.Date#getTime}). - *

To index the different precisions of the long values each encoded value is also reduced - * by zeroing bits from the right. Each value is also prefixed (in the first char) by the - * shift value (number of bits removed) used during encoding. This series of - * different precision values can be indexed into a Lucene {@link Document} using - * {@link #addIndexedFields(Document,String,String[])}. The default is to index the original - * precision in the supplied field name and the lower precisions in an additional helper field. - * Because of this, the full-precision field can also be sorted (using {@link #getLongSortField} - * or {@link #getIntSortField}). - *

The number of bits removed from the right for each trie entry is called - * precisionStep in this API. For comparing the different step values, see the - * {@linkplain org.apache.lucene.search.trie package description}. + *

Prefix encoded fields can also be sorted using the {@link SortField} factories + * {@link #getLongSortField} or {@link #getIntSortField}. */ public final class TrieUtils { private TrieUtils() {} // no instance! /** - * The default "helper" field containing the lower precision terms is the original - * fieldname with this appended. This suffix is used in - * {@link #addIndexedFields(Document,String,String[])} and the corresponding c'tor - * of (Long|Int)TrieRangeFilter. - */ - public static final String LOWER_PRECISION_FIELD_NAME_SUFFIX="#trie"; - - /** * Longs are stored at lower precision by shifting off lower bits. The shift count is * stored as SHIFT_START_LONG+shift in the first character */ public static final char SHIFT_START_LONG = (char)0x20; + /** internal: maximum needed char[] buffer size for encoding */ + static final int LONG_BUF_SIZE = 63/7 + 2; + /** * Integers are stored at lower precision by shifting off lower bits. The shift count is * stored as SHIFT_START_INT+shift in the first character */ public static final char SHIFT_START_INT = (char)0x60; + /** internal: maximum needed char[] buffer size for encoding */ + static final int INT_BUF_SIZE = 31/7 + 2; + /** * A parser instance for filling a {@link ExtendedFieldCache}, that parses prefix encoded fields as longs. 
*/ public static final ExtendedFieldCache.LongParser FIELD_CACHE_LONG_PARSER=new ExtendedFieldCache.LongParser(){ public final long parseLong(final String val) { + final int shift = val.charAt(0)-SHIFT_START_LONG; + if (shift>0 && shift<=63) + throw new FieldCache.StopFillCacheException(); return prefixCodedToLong(val); } }; @@ -92,6 +82,9 @@ */ public static final FieldCache.IntParser FIELD_CACHE_INT_PARSER=new FieldCache.IntParser(){ public final int parseInt(final String val) { + final int shift = val.charAt(0)-SHIFT_START_INT; + if (shift>0 && shift<=31) + throw new FieldCache.StopFillCacheException(); return prefixCodedToInt(val); } }; @@ -102,6 +95,9 @@ */ public static final ExtendedFieldCache.DoubleParser FIELD_CACHE_DOUBLE_PARSER=new ExtendedFieldCache.DoubleParser(){ public final double parseDouble(final String val) { + final int shift = val.charAt(0)-SHIFT_START_LONG; + if (shift>0 && shift<=63) + throw new FieldCache.StopFillCacheException(); return sortableLongToDouble(prefixCodedToLong(val)); } }; @@ -112,9 +108,28 @@ */ public static final FieldCache.FloatParser FIELD_CACHE_FLOAT_PARSER=new FieldCache.FloatParser(){ public final float parseFloat(final String val) { + final int shift = val.charAt(0)-SHIFT_START_INT; + if (shift>0 && shift<=31) + throw new FieldCache.StopFillCacheException(); return sortableIntToFloat(prefixCodedToInt(val)); } }; + + /** internal */ + static int longToPrefixCoded(final long val, final int shift, final char[] buffer) { + int nChars = (63-shift)/7 + 1, len = nChars+1; + buffer[0] = (char)(SHIFT_START_LONG + shift); + long sortableBits = val ^ 0x8000000000000000L; + sortableBits >>>= shift; + while (nChars>=1) { + // Store 7 bits per character for good efficiency when UTF-8 encoding. + // The whole number is right-justified so that lucene can prefix-encode + // the terms more efficiently. 
+ buffer[nChars--] = (char)(sortableBits & 0x7f); + sortableBits >>>= 7; + } + return len; + } /** * This is a convenience method, that returns prefix coded bits of a long without @@ -125,27 +140,33 @@ public static String longToPrefixCoded(final long val) { return longToPrefixCoded(val, 0); } - + /** * Expert: Returns prefix coded bits after reducing the precision by shift bits. - * This is method is used by {@link #trieCodeLong}. + * This method is used by {@link LongRangeBuilder}. */ public static String longToPrefixCoded(final long val, final int shift) { if (shift>63 || shift<0) throw new IllegalArgumentException("Illegal shift value, must be 0..63"); - int nChars = (63-shift)/7 + 1; - final char[] arr = new char[nChars+1]; - arr[0] = (char)(SHIFT_START_LONG + shift); - long sortableBits = val ^ 0x8000000000000000L; + final char[] buffer = new char[LONG_BUF_SIZE]; + final int len = longToPrefixCoded(val, shift, buffer); + return new String(buffer, 0, len); + } + + /** internal */ + static int intToPrefixCoded(final int val, final int shift, final char[] buffer) { + int nChars = (31-shift)/7 + 1, len = nChars+1; + buffer[0] = (char)(SHIFT_START_INT + shift); + int sortableBits = val ^ 0x80000000; sortableBits >>>= shift; while (nChars>=1) { // Store 7 bits per character for good efficiency when UTF-8 encoding. // The whole number is right-justified so that lucene can prefix-encode // the terms more efficiently. - arr[nChars--] = (char)(sortableBits & 0x7f); + buffer[nChars--] = (char)(sortableBits & 0x7f); sortableBits >>>= 7; } - return new String(arr); + return len; } /** @@ -160,39 +181,30 @@ /** * Expert: Returns prefix coded bits after reducing the precision by shift bits. - * This is method is used by {@link #trieCodeInt}. + * This method is used by {@link IntRangeBuilder}. 
*/ public static String intToPrefixCoded(final int val, final int shift) { if (shift>31 || shift<0) throw new IllegalArgumentException("Illegal shift value, must be 0..31"); - int nChars = (31-shift)/7 + 1; - final char[] arr = new char[nChars+1]; - arr[0] = (char)(SHIFT_START_INT + shift); - int sortableBits = val ^ 0x80000000; - sortableBits >>>= shift; - while (nChars>=1) { - // Store 7 bits per character for good efficiency when UTF-8 encoding. - // The whole number is right-justified so that lucene can prefix-encode - // the terms more efficiently. - arr[nChars--] = (char)(sortableBits & 0x7f); - sortableBits >>>= 7; - } - return new String(arr); + final char[] buffer = new char[INT_BUF_SIZE]; + final int len = intToPrefixCoded(val, shift, buffer); + return new String(buffer, 0, len); } /** * Returns a long from prefixCoded characters. * Rightmost bits will be zero for lower precision codes. * This method can be used to decode e.g. a stored field. + * @throws NumberFormatException if the supplied char sequence is + * not correctly prefix encoded. * @see #longToPrefixCoded(long) */ - public static long prefixCodedToLong(final String prefixCoded) { - final int len = prefixCoded.length(); + public static long prefixCodedToLong(final CharSequence prefixCoded) { final int shift = prefixCoded.charAt(0)-SHIFT_START_LONG; if (shift>63 || shift<0) - throw new NumberFormatException("Invalid shift value in prefixCoded string (is encoded value really a LONG?)"); + throw new NumberFormatException("Invalid shift value in prefixCoded char sequence (is encoded value really a LONG?)"); long sortableBits = 0L; - for (int i=1; i0x7f) { @@ -210,15 +222,16 @@ * Returns an int from prefixCoded characters. * Rightmost bits will be zero for lower precision codes. * This method can be used to decode e.g. a stored field. + * @throws NumberFormatException if the supplied char sequence is + * not correctly prefix encoded. 
* @see #intToPrefixCoded(int) */ - public static int prefixCodedToInt(final String prefixCoded) { - final int len = prefixCoded.length(); + public static int prefixCodedToInt(final CharSequence prefixCoded) { final int shift = prefixCoded.charAt(0)-SHIFT_START_INT; if (shift>31 || shift<0) - throw new NumberFormatException("Invalid shift value in prefixCoded string (is encoded value really an INT?)"); + throw new NumberFormatException("Invalid shift value in prefixCoded char sequence (is encoded value really an INT?)"); int sortableBits = 0; - for (int i=1; i0x7f) { @@ -277,116 +290,20 @@ } /** A factory method, that generates a {@link SortField} instance for sorting prefix encoded long values. */ - public static SortField getLongSortField(final String field, boolean reverse) { + public static SortField getLongSortField(final String field, final boolean reverse) { return new SortField(field, FIELD_CACHE_LONG_PARSER, reverse); } /** A factory method, that generates a {@link SortField} instance for sorting prefix encoded int values. */ - public static SortField getIntSortField(final String field, boolean reverse) { + public static SortField getIntSortField(final String field, final boolean reverse) { return new SortField(field, FIELD_CACHE_INT_PARSER, reverse); } /** - * Returns a sequence of trie coded numbers suitable for {@link LongTrieRangeFilter}. - * Each successive string in the list has had it's precision reduced by precisionStep. - * For sorting, index the first full-precision value into a separate field and the - * remaining values into another field. - *

To achieve this, use {@link #addIndexedFields(Document,String,String[])}. - */ - public static String[] trieCodeLong(long val, int precisionStep) { - if (precisionStep<1 || precisionStep>64) - throw new IllegalArgumentException("precisionStep may only be 1..64"); - String[] arr = new String[63/precisionStep+1]; - int idx = 0; - for (int shift=0; shift<64; shift+=precisionStep) { - arr[idx++] = longToPrefixCoded(val, shift); - } - return arr; - } - - /** - * Returns a sequence of trie coded numbers suitable for {@link IntTrieRangeFilter}. - * Each successive string in the list has had it's precision reduced by precisionStep. - * For sorting, index the first full-precision value into a separate field and the - * remaining values into another field. - *

To achieve this, use {@link #addIndexedFields(Document,String,String[])}. - */ - public static String[] trieCodeInt(int val, int precisionStep) { - if (precisionStep<1 || precisionStep>32) - throw new IllegalArgumentException("precisionStep may only be 1..32"); - String[] arr = new String[31/precisionStep+1]; - int idx = 0; - for (int shift=0; shift<32; shift+=precisionStep) { - arr[idx++] = intToPrefixCoded(val, shift); - } - return arr; - } - - /** - * Indexes the full precision value only in the main field (for sorting), and indexes all other - * lower precision values in field+LOWER_PRECISION_FIELD_NAME_SUFFIX. - *

This is the recommended variant to add trie fields to the index. - * By this it is possible to sort the field using a SortField instance - * returned by {@link #getLongSortField} or {@link #getIntSortField}. - *

This method does not store the fields and saves no term frequency or norms - * (which are normally not needed for trie fields). If you want to additionally store - * the value, you can use the normal methods of {@link Document} to achive this, just specify - * Field.Store.YES, Field.Index.NO and the same field name. - *

Examples: - *

-   *  addIndexedFields(doc, "mydouble", trieCodeLong(doubleToSortableLong(1.414d), 4));
-   *  addIndexedFields(doc, "mylong", trieCodeLong(123456L, 4));
-   * 
- **/ - public static void addIndexedFields(Document doc, String field, String[] trieCoded) { - addIndexedFields(doc, new String[]{field, field+LOWER_PRECISION_FIELD_NAME_SUFFIX}, trieCoded); - } - - /** - * Expert: Indexes the full precision value only in the main field (for sorting), and indexes all other - * lower precision values in the lowerPrecision field. - * If you do not specify the same field name for the main and lower precision one, - * it is possible to sort the field using a SortField instance - * returned by {@link #getLongSortField} or {@link #getIntSortField}. - *

This method does not store the fields and saves no term frequency or norms - * (which are normally not needed for trie fields). If you want to additionally store - * the value, you can use the normal methods of {@link Document} to achive this, just specify - * Field.Store.YES, Field.Index.NO and the same main field name. - *

Examples: - *

-   *  addIndexedFields(doc, "mydouble", "mydoubletrie", trieCodeLong(doubleToSortableLong(1.414d), 4));
-   *  addIndexedFields(doc, "mylong", "mylongtrie", trieCodeLong(123456L, 4));
-   * 
- * @see #addIndexedFields(Document,String,String[]) - **/ - public static void addIndexedFields(Document doc, String field, String lowerPrecisionField, String[] trieCoded) { - addIndexedFields(doc, new String[]{field, lowerPrecisionField}, trieCoded); - } - - /** - * Expert: Indexes a series of trie coded values into a lucene {@link Document} - * using the given field names. - * If the array of field names is shorter than the trie coded one, all trie coded - * values with higher index get the last field name. - *

This method does not store the fields and saves no term frequency or norms - * (which are normally not needed for trie fields). If you want to additionally store - * the value, you can use the normal methods of {@link Document} to achive this, just specify - * Field.Store.YES, Field.Index.NO and the same main field name. - **/ - public static void addIndexedFields(Document doc, String[] fields, String[] trieCoded) { - for (int i=0; iThis method is used by {@link LongTrieRangeFilter}. */ @@ -402,7 +319,7 @@ * Expert: Splits an int range recursively. * You may implement a builder that adds clauses to a * {@link org.apache.lucene.search.BooleanQuery} for each call to its - * {@link IntRangeBuilder#addRange(String,String,int)} + * {@link IntRangeBuilder#addRange(String,String)} * method. *

This method is used by {@link IntTrieRangeFilter}. */ @@ -419,7 +336,7 @@ final Object builder, final int valSize, final int precisionStep, long minBound, long maxBound ) { - for (int level=0,shift=0;; level++) { + for (int shift=0; ; shift += precisionStep) { // calculate new bounds for inner precision final long diff = 1L << (shift+precisionStep), mask = ((1L<=valSize || nextMinBound>nextMaxBound) { // We are in the lowest precision or the next precision is not available. - addRange(builder, valSize, minBound, maxBound, shift, level); + addRange(builder, valSize, minBound, maxBound, shift); // exit the split recursion loop break; } if (hasLower) - addRange(builder, valSize, minBound, minBound | mask, shift, level); + addRange(builder, valSize, minBound, minBound | mask, shift); if (hasUpper) - addRange(builder, valSize, maxBound & ~mask, maxBound, shift, level); + addRange(builder, valSize, maxBound & ~mask, maxBound, shift); // recurse to next precision minBound = nextMinBound; maxBound = nextMaxBound; - shift += precisionStep; } } @@ -453,7 +369,7 @@ private static void addRange( final Object builder, final int valSize, long minBound, long maxBound, - final int shift, final int level + final int shift ) { // for the max bound set all lower bits (that were shifted away): // this is important for testing or other usages of the splitted range @@ -463,10 +379,10 @@ // delegate to correct range builder switch(valSize) { case 64: - ((LongRangeBuilder)builder).addRange(minBound, maxBound, shift, level); + ((LongRangeBuilder)builder).addRange(minBound, maxBound, shift); break; case 32: - ((IntRangeBuilder)builder).addRange((int)minBound, (int)maxBound, shift, level); + ((IntRangeBuilder)builder).addRange((int)minBound, (int)maxBound, shift); break; default: // Should not happen! @@ -484,16 +400,9 @@ /** * Overwrite this method, if you like to receive the already prefix encoded range bounds. - * You can directly build classical range queries from them. 
- * The level gives the precision level (0 = highest precision) of the encoded values. - * This parameter could be used as an index to an array of fieldnames like the - * parameters to {@link #addIndexedFields(Document,String[],String[])} for specifying - * the field names for each precision: - *

-     *  String field = fields[Math.min(fields.length-1, level)];
-     * 
+ * You can directly build classical (inclusive) range queries from them. */ - public void addRange(String minPrefixCoded, String maxPrefixCoded, int level) { + public void addRange(String minPrefixCoded, String maxPrefixCoded) { throw new UnsupportedOperationException(); } @@ -501,10 +410,8 @@ * Overwrite this method, if you like to receive the raw long range bounds. * You can use this for e.g. debugging purposes (print out range bounds). */ - public void addRange(final long min, final long max, final int shift, final int level) { - /*System.out.println(Long.toHexString((min^0x8000000000000000L) >>> shift)+".."+ - Long.toHexString((max^0x8000000000000000L) >>> shift));*/ - addRange(longToPrefixCoded(min, shift), longToPrefixCoded(max, shift), level); + public void addRange(final long min, final long max, final int shift) { + addRange(longToPrefixCoded(min, shift), longToPrefixCoded(max, shift)); } } @@ -519,16 +426,9 @@ /** * Overwrite this method, if you like to receive the already prefix encoded range bounds. - * You can directly build classical range queries from them. - * The level gives the precision level (0 = highest precision) of the encoded values. - * This parameter could be used as an index to an array of fieldnames like the - * parameters to {@link #addIndexedFields(Document,String[],String[])} for specifying - * the field names for each precision: - *
-     *  String field = fields[Math.min(fields.length-1, level)];
-     * 
+ * You can directly build classical (inclusive) range queries from them. */ - public void addRange(String minPrefixCoded, String maxPrefixCoded, int level) { + public void addRange(String minPrefixCoded, String maxPrefixCoded) { throw new UnsupportedOperationException(); } @@ -536,10 +436,8 @@ * Overwrite this method, if you like to receive the raw int range bounds. * You can use this for e.g. debugging purposes (print out range bounds). */ - public void addRange(final int min, final int max, final int shift, final int level) { - /*System.out.println(Integer.toHexString((min^0x80000000) >>> shift)+".."+ - Integer.toHexString((max^0x80000000) >>> shift));*/ - addRange(intToPrefixCoded(min, shift), intToPrefixCoded(max, shift), level); + public void addRange(final int min, final int max, final int shift) { + addRange(intToPrefixCoded(min, shift), intToPrefixCoded(max, shift)); } } Index: contrib/queries/src/test/org/apache/lucene/search/trie/TestIntTrieRangeFilter.java =================================================================== --- contrib/queries/src/test/org/apache/lucene/search/trie/TestIntTrieRangeFilter.java (revision 761725) +++ contrib/queries/src/test/org/apache/lucene/search/trie/TestIntTrieRangeFilter.java (working copy) @@ -42,6 +42,15 @@ // number of docs to generate for testing private static final int noDocs = 10000; + private static Field newField(String name, int precisionStep) { + IntTrieTokenStream stream = new IntTrieTokenStream(precisionStep); + stream.setUseNewAPI(true); + Field f=new Field(name, stream); + f.setOmitTermFreqAndPositions(true); + f.setOmitNorms(true); + return f; + } + private static final RAMDirectory directory; private static final IndexSearcher searcher; static { @@ -50,21 +59,34 @@ IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, MaxFieldLength.UNLIMITED); + Field + field8 = newField("field8", 8), + field4 = newField("field4", 4), + field2 = newField("field2", 2), + ascfield8 = 
newField("ascfield8", 8), + ascfield4 = newField("ascfield4", 4), + ascfield2 = newField("ascfield2", 2); + // Add a series of noDocs docs with increasing int values for (int l=0; lact ); last=act; } Index: contrib/queries/src/test/org/apache/lucene/search/trie/TestLongTrieRangeFilter.java =================================================================== --- contrib/queries/src/test/org/apache/lucene/search/trie/TestLongTrieRangeFilter.java (revision 761725) +++ contrib/queries/src/test/org/apache/lucene/search/trie/TestLongTrieRangeFilter.java (working copy) @@ -42,6 +42,15 @@ // number of docs to generate for testing private static final int noDocs = 10000; + private static Field newField(String name, int precisionStep) { + LongTrieTokenStream stream = new LongTrieTokenStream(precisionStep); + stream.setUseNewAPI(true); + Field f=new Field(name, stream); + f.setOmitTermFreqAndPositions(true); + f.setOmitNorms(true); + return f; + } + private static final RAMDirectory directory; private static final IndexSearcher searcher; static { @@ -50,21 +59,34 @@ IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, MaxFieldLength.UNLIMITED); + Field + field8 = newField("field8", 8), + field4 = newField("field4", 4), + field2 = newField("field2", 2), + ascfield8 = newField("ascfield8", 8), + ascfield4 = newField("ascfield4", 4), + ascfield2 = newField("ascfield2", 2); + // Add a series of noDocs docs with increasing long values for (int l=0; lact ); last=act; } Index: contrib/queries/src/test/org/apache/lucene/search/trie/TestTrieUtils.java =================================================================== --- contrib/queries/src/test/org/apache/lucene/search/trie/TestTrieUtils.java (revision 761725) +++ contrib/queries/src/test/org/apache/lucene/search/trie/TestTrieUtils.java (working copy) @@ -179,7 +179,7 @@ TrieUtils.splitLongRange(new TrieUtils.LongRangeBuilder() { //@Override - public void addRange(long min, long max, int shift, int level) { 
+ public void addRange(long min, long max, int shift) { assertTrue("min, max should be inside bounds", min>=lower && min<=upper && max>=lower && max<=upper); if (useBitSet) for (long l=min; l<=max; l++) { assertFalse("ranges should not overlap", bits.getAndSet(l-lower) ); @@ -253,7 +253,7 @@ TrieUtils.splitIntRange(new TrieUtils.IntRangeBuilder() { //@Override - public void addRange(int min, int max, int shift, int level) { + public void addRange(int min, int max, int shift) { assertTrue("min, max should be inside bounds", min>=lower && min<=upper && max>=lower && max<=upper); if (useBitSet) for (int i=min; i<=max; i++) { assertFalse("ranges should not overlap", bits.getAndSet(i-lower) ); Index: src/java/org/apache/lucene/search/ExtendedFieldCacheImpl.java =================================================================== --- src/java/org/apache/lucene/search/ExtendedFieldCacheImpl.java (revision 761725) +++ src/java/org/apache/lucene/search/ExtendedFieldCacheImpl.java (working copy) @@ -73,6 +73,7 @@ retArray[termDocs.doc()] = termval; } } while (termEnum.next()); + } catch (StopFillCacheException stop) { } finally { termDocs.close(); termEnum.close(); @@ -113,6 +114,7 @@ retArray[termDocs.doc()] = termval; } } while (termEnum.next()); + } catch (StopFillCacheException stop) { } finally { termDocs.close(); termEnum.close(); Index: src/java/org/apache/lucene/search/FieldCache.java =================================================================== --- src/java/org/apache/lucene/search/FieldCache.java (revision 761725) +++ src/java/org/apache/lucene/search/FieldCache.java (working copy) @@ -82,6 +82,15 @@ public interface Parser { } + /** + * Internal hack for contrib's TrieUtils, to stop parsing, + * when lower precision terms appear during parsing. + *

DO NOT USE IN YOUR OWN CODE! It is just public + * to be accessible from o.a.l.search.trie. + */ + public static class StopFillCacheException extends RuntimeException { + } + /** Interface to parse bytes from document fields. * @see FieldCache#getBytes(IndexReader, String, FieldCache.ByteParser) */ Index: src/java/org/apache/lucene/search/FieldCacheImpl.java =================================================================== --- src/java/org/apache/lucene/search/FieldCacheImpl.java (revision 761725) +++ src/java/org/apache/lucene/search/FieldCacheImpl.java (working copy) @@ -196,6 +196,7 @@ retArray[termDocs.doc()] = termval; } } while (termEnum.next()); + } catch (StopFillCacheException stop) { } finally { termDocs.close(); termEnum.close(); @@ -235,6 +236,7 @@ retArray[termDocs.doc()] = termval; } } while (termEnum.next()); + } catch (StopFillCacheException stop) { } finally { termDocs.close(); termEnum.close(); @@ -274,6 +276,7 @@ retArray[termDocs.doc()] = termval; } } while (termEnum.next()); + } catch (StopFillCacheException stop) { } finally { termDocs.close(); termEnum.close(); @@ -315,6 +318,7 @@ retArray[termDocs.doc()] = termval; } } while (termEnum.next()); + } catch (StopFillCacheException stop) { } finally { termDocs.close(); termEnum.close();