Index: contrib/queries/src/java/org/apache/lucene/search/trie/AbstractTrieRangeFilter.java =================================================================== --- contrib/queries/src/java/org/apache/lucene/search/trie/AbstractTrieRangeFilter.java (revision 764815) +++ contrib/queries/src/java/org/apache/lucene/search/trie/AbstractTrieRangeFilter.java (working copy) @@ -18,140 +18,57 @@ */ import java.io.IOException; +import java.util.BitSet; import org.apache.lucene.search.Filter; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.TermDocs; -import org.apache.lucene.index.TermEnum; -import org.apache.lucene.index.Term; -import org.apache.lucene.util.OpenBitSet; -import org.apache.lucene.util.ToStringUtils; +import org.apache.lucene.search.DocIdSet; - abstract class AbstractTrieRangeFilter extends Filter { - AbstractTrieRangeFilter(final String field, final int precisionStep, - Number min, Number max, final boolean minInclusive, final boolean maxInclusive - ) { - this.field=field.intern(); - this.precisionStep=precisionStep; - this.min=min; - this.max=max; - this.minInclusive=minInclusive; - this.maxInclusive=maxInclusive; + AbstractTrieRangeFilter(AbstractTrieRangeQuery query) { + this.query = query; } //@Override public String toString() { - return toString(null); + // query.toString() should not print the boost, as fixed to 1.0f + return query.toString(); } - public String toString(final String field) { - final StringBuffer sb=new StringBuffer(); - if (!this.field.equals(field)) sb.append(this.field).append(':'); - return sb.append(minInclusive ? '[' : '{') - .append((min==null) ? "*" : min.toString()) - .append(" TO ") - .append((max==null) ? "*" : max.toString()) - .append(maxInclusive ? 
']' : '}').toString(); - } - //@Override public final boolean equals(final Object o) { if (o==this) return true; if (o==null) return false; if (this.getClass().equals(o.getClass())) { - AbstractTrieRangeFilter q=(AbstractTrieRangeFilter)o; - return ( - field==q.field && - (q.min == null ? min == null : q.min.equals(min)) && - (q.max == null ? max == null : q.max.equals(max)) && - minInclusive==q.minInclusive && - maxInclusive==q.maxInclusive && - precisionStep==q.precisionStep - ); + return this.query.equals( ((AbstractTrieRangeFilter)o).query ); } return false; } //@Override public final int hashCode() { - int hash = field.hashCode() + (precisionStep^0x64365465); - if (min!=null) hash += min.hashCode()^0x14fa55fb; - if (max!=null) hash += max.hashCode()^0x733fa5fe; - return hash+ - (Boolean.valueOf(minInclusive).hashCode()^0x14fa55fb)+ - (Boolean.valueOf(maxInclusive).hashCode()^0x733fa5fe); + return query.hashCode(); } /** - * Expert: Return the number of terms visited during the last execution of {@link #getDocIdSet}. + * Expert: Return the number of terms visited during the last execution of the filter. * This may be used for performance comparisons of different trie variants and their effectiveness. * This method is not thread safe, be sure to only call it when no query is running! - * @throws IllegalStateException if {@link #getDocIdSet} was not yet executed. + * @throws IllegalStateException if {@link #getDocIdSet} or {@link #bits} was not yet executed. */ public int getLastNumberOfTerms() { - if (lastNumberOfTerms < 0) throw new IllegalStateException(); - return lastNumberOfTerms; + return query.getLastNumberOfTerms(); } - - void resetLastNumberOfTerms() { - lastNumberOfTerms=0; - } - /** Returns this range filter as a query. - * Using this method, it is possible to create a Query using new {Long|Int}TrieRangeFilter(....).asQuery(). - * This is a synonym for wrapping with a {@link ConstantScoreQuery}, - * but this query returns a better toString() variant. 
- */ - public Query asQuery() { - return new ConstantScoreQuery(this) { - - /** this instance return a nicer String variant than the original {@link ConstantScoreQuery} */ - //@Override - public String toString(final String field) { - // return a more convenient representation of this query than ConstantScoreQuery does: - return ((AbstractTrieRangeFilter) filter).toString(field)+ToStringUtils.boost(getBoost()); - } - - }; + public BitSet bits(IndexReader reader) throws IOException { + return query.getFilter().bits(reader); } - void fillBits( - final IndexReader reader, - final OpenBitSet bits, final TermDocs termDocs, - final String lowerTerm, final String upperTerm - ) throws IOException { - final int len=lowerTerm.length(); - assert upperTerm.length()==len; - - // find the docs - final TermEnum enumerator = reader.terms(new Term(field, lowerTerm)); - try { - do { - final Term term = enumerator.term(); - if (term!=null && term.field()==field) { - // break out when upperTerm reached or length of term is different - final String t=term.text(); - if (len!=t.length() || t.compareTo(upperTerm)>0) break; - // we have a good term, find the docs - lastNumberOfTerms++; - termDocs.seek(enumerator); - while (termDocs.next()) bits.set(termDocs.doc()); - } else break; - } while (enumerator.next()); - } finally { - enumerator.close(); - } + public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + return query.getFilter().getDocIdSet(reader); } - + // members - final String field; - final int precisionStep; - final Number min,max; - final boolean minInclusive,maxInclusive; - - private int lastNumberOfTerms=-1; + final AbstractTrieRangeQuery query; } Index: contrib/queries/src/java/org/apache/lucene/search/trie/AbstractTrieRangeQuery.java =================================================================== --- contrib/queries/src/java/org/apache/lucene/search/trie/AbstractTrieRangeQuery.java (revision 0) +++ 
contrib/queries/src/java/org/apache/lucene/search/trie/AbstractTrieRangeQuery.java (revision 0) @@ -0,0 +1,112 @@ +package org.apache.lucene.search.trie; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.search.Filter; +import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.FilteredTermEnum; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.ToStringUtils; + +abstract class AbstractTrieRangeQuery extends MultiTermQuery { + + AbstractTrieRangeQuery(final String field, final int precisionStep, + Number min, Number max, final boolean minInclusive, final boolean maxInclusive + ) { + this.field = field.intern(); + this.precisionStep = precisionStep; + this.min = min; + this.max = max; + this.minInclusive = minInclusive; + this.maxInclusive = maxInclusive; + setConstantScoreRewrite(true); + } + + abstract void passRanges(TrieRangeTermEnum enumerator); + + //@Override + protected FilteredTermEnum getEnum(final IndexReader reader) throws IOException { + TrieRangeTermEnum enumerator = new TrieRangeTermEnum(this, reader); + passRanges(enumerator); + enumerator.init(); + return enumerator; + } + + /** Returns the 
field name for this query */ + public String getField() { return field; } + + /** Returns true if the lower endpoint is inclusive */ + public boolean includesMin() { return minInclusive; } + + /** Returns true if the upper endpoint is inclusive */ + public boolean includesMax() { return maxInclusive; } + + //@Override + public String toString(final String field) { + final StringBuffer sb=new StringBuffer(); + if (!this.field.equals(field)) sb.append(this.field).append(':'); + return sb.append(minInclusive ? '[' : '{') + .append((min==null) ? "*" : min.toString()) + .append(" TO ") + .append((max==null) ? "*" : max.toString()) + .append(maxInclusive ? ']' : '}').toString(); + } + + //@Override + public final boolean equals(final Object o) { + if (o==this) return true; + if (o==null) return false; + if (this.getClass().equals(o.getClass())) { + AbstractTrieRangeQuery q=(AbstractTrieRangeQuery)o; + return ( + field==q.field && + (q.min == null ? min == null : q.min.equals(min)) && + (q.max == null ? max == null : q.max.equals(max)) && + minInclusive==q.minInclusive && + maxInclusive==q.maxInclusive && + precisionStep==q.precisionStep + ); + } + return false; + } + + //@Override + public final int hashCode() { + int hash = field.hashCode() + (precisionStep^0x64365465); + if (min!=null) hash += min.hashCode()^0x14fa55fb; + if (max!=null) hash += max.hashCode()^0x733fa5fe; + return hash+ + (Boolean.valueOf(minInclusive).hashCode()^0x14fa55fb)+ + (Boolean.valueOf(maxInclusive).hashCode()^0x733fa5fe); + } + + // TODO: Make this method accessible by AbstractTrieRangeFilter, + // can be removed, when moved to core. 
+ //@Override + protected Filter getFilter() { + return super.getFilter(); + } + + // members + final String field; + final int precisionStep; + final Number min,max; + final boolean minInclusive,maxInclusive; +} Index: contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieRangeFilter.java =================================================================== --- contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieRangeFilter.java (revision 764815) +++ contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieRangeFilter.java (working copy) @@ -17,16 +17,8 @@ * limitations under the License. */ -import java.io.IOException; - -import org.apache.lucene.document.Document; import org.apache.lucene.search.Filter; -import org.apache.lucene.search.DocIdSet; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.TermDocs; -import org.apache.lucene.util.OpenBitSet; - /** * Implementation of a Lucene {@link Filter} that implements trie-based range filtering for ints/floats. * This filter depends on a specific structure of terms in the index that can only be created @@ -47,55 +39,7 @@ public IntTrieRangeFilter(final String field, final int precisionStep, final Integer min, final Integer max, final boolean minInclusive, final boolean maxInclusive ) { - super(field,precisionStep,min,max,minInclusive,maxInclusive); + super(new IntTrieRangeQuery(field,precisionStep,min,max,minInclusive,maxInclusive)); } - /** - * Returns a DocIdSet that provides the documents which should be permitted or prohibited in search results. - */ - //@Override - public DocIdSet getDocIdSet(final IndexReader reader) throws IOException { - // calculate the upper and lower bounds respecting the inclusive and null values. - int minBound=(this.min==null) ? Integer.MIN_VALUE : ( - minInclusive ? this.min.intValue() : (this.min.intValue()+1) - ); - int maxBound=(this.max==null) ? Integer.MAX_VALUE : ( - maxInclusive ? 
this.max.intValue() : (this.max.intValue()-1) - ); - - resetLastNumberOfTerms(); - if (minBound > maxBound) { - // shortcut, no docs will match this - return DocIdSet.EMPTY_DOCIDSET; - } else { - final OpenBitSet bits = new OpenBitSet(reader.maxDoc()); - final TermDocs termDocs = reader.termDocs(); - try { - TrieUtils.splitIntRange(new TrieUtils.IntRangeBuilder() { - - //@Override - public final void addRange(String minPrefixCoded, String maxPrefixCoded) { - try { - fillBits( - reader, bits, termDocs, - minPrefixCoded, maxPrefixCoded - ); - } catch (IOException ioe) { - // IntRangeBuilder is not allowed to throw checked exceptions: - // wrap as RuntimeException - throw new RuntimeException(ioe); - } - } - - }, precisionStep, minBound, maxBound); - } catch (RuntimeException e) { - if (e.getCause() instanceof IOException) throw (IOException)e.getCause(); - throw e; - } finally { - termDocs.close(); - } - return bits; - } - } - } Index: contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieRangeQuery.java =================================================================== --- contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieRangeQuery.java (revision 0) +++ contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieRangeQuery.java (revision 0) @@ -0,0 +1,66 @@ +package org.apache.lucene.search.trie; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.search.Query; + +/** + * Implementation of a Lucene {@link Query} that implements trie-based range querying for ints/floats. + * This query depends on a specific structure of terms in the index that can only be created + * by indexing via {@link IntTrieTokenStream} methods. + *

The query is in constant score mode per default. With precision steps of ≤4, this + * query can be run in conventional boolean rewrite mode without changing the max clause count. + * For more information, how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}. + */ +public class IntTrieRangeQuery extends AbstractTrieRangeQuery { + + /** + * A trie query for matching trie coded values using the given field name and + * the default helper field. + * precisionStep must be equal or a multiple of the precisionStep + * used for indexing the values. + * You can leave the bounds open, by supplying null for min and/or + * max. Inclusive/exclusive bounds can also be supplied. + * To query float values use the converter {@link TrieUtils#floatToSortableInt}. + */ + public IntTrieRangeQuery(final String field, final int precisionStep, + final Integer min, final Integer max, final boolean minInclusive, final boolean maxInclusive + ) { + super(field,precisionStep,min,max,minInclusive,maxInclusive); + } + + //@Override + void passRanges(TrieRangeTermEnum enumerator) { + // calculate the upper and lower bounds respecting the inclusive and null values. + int minBound=(this.min==null) ? Integer.MIN_VALUE : ( + minInclusive ? this.min.intValue() : (this.min.intValue()+1) + ); + int maxBound=(this.max==null) ? Integer.MAX_VALUE : ( + maxInclusive ? 
this.max.intValue() : (this.max.intValue()-1) + ); + + TrieUtils.splitIntRange(enumerator.getIntRangeBuilder(), precisionStep, minBound, maxBound); + } + + /** Returns the lower value of this range query */ + public Integer getMin() { return (Integer)min; } + + /** Returns the upper value of this range query */ + public Integer getMax() { return (Integer)max; } + +} Index: contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieRangeFilter.java =================================================================== --- contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieRangeFilter.java (revision 764815) +++ contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieRangeFilter.java (working copy) @@ -17,16 +17,8 @@ * limitations under the License. */ -import java.io.IOException; - -import org.apache.lucene.document.Document; import org.apache.lucene.search.Filter; -import org.apache.lucene.search.DocIdSet; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.TermDocs; -import org.apache.lucene.util.OpenBitSet; - /** * Implementation of a Lucene {@link Filter} that implements trie-based range filtering for longs/doubles. * This filter depends on a specific structure of terms in the index that can only be created @@ -47,55 +39,7 @@ public LongTrieRangeFilter(final String field, final int precisionStep, final Long min, final Long max, final boolean minInclusive, final boolean maxInclusive ) { - super(field,precisionStep,min,max,minInclusive,maxInclusive); + super(new LongTrieRangeQuery(field,precisionStep,min,max,minInclusive,maxInclusive)); } - /** - * Returns a DocIdSet that provides the documents which should be permitted or prohibited in search results. - */ - //@Override - public DocIdSet getDocIdSet(final IndexReader reader) throws IOException { - // calculate the upper and lower bounds respecting the inclusive and null values. - long minBound=(this.min==null) ? Long.MIN_VALUE : ( - minInclusive ? 
this.min.longValue() : (this.min.longValue()+1L) - ); - long maxBound=(this.max==null) ? Long.MAX_VALUE : ( - maxInclusive ? this.max.longValue() : (this.max.longValue()-1L) - ); - - resetLastNumberOfTerms(); - if (minBound > maxBound) { - // shortcut, no docs will match this - return DocIdSet.EMPTY_DOCIDSET; - } else { - final OpenBitSet bits = new OpenBitSet(reader.maxDoc()); - final TermDocs termDocs = reader.termDocs(); - try { - TrieUtils.splitLongRange(new TrieUtils.LongRangeBuilder() { - - //@Override - public final void addRange(String minPrefixCoded, String maxPrefixCoded) { - try { - fillBits( - reader, bits, termDocs, - minPrefixCoded, maxPrefixCoded - ); - } catch (IOException ioe) { - // LongRangeBuilder is not allowed to throw checked exceptions: - // wrap as RuntimeException - throw new RuntimeException(ioe); - } - } - - }, precisionStep, minBound, maxBound); - } catch (RuntimeException e) { - if (e.getCause() instanceof IOException) throw (IOException)e.getCause(); - throw e; - } finally { - termDocs.close(); - } - return bits; - } - } - } Index: contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieRangeQuery.java =================================================================== --- contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieRangeQuery.java (revision 0) +++ contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieRangeQuery.java (revision 0) @@ -0,0 +1,66 @@ +package org.apache.lucene.search.trie; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.search.Query; + +/** + * Implementation of a Lucene {@link Query} that implements trie-based range querying for longs/doubles. + * This query depends on a specific structure of terms in the index that can only be created + * by indexing via {@link LongTrieTokenStream} methods. + *

The query is in constant score mode per default. With precision steps of ≤4, this + * query can be run in conventional boolean rewrite mode without changing the max clause count. + * For more information, how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}. + */ +public class LongTrieRangeQuery extends AbstractTrieRangeQuery { + + /** + * A trie query for matching trie coded values using the given field name and + * the default helper field. + * precisionStep must be equal or a multiple of the precisionStep + * used for indexing the values. + * You can leave the bounds open, by supplying null for min and/or + * max. Inclusive/exclusive bounds can also be supplied. + * To query double values use the converter {@link TrieUtils#doubleToSortableLong}. + */ + public LongTrieRangeQuery(final String field, final int precisionStep, + final Long min, final Long max, final boolean minInclusive, final boolean maxInclusive + ) { + super(field,precisionStep,min,max,minInclusive,maxInclusive); + } + + //@Override + void passRanges(TrieRangeTermEnum enumerator) { + // calculate the upper and lower bounds respecting the inclusive and null values. + long minBound=(this.min==null) ? Long.MIN_VALUE : ( + minInclusive ? this.min.longValue() : (this.min.longValue()+1L) + ); + long maxBound=(this.max==null) ? Long.MAX_VALUE : ( + maxInclusive ? 
this.max.longValue() : (this.max.longValue()-1L) + ); + + TrieUtils.splitLongRange(enumerator.getLongRangeBuilder(), precisionStep, minBound, maxBound); + } + + /** Returns the lower value of this range query */ + public Long getMin() { return (Long)min; } + + /** Returns the upper value of this range query */ + public Long getMax() { return (Long)max; } + +} Index: contrib/queries/src/java/org/apache/lucene/search/trie/package.html =================================================================== --- contrib/queries/src/java/org/apache/lucene/search/trie/package.html (revision 764815) +++ contrib/queries/src/java/org/apache/lucene/search/trie/package.html (working copy) @@ -106,14 +106,14 @@

Searching

The numeric index fields you prepared in this way can be searched by -{@link org.apache.lucene.search.trie.LongTrieRangeFilter} or {@link org.apache.lucene.search.trie.IntTrieRangeFilter}:

+{@link org.apache.lucene.search.trie.LongTrieRangeQuery} or {@link org.apache.lucene.search.trie.IntTrieRangeQuery}:

   // Java 1.4, because Long.valueOf(long) is not available:
-  Query q = new LongTrieRangeFilter("exampleLong", precisionStep, new Long(123L), new Long(999999L), true, true).asQuery();
+  Query q = new LongTrieRangeQuery("exampleLong", precisionStep, new Long(123L), new Long(999999L), true, true);
   
   // OR, Java 1.5, using autoboxing:
-  Query q = new LongTrieRangeFilter("exampleLong", precisionStep, 123L, 999999L, true, true).asQuery();
+  Query q = new LongTrieRangeQuery("exampleLong", precisionStep, 123L, 999999L, true, true);
   
   // execute the search, as usual:
   TopDocs docs = searcher.search(q, 10);
@@ -132,7 +132,7 @@
 that the old {@link org.apache.lucene.search.RangeQuery} (with raised 
 {@link org.apache.lucene.search.BooleanQuery} clause count) took about 30-40 secs to complete,
 {@link org.apache.lucene.search.ConstantScoreRangeQuery} took 5 secs and executing
-{@link org.apache.lucene.search.trie.LongTrieRangeFilter}.asQuery() took <100ms to
+{@link org.apache.lucene.search.trie.LongTrieRangeQuery} took <100ms to
 complete (on an Opteron64 machine, Java 1.5, 8 bit precision step).
 This query type was developed for a geographic portal, where the performance for
 e.g. bounding boxes or exact date/time stamps is important.

Index: contrib/queries/src/java/org/apache/lucene/search/trie/TrieRangeTermEnum.java =================================================================== --- contrib/queries/src/java/org/apache/lucene/search/trie/TrieRangeTermEnum.java (revision 0) +++ contrib/queries/src/java/org/apache/lucene/search/trie/TrieRangeTermEnum.java (revision 0) @@ -0,0 +1,140 @@ +package org.apache.lucene.search.trie; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.LinkedList; + +import org.apache.lucene.search.FilteredTermEnum; +import org.apache.lucene.search.MultiTermQuery; // for javadocs +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; + +/** + * Subclass of FilteredTermEnum for enumerating all terms that match the + * sub-ranges for trie range queries. + *

+ * WARNING: Term enumeration is not guaranteed to always be ordered by + * {@link Term#compareTo}. + * The ordering depends on how {@link TrieUtils#splitLongRange} and + * {@link TrieUtils#splitIntRange} generate the sub-ranges. For + * the {@link MultiTermQuery} ordering is not relevant. + */ +final class TrieRangeTermEnum extends FilteredTermEnum { + + private final AbstractTrieRangeQuery query; + private final IndexReader reader; + private final LinkedList/**/ rangeBounds = new LinkedList/**/(); + private String currentUpperBound = null; + + TrieRangeTermEnum(AbstractTrieRangeQuery query, IndexReader reader) { + this.query = query; + this.reader = reader; + } + + /** Returns a range builder that must be used to feed in the sub-ranges. */ + TrieUtils.IntRangeBuilder getIntRangeBuilder() { + return new TrieUtils.IntRangeBuilder() { + //@Override + public final void addRange(String minPrefixCoded, String maxPrefixCoded) { + rangeBounds.add(minPrefixCoded); + rangeBounds.add(maxPrefixCoded); + } + }; + } + + /** Returns a range builder that must be used to feed in the sub-ranges. */ + TrieUtils.LongRangeBuilder getLongRangeBuilder() { + return new TrieUtils.LongRangeBuilder() { + //@Override + public final void addRange(String minPrefixCoded, String maxPrefixCoded) { + rangeBounds.add(minPrefixCoded); + rangeBounds.add(maxPrefixCoded); + } + }; + } + + /** After feeding the range builder call this method to initialize the enum. */ + void init() throws IOException { + next(); + } + + //@Override + public float difference() { + return 1.0f; + } + + /** this is a dummy, it is not used by this class. */ + //@Override + protected boolean endEnum() { + assert false; // should never be called + return (currentTerm != null); + } + + /** + * Compares if current upper bound is reached, + * this also updates the term count for statistics. 
+ * In contrast to {@link FilteredTermEnum}, a return value + * of false ends iterating the current enum + * and forwards to the next sub-range. + */ + //@Override + protected boolean termCompare(Term term) { + return (term.field() == query.field && term.text().compareTo(currentUpperBound) <= 0); + } + + /** Increments the enumeration to the next element. True if one exists. */ + //@Override + public boolean next() throws IOException { + // if a current term exists, the actual enum is initialized: + // try change to next term, if no such term exists, fall-through + if (currentTerm != null) { + assert actualEnum!=null; + if (actualEnum.next()) { + currentTerm = actualEnum.term(); + if (termCompare(currentTerm)) return true; + } + } + // if all above fails, we go forward to the next enum, + // if one is available + currentTerm = null; + if (rangeBounds.size() < 2) return false; + // close the current enum and read next bounds + if (actualEnum != null) { + actualEnum.close(); + actualEnum = null; + } + final String lowerBound = (String)rangeBounds.removeFirst(); + this.currentUpperBound = (String)rangeBounds.removeFirst(); + // this call recursively uses next(), if no valid term in + // next enum found. + // if this behavior is changed/modified in the superclass, + // this enum will not work anymore! + setEnum(reader.terms(new Term(query.field, lowerBound))); + return (currentTerm != null); + } + + /** Closes the enumeration to further activity, freeing resources. 
*/ + //@Override + public void close() throws IOException { + rangeBounds.clear(); + currentUpperBound = null; + super.close(); + } + +} Index: contrib/queries/src/test/org/apache/lucene/search/trie/TestIntTrieRangeFilter.java =================================================================== --- contrib/queries/src/test/org/apache/lucene/search/trie/TestIntTrieRangeFilter.java (revision 764815) +++ contrib/queries/src/test/org/apache/lucene/search/trie/TestIntTrieRangeFilter.java (working copy) @@ -1,320 +0,0 @@ -package org.apache.lucene.search.trie; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import java.util.Random; - -import org.apache.lucene.analysis.WhitespaceAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriter.MaxFieldLength; -import org.apache.lucene.store.RAMDirectory; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.RangeQuery; -import org.apache.lucene.util.LuceneTestCase; - -public class TestIntTrieRangeFilter extends LuceneTestCase { - // distance of entries - private static final int distance = 6666; - // shift the starting of the values to the left, to also have negative values: - private static final int startOffset = - 1 << 15; - // number of docs to generate for testing - private static final int noDocs = 10000; - - private static Field newField(String name, int precisionStep) { - IntTrieTokenStream stream = new IntTrieTokenStream(precisionStep); - stream.setUseNewAPI(true); - Field f=new Field(name, stream); - f.setOmitTermFreqAndPositions(true); - f.setOmitNorms(true); - return f; - } - - private static final RAMDirectory directory; - private static final IndexSearcher searcher; - static { - try { - directory = new RAMDirectory(); - IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), - true, MaxFieldLength.UNLIMITED); - - Field - field8 = newField("field8", 8), - field4 = newField("field4", 4), - field2 = newField("field2", 2), - ascfield8 = newField("ascfield8", 8), - ascfield4 = newField("ascfield4", 4), - ascfield2 = newField("ascfield2", 2); - - // Add a series of noDocs docs with increasing int values - for (int l=0; lupper) { - int a=lower; lower=upper; upper=a; - } - // test inclusive range - IntTrieRangeFilter tf=new IntTrieRangeFilter(field, precisionStep, new 
Integer(lower), new Integer(upper), true, true); - RangeQuery cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), true, true); - cq.setConstantScoreRewrite(true); - TopDocs tTopDocs = searcher.search(tf.asQuery(), 1); - TopDocs cTopDocs = searcher.search(cq, 1); - assertEquals("Returned count for IntTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); - termCount += tf.getLastNumberOfTerms(); - // test exclusive range - tf=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), false, false); - cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), false, false); - cq.setConstantScoreRewrite(true); - tTopDocs = searcher.search(tf.asQuery(), 1); - cTopDocs = searcher.search(cq, 1); - assertEquals("Returned count for IntTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); - termCount += tf.getLastNumberOfTerms(); - // test left exclusive range - tf=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), false, true); - cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), false, true); - cq.setConstantScoreRewrite(true); - tTopDocs = searcher.search(tf.asQuery(), 1); - cTopDocs = searcher.search(cq, 1); - assertEquals("Returned count for IntTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); - termCount += tf.getLastNumberOfTerms(); - // test right exclusive range - tf=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), true, false); - cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), true, false); - cq.setConstantScoreRewrite(true); - tTopDocs = searcher.search(tf.asQuery(), 1); - cTopDocs = searcher.search(cq, 1); - assertEquals("Returned count for IntTrieRangeFilter and RangeQuery must be equal", 
cTopDocs.totalHits, tTopDocs.totalHits ); - termCount += tf.getLastNumberOfTerms(); - } - System.out.println("Average number of terms during random search on '" + field + "': " + (((double)termCount)/(50*4))); - } - - public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception { - testRandomTrieAndClassicRangeQuery(8); - } - - public void testRandomTrieAndClassicRangeQuery_4bit() throws Exception { - testRandomTrieAndClassicRangeQuery(4); - } - - public void testRandomTrieAndClassicRangeQuery_2bit() throws Exception { - testRandomTrieAndClassicRangeQuery(2); - } - - private void testRangeSplit(int precisionStep) throws Exception { - final Random rnd=newRandom(); - String field="ascfield"+precisionStep; - // 50 random tests - for (int i=0; i<50; i++) { - int lower=(int)(rnd.nextDouble()*noDocs - noDocs/2); - int upper=(int)(rnd.nextDouble()*noDocs - noDocs/2); - if (lower>upper) { - int a=lower; lower=upper; upper=a; - } - // test inclusive range - Query tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), true, true).asQuery(); - TopDocs tTopDocs = searcher.search(tq, 1); - assertEquals("Returned count of range query must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits ); - // test exclusive range - tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), false, false).asQuery(); - tTopDocs = searcher.search(tq, 1); - assertEquals("Returned count of range query must be equal to exclusive range length", Math.max(upper-lower-1, 0), tTopDocs.totalHits ); - // test left exclusive range - tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), false, true).asQuery(); - tTopDocs = searcher.search(tq, 1); - assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits ); - // test right exclusive range - tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new 
Integer(upper), true, false).asQuery(); - tTopDocs = searcher.search(tq, 1); - assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits ); - } - } - - public void testRangeSplit_8bit() throws Exception { - testRangeSplit(8); - } - - public void testRangeSplit_4bit() throws Exception { - testRangeSplit(4); - } - - public void testRangeSplit_2bit() throws Exception { - testRangeSplit(2); - } - - private void testSorting(int precisionStep) throws Exception { - final Random rnd=newRandom(); - String field="field"+precisionStep; - // 10 random tests, the index order is ascending, - // so using a reverse sort field should retun descending documents - for (int i=0; i<10; i++) { - int lower=(int)(rnd.nextDouble()*noDocs*distance)+startOffset; - int upper=(int)(rnd.nextDouble()*noDocs*distance)+startOffset; - if (lower>upper) { - int a=lower; lower=upper; upper=a; - } - Query tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), true, true).asQuery(); - TopDocs topDocs = searcher.search(tq, null, noDocs, new Sort(TrieUtils.getIntSortField(field, true))); - if (topDocs.totalHits==0) continue; - ScoreDoc[] sd = topDocs.scoreDocs; - assertNotNull(sd); - int last=TrieUtils.prefixCodedToInt(searcher.doc(sd[0].doc).get("value")); - for (int j=1; jact ); - last=act; - } - } - } - - public void testSorting_8bit() throws Exception { - testSorting(8); - } - - public void testSorting_4bit() throws Exception { - testSorting(4); - } - - public void testSorting_2bit() throws Exception { - testSorting(2); - } - -} Index: contrib/queries/src/test/org/apache/lucene/search/trie/TestIntTrieRangeQuery.java =================================================================== --- contrib/queries/src/test/org/apache/lucene/search/trie/TestIntTrieRangeQuery.java (revision 0) +++ contrib/queries/src/test/org/apache/lucene/search/trie/TestIntTrieRangeQuery.java (revision 0) @@ -0,0 +1,370 @@ +package 
org.apache.lucene.search.trie; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.Random; + +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriter.MaxFieldLength; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.RangeQuery; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.util.LuceneTestCase; + +public class TestIntTrieRangeQuery extends LuceneTestCase { + // distance of entries + private static final int distance = 6666; + // shift the starting of the values to the left, to also have negative values: + private static final int startOffset = - 1 << 15; + // number of docs to generate for testing + private static final int noDocs = 10000; + + private static Field 
newField(String name, int precisionStep) { + IntTrieTokenStream stream = new IntTrieTokenStream(precisionStep); + stream.setUseNewAPI(true); + Field f=new Field(name, stream); + f.setOmitTermFreqAndPositions(true); + f.setOmitNorms(true); + return f; + } + + private static final RAMDirectory directory; + private static final IndexSearcher searcher; + static { + try { + // set the theoretical maximum term count for 8bit (see docs for the number) + BooleanQuery.setMaxClauseCount(3*255*2 + 255); + + directory = new RAMDirectory(); + IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), + true, MaxFieldLength.UNLIMITED); + + Field + field8 = newField("field8", 8), + field4 = newField("field4", 4), + field2 = newField("field2", 2), + ascfield8 = newField("ascfield8", 8), + ascfield4 = newField("ascfield4", 4), + ascfield2 = newField("ascfield2", 2); + + // Add a series of noDocs docs with increasing int values + for (int l=0; l0) { + assertEquals("Distinct term number is equal for all query types", lastTerms, terms); + } + lastTerms = terms; + } + } + + public void testRange_8bit() throws Exception { + testRange(8); + } + + public void testRange_4bit() throws Exception { + testRange(4); + } + + public void testRange_2bit() throws Exception { + testRange(2); + } + + public void testInverseRange() throws Exception { + IntTrieRangeFilter f = new IntTrieRangeFilter("field8", 8, new Integer(1000), new Integer(-1000), true, true); + assertSame("A inverse range should return the EMPTY_DOCIDSET instance", DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(searcher.getIndexReader())); + } + + private void testLeftOpenRange(int precisionStep) throws Exception { + String field="field"+precisionStep; + int count=3000; + int upper=(count-1)*distance + (distance/3) + startOffset; + IntTrieRangeQuery q=new IntTrieRangeQuery(field, precisionStep, null, new Integer(upper), true, true); + TopDocs topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); + 
System.out.println("Found "+q.getLastNumberOfTerms()+" distinct terms in left open range for field '"+field+"'."); + ScoreDoc[] sd = topDocs.scoreDocs; + assertNotNull(sd); + assertEquals("Score doc count", count, sd.length ); + Document doc=searcher.doc(sd[0].doc); + assertEquals("First doc", startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) ); + doc=searcher.doc(sd[sd.length-1].doc); + assertEquals("Last doc", (count-1)*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) ); + } + + public void testLeftOpenRange_8bit() throws Exception { + testLeftOpenRange(8); + } + + public void testLeftOpenRange_4bit() throws Exception { + testLeftOpenRange(4); + } + + public void testLeftOpenRange_2bit() throws Exception { + testLeftOpenRange(2); + } + + private void testRightOpenRange(int precisionStep) throws Exception { + String field="field"+precisionStep; + int count=3000; + int lower=(count-1)*distance + (distance/3) +startOffset; + IntTrieRangeQuery q=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), null, true, true); + TopDocs topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); + System.out.println("Found "+q.getLastNumberOfTerms()+" distinct terms in right open range for field '"+field+"'."); + ScoreDoc[] sd = topDocs.scoreDocs; + assertNotNull(sd); + assertEquals("Score doc count", noDocs-count, sd.length ); + Document doc=searcher.doc(sd[0].doc); + assertEquals("First doc", count*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) ); + doc=searcher.doc(sd[sd.length-1].doc); + assertEquals("Last doc", (noDocs-1)*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) ); + } + + public void testRightOpenRange_8bit() throws Exception { + testRightOpenRange(8); + } + + public void testRightOpenRange_4bit() throws Exception { + testRightOpenRange(4); + } + + public void testRightOpenRange_2bit() throws Exception { + testRightOpenRange(2); + } + + private void testRandomTrieAndClassicRangeQuery(int 
precisionStep) throws Exception { + final Random rnd=newRandom(); + String field="field"+precisionStep; + int termCountT=0,termCountC=0; + for (int i=0; i<50; i++) { + int lower=(int)(rnd.nextDouble()*noDocs*distance)+startOffset; + int upper=(int)(rnd.nextDouble()*noDocs*distance)+startOffset; + if (lower>upper) { + int a=lower; lower=upper; upper=a; + } + // test inclusive range + IntTrieRangeQuery tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, true); + RangeQuery cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), true, true); + cq.setConstantScoreRewrite(true); + TopDocs tTopDocs = searcher.search(tq, 1); + TopDocs cTopDocs = searcher.search(cq, 1); + assertEquals("Returned count for IntTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); + termCountT += tq.getLastNumberOfTerms(); + termCountC += cq.getLastNumberOfTerms(); + // test exclusive range + tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), false, false); + cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), false, false); + cq.setConstantScoreRewrite(true); + tTopDocs = searcher.search(tq, 1); + cTopDocs = searcher.search(cq, 1); + assertEquals("Returned count for IntTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); + termCountT += tq.getLastNumberOfTerms(); + termCountC += cq.getLastNumberOfTerms(); + // test left exclusive range + tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), false, true); + cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), false, true); + cq.setConstantScoreRewrite(true); + tTopDocs = searcher.search(tq, 1); + cTopDocs = searcher.search(cq, 1); + assertEquals("Returned count for IntTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, 
tTopDocs.totalHits ); + termCountT += tq.getLastNumberOfTerms(); + termCountC += cq.getLastNumberOfTerms(); + // test right exclusive range + tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, false); + cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), true, false); + cq.setConstantScoreRewrite(true); + tTopDocs = searcher.search(tq, 1); + cTopDocs = searcher.search(cq, 1); + assertEquals("Returned count for IntTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); + termCountT += tq.getLastNumberOfTerms(); + termCountC += cq.getLastNumberOfTerms(); + } + System.out.println("Average number of terms during random search on '" + field + "':"); + System.out.println(" Trie query: " + (((double)termCountT)/(50*4))); + System.out.println(" Classical query: " + (((double)termCountC)/(50*4))); + } + + public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception { + testRandomTrieAndClassicRangeQuery(8); + } + + public void testRandomTrieAndClassicRangeQuery_4bit() throws Exception { + testRandomTrieAndClassicRangeQuery(4); + } + + public void testRandomTrieAndClassicRangeQuery_2bit() throws Exception { + testRandomTrieAndClassicRangeQuery(2); + } + + private void testRangeSplit(int precisionStep) throws Exception { + final Random rnd=newRandom(); + String field="ascfield"+precisionStep; + // 50 random tests + for (int i=0; i<50; i++) { + int lower=(int)(rnd.nextDouble()*noDocs - noDocs/2); + int upper=(int)(rnd.nextDouble()*noDocs - noDocs/2); + if (lower>upper) { + int a=lower; lower=upper; upper=a; + } + // test inclusive range + Query tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, true); + TopDocs tTopDocs = searcher.search(tq, 1); + assertEquals("Returned count of range query must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits ); + // test exclusive range + tq=new 
IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), false, false); + tTopDocs = searcher.search(tq, 1); + assertEquals("Returned count of range query must be equal to exclusive range length", Math.max(upper-lower-1, 0), tTopDocs.totalHits ); + // test left exclusive range + tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), false, true); + tTopDocs = searcher.search(tq, 1); + assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits ); + // test right exclusive range + tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, false); + tTopDocs = searcher.search(tq, 1); + assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits ); + } + } + + public void testRangeSplit_8bit() throws Exception { + testRangeSplit(8); + } + + public void testRangeSplit_4bit() throws Exception { + testRangeSplit(4); + } + + public void testRangeSplit_2bit() throws Exception { + testRangeSplit(2); + } + + private void testSorting(int precisionStep) throws Exception { + final Random rnd=newRandom(); + String field="field"+precisionStep; + // 10 random tests, the index order is ascending, + // so using a reverse sort field should retun descending documents + for (int i=0; i<10; i++) { + int lower=(int)(rnd.nextDouble()*noDocs*distance)+startOffset; + int upper=(int)(rnd.nextDouble()*noDocs*distance)+startOffset; + if (lower>upper) { + int a=lower; lower=upper; upper=a; + } + Query tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, true); + TopDocs topDocs = searcher.search(tq, null, noDocs, new Sort(TrieUtils.getIntSortField(field, true))); + if (topDocs.totalHits==0) continue; + ScoreDoc[] sd = topDocs.scoreDocs; + assertNotNull(sd); + int last=TrieUtils.prefixCodedToInt(searcher.doc(sd[0].doc).get("value")); + for (int 
j=1; jact ); + last=act; + } + } + } + + public void testSorting_8bit() throws Exception { + testSorting(8); + } + + public void testSorting_4bit() throws Exception { + testSorting(4); + } + + public void testSorting_2bit() throws Exception { + testSorting(2); + } + +} Index: contrib/queries/src/test/org/apache/lucene/search/trie/TestLongTrieRangeFilter.java =================================================================== --- contrib/queries/src/test/org/apache/lucene/search/trie/TestLongTrieRangeFilter.java (revision 764815) +++ contrib/queries/src/test/org/apache/lucene/search/trie/TestLongTrieRangeFilter.java (working copy) @@ -1,320 +0,0 @@ -package org.apache.lucene.search.trie; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import java.util.Random; - -import org.apache.lucene.analysis.WhitespaceAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriter.MaxFieldLength; -import org.apache.lucene.store.RAMDirectory; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.RangeQuery; -import org.apache.lucene.util.LuceneTestCase; - -public class TestLongTrieRangeFilter extends LuceneTestCase { - // distance of entries - private static final long distance = 66666L; - // shift the starting of the values to the left, to also have negative values: - private static final long startOffset = - 1L << 31; - // number of docs to generate for testing - private static final int noDocs = 10000; - - private static Field newField(String name, int precisionStep) { - LongTrieTokenStream stream = new LongTrieTokenStream(precisionStep); - stream.setUseNewAPI(true); - Field f=new Field(name, stream); - f.setOmitTermFreqAndPositions(true); - f.setOmitNorms(true); - return f; - } - - private static final RAMDirectory directory; - private static final IndexSearcher searcher; - static { - try { - directory = new RAMDirectory(); - IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), - true, MaxFieldLength.UNLIMITED); - - Field - field8 = newField("field8", 8), - field4 = newField("field4", 4), - field2 = newField("field2", 2), - ascfield8 = newField("ascfield8", 8), - ascfield4 = newField("ascfield4", 4), - ascfield2 = newField("ascfield2", 2); - - // Add a series of noDocs docs with increasing long values - for (int l=0; lupper) { - long a=lower; lower=upper; upper=a; - } - // test inclusive range - LongTrieRangeFilter tf=new LongTrieRangeFilter(field, precisionStep, new 
Long(lower), new Long(upper), true, true); - RangeQuery cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), true, true); - cq.setConstantScoreRewrite(true); - TopDocs tTopDocs = searcher.search(tf.asQuery(), 1); - TopDocs cTopDocs = searcher.search(cq, 1); - assertEquals("Returned count for LongTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); - termCount += tf.getLastNumberOfTerms(); - // test exclusive range - tf=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), false, false); - cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), false, false); - cq.setConstantScoreRewrite(true); - tTopDocs = searcher.search(tf.asQuery(), 1); - cTopDocs = searcher.search(cq, 1); - assertEquals("Returned count for LongTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); - termCount += tf.getLastNumberOfTerms(); - // test left exclusive range - tf=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), false, true); - cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), false, true); - cq.setConstantScoreRewrite(true); - tTopDocs = searcher.search(tf.asQuery(), 1); - cTopDocs = searcher.search(cq, 1); - assertEquals("Returned count for LongTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); - termCount += tf.getLastNumberOfTerms(); - // test right exclusive range - tf=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, false); - cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), true, false); - cq.setConstantScoreRewrite(true); - tTopDocs = searcher.search(tf.asQuery(), 1); - cTopDocs = searcher.search(cq, 1); - assertEquals("Returned count for LongTrieRangeFilter and RangeQuery must be equal", 
cTopDocs.totalHits, tTopDocs.totalHits ); - termCount += tf.getLastNumberOfTerms(); - } - System.out.println("Average number of terms during random search on '" + field + "': " + (((double)termCount)/(50*4))); - } - - public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception { - testRandomTrieAndClassicRangeQuery(8); - } - - public void testRandomTrieAndClassicRangeQuery_4bit() throws Exception { - testRandomTrieAndClassicRangeQuery(4); - } - - public void testRandomTrieAndClassicRangeQuery_2bit() throws Exception { - testRandomTrieAndClassicRangeQuery(2); - } - - private void testRangeSplit(int precisionStep) throws Exception { - final Random rnd=newRandom(); - String field="ascfield"+precisionStep; - // 50 random tests - for (int i=0; i<50; i++) { - long lower=(long)(rnd.nextDouble()*noDocs - noDocs/2); - long upper=(long)(rnd.nextDouble()*noDocs - noDocs/2); - if (lower>upper) { - long a=lower; lower=upper; upper=a; - } - // test inclusive range - Query tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, true).asQuery(); - TopDocs tTopDocs = searcher.search(tq, 1); - assertEquals("Returned count of range query must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits ); - // test exclusive range - tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), false, false).asQuery(); - tTopDocs = searcher.search(tq, 1); - assertEquals("Returned count of range query must be equal to exclusive range length", Math.max(upper-lower-1, 0), tTopDocs.totalHits ); - // test left exclusive range - tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), false, true).asQuery(); - tTopDocs = searcher.search(tq, 1); - assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits ); - // test right exclusive range - tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, 
false).asQuery(); - tTopDocs = searcher.search(tq, 1); - assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits ); - } - } - - public void testRangeSplit_8bit() throws Exception { - testRangeSplit(8); - } - - public void testRangeSplit_4bit() throws Exception { - testRangeSplit(4); - } - - public void testRangeSplit_2bit() throws Exception { - testRangeSplit(2); - } - - private void testSorting(int precisionStep) throws Exception { - final Random rnd=newRandom(); - String field="field"+precisionStep; - // 10 random tests, the index order is ascending, - // so using a reverse sort field should retun descending documents - for (int i=0; i<10; i++) { - long lower=(long)(rnd.nextDouble()*noDocs*distance)+startOffset; - long upper=(long)(rnd.nextDouble()*noDocs*distance)+startOffset; - if (lower>upper) { - long a=lower; lower=upper; upper=a; - } - Query tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, true).asQuery(); - TopDocs topDocs = searcher.search(tq, null, noDocs, new Sort(TrieUtils.getLongSortField(field, true))); - if (topDocs.totalHits==0) continue; - ScoreDoc[] sd = topDocs.scoreDocs; - assertNotNull(sd); - long last=TrieUtils.prefixCodedToLong(searcher.doc(sd[0].doc).get("value")); - for (int j=1; jact ); - last=act; - } - } - } - - public void testSorting_8bit() throws Exception { - testSorting(8); - } - - public void testSorting_4bit() throws Exception { - testSorting(4); - } - - public void testSorting_2bit() throws Exception { - testSorting(2); - } - -} Index: contrib/queries/src/test/org/apache/lucene/search/trie/TestLongTrieRangeQuery.java =================================================================== --- contrib/queries/src/test/org/apache/lucene/search/trie/TestLongTrieRangeQuery.java (revision 0) +++ contrib/queries/src/test/org/apache/lucene/search/trie/TestLongTrieRangeQuery.java (revision 0) @@ -0,0 +1,370 @@ +package 
org.apache.lucene.search.trie; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.Random; + +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriter.MaxFieldLength; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.RangeQuery; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.util.LuceneTestCase; + +public class TestLongTrieRangeQuery extends LuceneTestCase { + // distance of entries + private static final long distance = 66666L; + // shift the starting of the values to the left, to also have negative values: + private static final long startOffset = - 1L << 31; + // number of docs to generate for testing + private static final int noDocs = 10000; + + private static Field 
newField(String name, int precisionStep) { + LongTrieTokenStream stream = new LongTrieTokenStream(precisionStep); + stream.setUseNewAPI(true); + Field f=new Field(name, stream); + f.setOmitTermFreqAndPositions(true); + f.setOmitNorms(true); + return f; + } + + private static final RAMDirectory directory; + private static final IndexSearcher searcher; + static { + try { + // set the theoretical maximum term count for 8bit (see docs for the number) + BooleanQuery.setMaxClauseCount(7*255*2 + 255); + + directory = new RAMDirectory(); + IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), + true, MaxFieldLength.UNLIMITED); + + Field + field8 = newField("field8", 8), + field4 = newField("field4", 4), + field2 = newField("field2", 2), + ascfield8 = newField("ascfield8", 8), + ascfield4 = newField("ascfield4", 4), + ascfield2 = newField("ascfield2", 2); + + // Add a series of noDocs docs with increasing long values + for (int l=0; l0) { + assertEquals("Distinct term number is equal for all query types", lastTerms, terms); + } + lastTerms = terms; + } + } + + public void testRange_8bit() throws Exception { + testRange(8); + } + + public void testRange_4bit() throws Exception { + testRange(4); + } + + public void testRange_2bit() throws Exception { + testRange(2); + } + + public void testInverseRange() throws Exception { + LongTrieRangeFilter f = new LongTrieRangeFilter("field8", 8, new Long(1000L), new Long(-1000L), true, true); + assertSame("A inverse range should return the EMPTY_DOCIDSET instance", DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(searcher.getIndexReader())); + } + + private void testLeftOpenRange(int precisionStep) throws Exception { + String field="field"+precisionStep; + int count=3000; + long upper=(count-1)*distance + (distance/3) + startOffset; + LongTrieRangeQuery q=new LongTrieRangeQuery(field, precisionStep, null, new Long(upper), true, true); + TopDocs topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); + 
System.out.println("Found "+q.getLastNumberOfTerms()+" distinct terms in left open range for field '"+field+"'."); + ScoreDoc[] sd = topDocs.scoreDocs; + assertNotNull(sd); + assertEquals("Score doc count", count, sd.length ); + Document doc=searcher.doc(sd[0].doc); + assertEquals("First doc", startOffset, TrieUtils.prefixCodedToLong(doc.get("value")) ); + doc=searcher.doc(sd[sd.length-1].doc); + assertEquals("Last doc", (count-1)*distance+startOffset, TrieUtils.prefixCodedToLong(doc.get("value")) ); + } + + public void testLeftOpenRange_8bit() throws Exception { + testLeftOpenRange(8); + } + + public void testLeftOpenRange_4bit() throws Exception { + testLeftOpenRange(4); + } + + public void testLeftOpenRange_2bit() throws Exception { + testLeftOpenRange(2); + } + + private void testRightOpenRange(int precisionStep) throws Exception { + String field="field"+precisionStep; + int count=3000; + long lower=(count-1)*distance + (distance/3) +startOffset; + LongTrieRangeQuery q=new LongTrieRangeQuery(field, precisionStep, new Long(lower), null, true, true); + TopDocs topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); + System.out.println("Found "+q.getLastNumberOfTerms()+" distinct terms in right open range for field '"+field+"'."); + ScoreDoc[] sd = topDocs.scoreDocs; + assertNotNull(sd); + assertEquals("Score doc count", noDocs-count, sd.length ); + Document doc=searcher.doc(sd[0].doc); + assertEquals("First doc", count*distance+startOffset, TrieUtils.prefixCodedToLong(doc.get("value")) ); + doc=searcher.doc(sd[sd.length-1].doc); + assertEquals("Last doc", (noDocs-1)*distance+startOffset, TrieUtils.prefixCodedToLong(doc.get("value")) ); + } + + public void testRightOpenRange_8bit() throws Exception { + testRightOpenRange(8); + } + + public void testRightOpenRange_4bit() throws Exception { + testRightOpenRange(4); + } + + public void testRightOpenRange_2bit() throws Exception { + testRightOpenRange(2); + } + + private void 
testRandomTrieAndClassicRangeQuery(int precisionStep) throws Exception { + final Random rnd=newRandom(); + String field="field"+precisionStep; + int termCountT=0,termCountC=0; + for (int i=0; i<50; i++) { + long lower=(long)(rnd.nextDouble()*noDocs*distance)+startOffset; + long upper=(long)(rnd.nextDouble()*noDocs*distance)+startOffset; + if (lower>upper) { + long a=lower; lower=upper; upper=a; + } + // test inclusive range + LongTrieRangeQuery tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, true); + RangeQuery cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), true, true); + cq.setConstantScoreRewrite(true); + TopDocs tTopDocs = searcher.search(tq, 1); + TopDocs cTopDocs = searcher.search(cq, 1); + assertEquals("Returned count for LongTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); + termCountT += tq.getLastNumberOfTerms(); + termCountC += cq.getLastNumberOfTerms(); + // test exclusive range + tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), false, false); + cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), false, false); + cq.setConstantScoreRewrite(true); + tTopDocs = searcher.search(tq, 1); + cTopDocs = searcher.search(cq, 1); + assertEquals("Returned count for LongTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); + termCountT += tq.getLastNumberOfTerms(); + termCountC += cq.getLastNumberOfTerms(); + // test left exclusive range + tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), false, true); + cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), false, true); + cq.setConstantScoreRewrite(true); + tTopDocs = searcher.search(tq, 1); + cTopDocs = searcher.search(cq, 1); + assertEquals("Returned count for LongTrieRangeQuery and RangeQuery must be 
equal", cTopDocs.totalHits, tTopDocs.totalHits ); + termCountT += tq.getLastNumberOfTerms(); + termCountC += cq.getLastNumberOfTerms(); + // test right exclusive range + tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, false); + cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), true, false); + cq.setConstantScoreRewrite(true); + tTopDocs = searcher.search(tq, 1); + cTopDocs = searcher.search(cq, 1); + assertEquals("Returned count for LongTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); + termCountT += tq.getLastNumberOfTerms(); + termCountC += cq.getLastNumberOfTerms(); + } + System.out.println("Average number of terms during random search on '" + field + "':"); + System.out.println(" Trie query: " + (((double)termCountT)/(50*4))); + System.out.println(" Classical query: " + (((double)termCountC)/(50*4))); + } + + public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception { + testRandomTrieAndClassicRangeQuery(8); + } + + public void testRandomTrieAndClassicRangeQuery_4bit() throws Exception { + testRandomTrieAndClassicRangeQuery(4); + } + + public void testRandomTrieAndClassicRangeQuery_2bit() throws Exception { + testRandomTrieAndClassicRangeQuery(2); + } + + private void testRangeSplit(int precisionStep) throws Exception { + final Random rnd=newRandom(); + String field="ascfield"+precisionStep; + // 50 random tests + for (int i=0; i<50; i++) { + long lower=(long)(rnd.nextDouble()*noDocs - noDocs/2); + long upper=(long)(rnd.nextDouble()*noDocs - noDocs/2); + if (lower>upper) { + long a=lower; lower=upper; upper=a; + } + // test inclusive range + Query tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, true); + TopDocs tTopDocs = searcher.search(tq, 1); + assertEquals("Returned count of range query must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits ); + // test 
exclusive range + tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), false, false); + tTopDocs = searcher.search(tq, 1); + assertEquals("Returned count of range query must be equal to exclusive range length", Math.max(upper-lower-1, 0), tTopDocs.totalHits ); + // test left exclusive range + tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), false, true); + tTopDocs = searcher.search(tq, 1); + assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits ); + // test right exclusive range + tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, false); + tTopDocs = searcher.search(tq, 1); + assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits ); + } + } + + public void testRangeSplit_8bit() throws Exception { + testRangeSplit(8); + } + + public void testRangeSplit_4bit() throws Exception { + testRangeSplit(4); + } + + public void testRangeSplit_2bit() throws Exception { + testRangeSplit(2); + } + + private void testSorting(int precisionStep) throws Exception { + final Random rnd=newRandom(); + String field="field"+precisionStep; + // 10 random tests, the index order is ascending, + // so using a reverse sort field should return descending documents + for (int i=0; i<10; i++) { + long lower=(long)(rnd.nextDouble()*noDocs*distance)+startOffset; + long upper=(long)(rnd.nextDouble()*noDocs*distance)+startOffset; + if (lower>upper) { + long a=lower; lower=upper; upper=a; + } + Query tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, true); + TopDocs topDocs = searcher.search(tq, null, noDocs, new Sort(TrieUtils.getLongSortField(field, true))); + if (topDocs.totalHits==0) continue; + ScoreDoc[] sd = topDocs.scoreDocs; + assertNotNull(sd); + long
last=TrieUtils.prefixCodedToLong(searcher.doc(sd[0].doc).get("value")); + for (int j=1; j<sd.length; j++) { + long act=TrieUtils.prefixCodedToLong(searcher.doc(sd[j].doc).get("value")); + assertTrue("Docs should be sorted backwards", last>act ); + last=act; + } + } + } + + public void testSorting_8bit() throws Exception { + testSorting(8); + } + + public void testSorting_4bit() throws Exception { + testSorting(4); + } + + public void testSorting_2bit() throws Exception { + testSorting(2); + } + +}