Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java (revision 1448726) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java (revision ) @@ -70,8 +70,9 @@ int detailLevel = grid.getLevelForDistance(args.resolveDistErr(ctx, distErrPct)); - return new RecursivePrefixTreeFilter( - getFieldName(), grid, shape, prefixGridScanLevel, detailLevel); + return new IntersectsPrefixTreeFilter( + shape, getFieldName(), grid, detailLevel, prefixGridScanLevel, + true);//hasIndexedLeaves } } Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeFilter.java =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeFilter.java (revision 1448726) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeFilter.java (revision 1448726) @@ -1,212 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.lucene.spatial.prefix; - -import com.spatial4j.core.shape.Shape; -import com.spatial4j.core.shape.SpatialRelation; -import org.apache.lucene.index.AtomicReader; -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.DocsEnum; -import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.DocIdSet; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.Filter; -import org.apache.lucene.spatial.prefix.tree.Node; -import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; -import org.apache.lucene.util.Bits; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.OpenBitSet; -import org.apache.lucene.util.StringHelper; - -import java.io.IOException; -import java.util.LinkedList; - -/** - * Performs a spatial intersection filter between a query shape and a field - * indexed with {@link SpatialPrefixTree}, a Trie. SPT yields terms (grids) at - * length 1 (aka "Level 1") and at greater lengths corresponding to greater - * precisions. This filter recursively traverses each grid length and uses - * methods on {@link Shape} to efficiently know that all points at a prefix fit - * in the shape or not to either short-circuit unnecessary traversals or to - * efficiently load all enclosed points. If no indexed data lies in a portion - * of the shape then that portion of the query shape is quickly passed over - * without decomposing the shape unnecessarily. - * - * @lucene.internal - */ -public class RecursivePrefixTreeFilter extends Filter { - - /* TODOs for future: - -Can a polygon query shape be optimized / made-simpler at recursive depths (e.g. intersection of shape + cell box) - -RE "scan" threshold: - // IF configured to do so, we could use term.freq() as an estimate on the number of places at this depth. 
OR, perhaps - // make estimates based on the total known term count at this level? - if (!scan) { - //Make some estimations on how many points there are at this level and how few there would need to be to set - // !scan to false. - long termsThreshold = (long) estimateNumberIndexedTerms(cell.length(),queryShape.getDocFreqExpenseThreshold(cell)); - long thisOrd = termsEnum.ord(); - scan = (termsEnum.seek(thisOrd+termsThreshold+1) == TermsEnum.SeekStatus.END - || !cell.contains(termsEnum.term())); - termsEnum.seek(thisOrd);//return to last position - } - - */ - - private final String fieldName; - private final SpatialPrefixTree grid; - private final Shape queryShape; - private final int prefixGridScanLevel;//at least one less than grid.getMaxLevels() - private final int detailLevel; - - public RecursivePrefixTreeFilter(String fieldName, SpatialPrefixTree grid, Shape queryShape, int prefixGridScanLevel, - int detailLevel) { - this.fieldName = fieldName; - this.grid = grid; - this.queryShape = queryShape; - this.prefixGridScanLevel = Math.max(1,Math.min(prefixGridScanLevel,grid.getMaxLevels()-1)); - this.detailLevel = detailLevel; - assert detailLevel <= grid.getMaxLevels(); - } - - @Override - public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits acceptDocs) throws IOException { - AtomicReader reader = ctx.reader(); - OpenBitSet bits = new OpenBitSet(reader.maxDoc()); - Terms terms = reader.terms(fieldName); - if (terms == null) - return null; - TermsEnum termsEnum = terms.iterator(null); - DocsEnum docsEnum = null;//cached for termsEnum.docs() calls - Node scanCell = null; - - //cells is treated like a stack. LinkedList conveniently has bulk add to beginning. It's in sorted order so that we - // always advance forward through the termsEnum index. 
- LinkedList cells = new LinkedList( - grid.getWorldNode().getSubCells(queryShape) ); - - //This is a recursive algorithm that starts with one or more "big" cells, and then recursively dives down into the - // first such cell that intersects with the query shape. It's a depth first traversal because we don't move onto - // the next big cell (breadth) until we're completely done considering all smaller cells beneath it. For a given - // cell, if it's *within* the query shape then we can conveniently short-circuit the depth traversal and - // grab all documents assigned to this cell/term. For an intersection of the cell and query shape, we either - // recursively step down another grid level or we decide heuristically (via prefixGridScanLevel) that there aren't - // that many points, and so we scan through all terms within this cell (i.e. the term starts with the cell's term), - // seeing which ones are within the query shape. - while(!cells.isEmpty()) { - final Node cell = cells.removeFirst(); - final BytesRef cellTerm = new BytesRef(cell.getTokenBytes()); - if (!termsEnum.seekExact(cellTerm, true)) - continue; - if (cell.getLevel() == detailLevel || cell.isLeaf()) { - docsEnum = termsEnum.docs(acceptDocs, docsEnum, DocsEnum.FLAG_NONE); - addDocs(docsEnum,bits); - } else {//any other intersection - assert cell.getLevel() < detailLevel; //assertions help clarify logic - assert !cell.isLeaf(); - //If the next indexed term just adds a leaf marker ('+') to cell, - // then add all of those docs - BytesRef nextCellTerm = termsEnum.next(); - if (nextCellTerm == null) - break; - assert StringHelper.startsWith(nextCellTerm, cellTerm); - scanCell = grid.getNode(nextCellTerm.bytes, nextCellTerm.offset, nextCellTerm.length, scanCell); - if (scanCell.getLevel() == cell.getLevel() && scanCell.isLeaf()) { - docsEnum = termsEnum.docs(acceptDocs, docsEnum, DocsEnum.FLAG_NONE); - addDocs(docsEnum,bits); - //increment pointer to avoid potential redundant addDocs() below - nextCellTerm 
= termsEnum.next(); - if (nextCellTerm == null) - break; - } - - //Decide whether to continue to divide & conquer, or whether it's time to scan through terms beneath this cell. - // Scanning is a performance optimization trade-off. - boolean scan = cell.getLevel() >= prefixGridScanLevel;//simple heuristic - - if (!scan) { - //Divide & conquer - cells.addAll(0, cell.getSubCells(queryShape));//add to beginning - } else { - //Scan through all terms within this cell to see if they are within the queryShape. No seek()s. - for(BytesRef term = termsEnum.term(); term != null && StringHelper.startsWith(term,cellTerm); term = termsEnum.next()) { - scanCell = grid.getNode(term.bytes, term.offset, term.length, scanCell); - int termLevel = scanCell.getLevel(); - if (termLevel > detailLevel) - continue; - if (termLevel == detailLevel || scanCell.isLeaf()) { - Shape cShape; - //if this cell represents a point, use the cell center vs the box - // (points never have isLeaf()) - if (termLevel == grid.getMaxLevels() && !scanCell.isLeaf()) - cShape = scanCell.getCenter(); - else - cShape = scanCell.getShape(); - if(queryShape.relate(cShape) == SpatialRelation.DISJOINT) - continue; - - docsEnum = termsEnum.docs(acceptDocs, docsEnum, DocsEnum.FLAG_NONE); - addDocs(docsEnum,bits); - } - }//term loop - } - } - }//cell loop - - return bits; - } - - private void addDocs(DocsEnum docsEnum, OpenBitSet bits) throws IOException { - int docid; - while ((docid = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { - bits.fastSet(docid); - } - } - - @Override - public String toString() { - return getClass().getSimpleName()+"{fieldName='" + fieldName + '\'' + ", shape=" + queryShape + '}'; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - - RecursivePrefixTreeFilter that = (RecursivePrefixTreeFilter) o; - - if (!fieldName.equals(that.fieldName)) return false; - //note that we don't need to look at 
grid since for the same field it should be the same - if (prefixGridScanLevel != that.prefixGridScanLevel) return false; - if (detailLevel != that.detailLevel) return false; - if (!queryShape.equals(that.queryShape)) return false; - - return true; - } - - @Override - public int hashCode() { - int result = fieldName.hashCode(); - result = 31 * result + queryShape.hashCode(); - result = 31 * result + detailLevel; - return result; - } -} Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/AbstractVisitingPrefixTreeFilter.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/AbstractVisitingPrefixTreeFilter.java (revision ) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/AbstractVisitingPrefixTreeFilter.java (revision ) @@ -0,0 +1,377 @@ +package org.apache.lucene.spatial.prefix; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import com.spatial4j.core.shape.Shape; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.spatial.prefix.tree.Node; +import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.StringHelper; + +import java.io.IOException; +import java.util.Iterator; + +/** + * Traverses a {@link SpatialPrefixTree} indexed field, using the template & + * visitor design patterns for subclasses to guide the traversal and collect + * matching documents. + *

+ * Subclasses implement {@link #getDocIdSet(org.apache.lucene.index.AtomicReaderContext, + * org.apache.lucene.util.Bits)} by instantiating a custom {@link + * VisitorTemplate} subclass (i.e. an anonymous inner class) and implement the + * required methods. + * + * @lucene.internal + */ +public abstract class AbstractVisitingPrefixTreeFilter extends AbstractPrefixTreeFilter { + + //Historical note: this code resulted from a refactoring of RecursivePrefixTreeFilter, + // which in turn came out of SOLR-2155 + + protected final int prefixGridScanLevel;//at least one less than grid.getMaxLevels() + + public AbstractVisitingPrefixTreeFilter(Shape queryShape, String fieldName, SpatialPrefixTree grid, + int detailLevel, int prefixGridScanLevel) { + super(queryShape, fieldName, grid, detailLevel); + this.prefixGridScanLevel = Math.max(1, Math.min(prefixGridScanLevel, grid.getMaxLevels() - 1)); + assert detailLevel <= grid.getMaxLevels(); + } + + @Override + public boolean equals(Object o) { + if (!super.equals(o)) return false;//checks getClass == o.getClass & instanceof + + AbstractVisitingPrefixTreeFilter that = (AbstractVisitingPrefixTreeFilter) o; + + if (prefixGridScanLevel != that.prefixGridScanLevel) return false; + + return true; + } + + @Override + public int hashCode() { + int result = super.hashCode(); + result = 31 * result + prefixGridScanLevel; + return result; + } + + /** + * An abstract class designed to make it easy to implement predicates or + * other operations on a {@link SpatialPrefixTree} indexed field. An instance + * of this class is not designed to be re-used across AtomicReaderContext + * instances so simply create a new one for each call to, say a {@link + * org.apache.lucene.search.Filter#getDocIdSet(org.apache.lucene.index.AtomicReaderContext, org.apache.lucene.util.Bits)}. + * The {@link #getDocIdSet()} method here starts the work. It first checks + * that there are indexed terms; if not it quickly returns null. 
Then it calls + * {@link #start()} so a subclass can set up a return value, like an + * {@link org.apache.lucene.util.OpenBitSet}. Then it starts the traversal + * process, calling {@link #findSubCellsToVisit(org.apache.lucene.spatial.prefix.tree.Node)} + * which by default finds the top cells that intersect {@code queryShape}. If + * there isn't an indexed cell for a corresponding cell returned for this + * method then it's short-circuited until it finds one, at which point + * {@link #visit(org.apache.lucene.spatial.prefix.tree.Node)} is called. At + * some depths of the tree, the algorithm switches to a scanning mode that + * calls {@link #visitScanned(org.apache.lucene.spatial.prefix.tree.Node, com.spatial4j.core.shape.Shape)} + * for each leaf cell found. + */ + public abstract class VisitorTemplate extends BaseTermsEnumTraverser { + + /* Future potential optimizations: + + * Can a polygon query shape be optimized / made-simpler at recursive depths + (e.g. intersection of shape + cell box) + + * RE "scan" vs divide & conquer performance decision: + We should use termsEnum.docFreq() as an estimate on the number of places at + this depth. It would be nice if termsEnum knew how many terms + start with the current term without having to repeatedly next() & test to find out. + + */ + + protected final boolean hasIndexedLeaves;//if false then we can skip looking for them + + private VNode curVNode;//current pointer, derived from query shape + private BytesRef curVNodeTerm = new BytesRef();//curVNode.cell's term. + private Node scanCell; + + private BytesRef thisTerm;//the result of termsEnum.term() + + public VisitorTemplate(AtomicReaderContext context, Bits acceptDocs, + boolean hasIndexedLeaves) throws IOException { + super(context, acceptDocs); + this.hasIndexedLeaves = hasIndexedLeaves; + } + + /** A Visitor Node/Cell found via the query shape. Sometimes these are + * reset(cell). It's like a LinkedList node but forms a tree. 
*/ + class VNode { + //Note: The VNode tree adds more code to debug/maintain v.s. a flattened + // LinkedList that we used to have. There is more opportunity here for + // custom behavior (see preSiblings & postSiblings) but that's not + // leveraged yet. Maybe this is slightly more GC friendly. + + final VNode parent;//only null at the root + Iterator children;//null, then sometimes set, then null + Node cell;//not null (except initially before reset()) + + /** call reset(cell) after to set the cell. */ + VNode(VNode parent) { // remember to call reset(cell) after + this.parent = parent; + } + + void reset(Node cell) { + assert cell != null; + this.cell = cell; + assert children == null; + } + + } + + public DocIdSet getDocIdSet() throws IOException { + assert curVNode == null : "Called more than once?"; + if (termsEnum == null) + return null; + //advance + if ((thisTerm = termsEnum.next()) == null) + return null; // all done + + curVNode = new VNode(null); + curVNode.reset(grid.getWorldNode()); + + start(); + + addIntersectingChildren(); + + main: while (thisTerm != null) {//terminates for other reasons too! 
+ + //Advance curVNode pointer + if (curVNode.children != null) { + //-- HAVE CHILDREN: DESCEND + assert curVNode.children.hasNext();//if we put it there then it has something + preSiblings(curVNode); + curVNode = curVNode.children.next(); + } else { + //-- NO CHILDREN: ADVANCE TO NEXT SIBLING + VNode parentVNode = curVNode.parent; + while (true) { + if (parentVNode == null) + break main; // all done + if (parentVNode.children.hasNext()) { + //advance next sibling + curVNode = parentVNode.children.next(); + break; + } else { + //reached end of siblings; pop up + postSiblings(parentVNode); + parentVNode.children = null;//GC + parentVNode = parentVNode.parent; + } + } + } + + //Seek to curVNode's cell (or skip if termsEnum has moved beyond) + curVNodeTerm.bytes = curVNode.cell.getTokenBytes(); + curVNodeTerm.length = curVNodeTerm.bytes.length; + int compare = termsEnum.getComparator().compare(thisTerm, curVNodeTerm); + if (compare > 0) { + // leap frog (termsEnum is beyond where we would otherwise seek) + assert ! context.reader().terms(fieldName).iterator(null).seekExact(curVNodeTerm, false) : "should be absent"; + } else { + if (compare < 0) { + // Seek ! + TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(curVNodeTerm, true); + if (seekStatus == TermsEnum.SeekStatus.END) + break; // all done + thisTerm = termsEnum.term(); + if (seekStatus == TermsEnum.SeekStatus.NOT_FOUND) { + continue; // leap frog + } + } + // Visit! + boolean descend = visit(curVNode.cell); + //advance + if ((thisTerm = termsEnum.next()) == null) + break; // all done + if (descend) + addIntersectingChildren(); + + } + + }//main loop + + return finish(); + } + + /** Called initially, and whenever {@link #visit(org.apache.lucene.spatial.prefix.tree.Node)} + * returns true. 
*/ + private void addIntersectingChildren() throws IOException { + assert thisTerm != null; + Node cell = curVNode.cell; + if (cell.getLevel() >= detailLevel) + throw new IllegalStateException("Spatial logic error"); + + //Check for adjacent leaf (happens for indexed non-point shapes) + assert !cell.isLeaf(); + if (hasIndexedLeaves && cell.getLevel() != 0) { + //If the next indexed term just adds a leaf marker ('+') to cell, + // then add all of those docs + assert StringHelper.startsWith(thisTerm, curVNodeTerm); + scanCell = grid.getNode(thisTerm.bytes, thisTerm.offset, thisTerm.length, scanCell); + if (scanCell.getLevel() == cell.getLevel() && scanCell.isLeaf()) { + visitLeaf(scanCell); + //advance + if ((thisTerm = termsEnum.next()) == null) + return; // all done + } + } + + //Decide whether to continue to divide & conquer, or whether it's time to + // scan through terms beneath this cell. + // Scanning is a performance optimization trade-off. + + //TODO use termsEnum.docFreq() as heuristic + boolean scan = cell.getLevel() >= prefixGridScanLevel;//simple heuristic + + if (!scan) { + //Divide & conquer (ultimately termsEnum.seek()) + + Iterator subCellsIter = findSubCellsToVisit(cell); + if (!subCellsIter.hasNext())//not expected + return; + curVNode.children = new VNodeCellIterator(subCellsIter, new VNode(curVNode)); + + } else { + //Scan (loop of termsEnum.next()) + + scan(detailLevel); + } + } + + /** + * Called when doing a divide & conquer to find the next intersecting cells + * of the query shape that are beneath {@code cell}. {@code cell} is + * guaranteed to have an intersection and thus this must return some number + * of nodes. + */ + protected Iterator findSubCellsToVisit(Node cell) { + return cell.getSubCells(queryShape).iterator(); + } + + /** + * Scans ({@code termsEnum.next()}) terms until a term is found that does + * not start with curVNode's cell. 
If it finds a leaf cell or a cell at + * level {@code scanDetailLevel} then it calls {@link + * #visitScanned(org.apache.lucene.spatial.prefix.tree.Node, + * com.spatial4j.core.shape.Shape)}. + */ + protected void scan(int scanDetailLevel) throws IOException { + for (; + thisTerm != null && StringHelper.startsWith(thisTerm, curVNodeTerm); + thisTerm = termsEnum.next()) { + scanCell = grid.getNode(thisTerm.bytes, thisTerm.offset, thisTerm.length, scanCell); + + int termLevel = scanCell.getLevel(); + if (termLevel > scanDetailLevel) + continue; + if (termLevel == scanDetailLevel || scanCell.isLeaf()) { + Shape cShape; + //if this cell represents a point, use the cell center vs the box + // (points never have isLeaf()) + if (termLevel == grid.getMaxLevels() && !scanCell.isLeaf()) + cShape = scanCell.getCenter(); + else + cShape = scanCell.getShape(); + + visitScanned(scanCell, cShape); + } + }//term loop + } + + /** Used for {@link VNode#children}. */ + private class VNodeCellIterator implements Iterator { + + final Iterator cellIter; + private final VNode vNode; + + VNodeCellIterator(Iterator cellIter, VNode vNode) { + this.cellIter = cellIter; + this.vNode = vNode; + } + + @Override + public boolean hasNext() { + return cellIter.hasNext(); + } + + @Override + public VNode next() { + assert hasNext(); + vNode.reset(cellIter.next()); + return vNode; + } + + @Override + public void remove() {//it always removes + } + } + + /** Called first to setup things. */ + protected abstract void start() throws IOException; + + /** Called last to return the result. */ + protected abstract DocIdSet finish() throws IOException; + + /** + * Visit an indexed cell returned from + * {@link #findSubCellsToVisit(org.apache.lucene.spatial.prefix.tree.Node)}. + * + * @param cell An intersecting cell. + * @return true to descend to more levels. 
It is an error to return true + * if cell.level == detailLevel + * @throws IOException + */ + protected abstract boolean visit(Node cell) throws IOException; + + /** + * Called after visit() returns true and an indexed leaf cell is found. An + * indexed leaf cell means associated documents generally won't be found at + * further detail levels. + */ + protected abstract void visitLeaf(Node cell) throws IOException; + + /** + * The cell is either indexed as a leaf or is the last level of detail. It + * might not even intersect the query shape, so be sure to check for that. + * Use {@code cellShape} instead of {@code cell.getShape()} for the cell's + * shape. + */ + protected abstract void visitScanned(Node cell, Shape cellShape) throws IOException; + + + protected void preSiblings(VNode vNode) throws IOException { + } + + protected void postSiblings(VNode vNode) throws IOException { + } + }//class VisitorTemplate + +} Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/AbstractPrefixTreeFilter.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/AbstractPrefixTreeFilter.java (revision ) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/AbstractPrefixTreeFilter.java (revision ) @@ -0,0 +1,124 @@ +package org.apache.lucene.spatial.prefix; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import com.spatial4j.core.shape.Shape; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.Filter; +import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.OpenBitSet; + +import java.io.IOException; + +/** + * Base class for Lucene Filters on SpatialPrefixTree fields. 
+ * + * @lucene.internal + */ +public abstract class AbstractPrefixTreeFilter extends Filter { + + protected final Shape queryShape; + protected final String fieldName; + protected final SpatialPrefixTree grid;//not in equals/hashCode since it's implied for a specific field + protected final int detailLevel; + + public AbstractPrefixTreeFilter(Shape queryShape, String fieldName, SpatialPrefixTree grid, int detailLevel) { + this.queryShape = queryShape; + this.fieldName = fieldName; + this.grid = grid; + this.detailLevel = detailLevel; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || !getClass().equals(o.getClass())) return false;//null-safe: equals(null) must be false, not an NPE + + AbstractPrefixTreeFilter that = (AbstractPrefixTreeFilter) o; + + if (detailLevel != that.detailLevel) return false; + if (!fieldName.equals(that.fieldName)) return false; + if (!queryShape.equals(that.queryShape)) return false; + + return true; + } + + @Override + public int hashCode() { + int result = queryShape.hashCode(); + result = 31 * result + fieldName.hashCode(); + result = 31 * result + detailLevel; + return result; + } + + /** Holds transient state and docid collecting utility methods as part of + * traversing a {@link TermsEnum}. 
*/ + public abstract class BaseTermsEnumTraverser { + + protected final AtomicReaderContext context; + protected Bits acceptDocs; + protected final int maxDoc; + + protected TermsEnum termsEnum;//remember to check for null in getDocIdSet + protected DocsEnum docsEnum; + + public BaseTermsEnumTraverser(AtomicReaderContext context, Bits acceptDocs) throws IOException { + this.context = context; + AtomicReader reader = context.reader(); + this.acceptDocs = acceptDocs; + this.maxDoc = reader.maxDoc(); + Terms terms = reader.terms(fieldName); + if (terms != null) + this.termsEnum = terms.iterator(null); + } + + protected void collectDocs(OpenBitSet bitSet) throws IOException { + //WARN: keep this specialization in sync + assert termsEnum != null; + docsEnum = termsEnum.docs(acceptDocs, docsEnum, DocsEnum.FLAG_NONE); + int docid; + while ((docid = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + bitSet.fastSet(docid); + } + } + + /* Eventually uncomment when needed. + + protected void collectDocs(Collector collector) throws IOException { + //WARN: keep this specialization in sync + assert termsEnum != null; + docsEnum = termsEnum.docs(acceptDocs, docsEnum, DocsEnum.FLAG_NONE); + int docid; + while ((docid = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + collector.collect(docid); + } + } + + public abstract class Collector { + abstract void collect(int docid) throws IOException; + } + */ + } + +} Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/IntersectsPrefixTreeFilter.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/IntersectsPrefixTreeFilter.java (revision ) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/IntersectsPrefixTreeFilter.java (revision ) @@ -0,0 +1,91 @@ +package org.apache.lucene.spatial.prefix; + +/* + * Licensed to the Apache Software 
Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import com.spatial4j.core.shape.Shape; +import com.spatial4j.core.shape.SpatialRelation; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.spatial.prefix.tree.Node; +import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.OpenBitSet; + +import java.io.IOException; + +/** + * A Filter matching documents that have an {@link SpatialRelation#INTERSECTS} + * (i.e. not DISJOINT) relationship with a provided query shape. 
+ * + * @lucene.internal + */ +public class IntersectsPrefixTreeFilter extends AbstractVisitingPrefixTreeFilter { + + private final boolean hasIndexedLeaves; + + public IntersectsPrefixTreeFilter(Shape queryShape, String fieldName, + SpatialPrefixTree grid, int detailLevel, + int prefixGridScanLevel, boolean hasIndexedLeaves) { + super(queryShape, fieldName, grid, detailLevel, prefixGridScanLevel); + this.hasIndexedLeaves = hasIndexedLeaves; + } + + @Override + public boolean equals(Object o) { + return super.equals(o) && hasIndexedLeaves == ((IntersectsPrefixTreeFilter)o).hasIndexedLeaves; + } + + @Override + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { + return new VisitorTemplate(context, acceptDocs, hasIndexedLeaves) { + private OpenBitSet results; + + @Override + protected void start() { + results = new OpenBitSet(maxDoc); + } + + @Override + protected DocIdSet finish() { + return results; + } + + @Override + protected boolean visit(Node cell) throws IOException { + if (cell.getShapeRel() == SpatialRelation.WITHIN || cell.getLevel() == detailLevel) { + collectDocs(results); + return false; + } + return true; + } + + @Override + protected void visitLeaf(Node cell) throws IOException { + collectDocs(results); + } + + @Override + protected void visitScanned(Node cell, Shape cellShape) throws IOException { + if (queryShape.relate(cellShape).intersects()) + collectDocs(results); + } + + }.getDocIdSet(); + } + +} Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/Node.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/Node.java (revision 1448726) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/Node.java (revision ) @@ -27,7 +27,8 @@ import java.util.List; /** - * Represents a grid cell. 
These are not necessarily thread-safe, although new Cell("") (world cell) must be. + * Represents a grid cell. These are not necessarily thread-safe, although new + * Cell("") (world cell) must be. * * @lucene.experimental */ @@ -44,13 +45,19 @@ private String token;//this is the only part of equality - /** When set via getSubCells(filter), it is the relationship between this - * cell and the given shape filter. If set via setLeaf() (to WITHIN), it is - * meant to indicate no further sub-cells are going to be provided because - * maxLevels or a detailLevel is hit. It's always null for points. + /** + * When set via getSubCells(filter), it is the relationship between this cell + * and the given shape filter. */ protected SpatialRelation shapeRel; + /** + * Always false for points. Otherwise, indicate no further sub-cells are going + * to be provided because shapeRel is WITHIN or maxLevels or a detailLevel is + * hit. + */ + protected boolean leaf; + protected Node(String token) { this.token = token; if (token.length() > 0 && token.charAt(token.length() - 1) == (char) LEAF_BYTE) { @@ -96,12 +103,13 @@ * further cells with this prefix for the shape (always true at maxLevels). */ public boolean isLeaf() { - return shapeRel == SpatialRelation.WITHIN; + return leaf; } + /** Note: not supported at level 0. */ public void setLeaf() { assert getLevel() != 0; - shapeRel = SpatialRelation.WITHIN; + leaf = true; } /** @@ -139,12 +147,11 @@ /** * Like {@link #getSubCells()} but with the results filtered by a shape. If - * that shape is a {@link com.spatial4j.core.shape.Point} then it - * must call {@link #getSubCell(com.spatial4j.core.shape.Point)}. - * The returned cells should have their {@link Node#shapeRel} set to their - * relation with {@code shapeFilter} for non-point. As such, - * {@link org.apache.lucene.spatial.prefix.tree.Node#isLeaf()} should be - * accurate. 
+ * that shape is a {@link com.spatial4j.core.shape.Point} then it must call + * {@link #getSubCell(com.spatial4j.core.shape.Point)}. The returned cells + * should have {@link Node#getShapeRel()} set to their relation with {@code + * shapeFilter}. In addition, {@link org.apache.lucene.spatial.prefix.tree.Node#isLeaf()} + * must be true when that relation is WITHIN. *

* Precondition: Never called when getLevel() == maxLevel. * @@ -154,29 +161,35 @@ public Collection getSubCells(Shape shapeFilter) { //Note: Higher-performing subclasses might override to consider the shape filter to generate fewer cells. if (shapeFilter instanceof Point) { - return Collections.singleton(getSubCell((Point) shapeFilter)); + Node subCell = getSubCell((Point) shapeFilter); + subCell.shapeRel = SpatialRelation.CONTAINS; + return Collections.singletonList(subCell); } Collection cells = getSubCells(); if (shapeFilter == null) { return cells; } - List copy = new ArrayList(cells.size());//copy since cells contractually isn't modifiable + + //TODO change API to return a filtering iterator + List copy = new ArrayList(cells.size()); for (Node cell : cells) { SpatialRelation rel = cell.getShape().relate(shapeFilter); if (rel == SpatialRelation.DISJOINT) continue; cell.shapeRel = rel; + if (rel == SpatialRelation.WITHIN) + cell.setLeaf(); copy.add(cell); } - cells = copy; - return cells; + return copy; } /** - * Performant implementations are expected to implement this efficiently by considering the current - * cell's boundary. - * Precondition: Never called when getLevel() == maxLevel. + * Performant implementations are expected to implement this efficiently by + * considering the current cell's boundary. Precondition: Never called when + * getLevel() == maxLevel. + *

* Precondition: this.getShape().relate(p) != DISJOINT. */ public abstract Node getSubCell(Point p); @@ -187,7 +200,7 @@ * Gets the cells at the next grid cell level that cover this cell. * Precondition: Never called when getLevel() == maxLevel. * - * @return A set of cells (no dups), sorted. Not Modifiable. + * @return A set of cells (no dups), sorted, modifiable, not empty, not null. */ protected abstract Collection getSubCells(); Index: lucene/spatial/src/test/org/apache/lucene/spatial/QueryEqualsHashCodeTest.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/test/org/apache/lucene/spatial/QueryEqualsHashCodeTest.java (revision ) +++ lucene/spatial/src/test/org/apache/lucene/spatial/QueryEqualsHashCodeTest.java (revision ) @@ -0,0 +1,101 @@ +package org.apache.lucene.spatial; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import com.spatial4j.core.context.SpatialContext; +import com.spatial4j.core.shape.Shape; +import org.apache.lucene.spatial.bbox.BBoxStrategy; +import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy; +import org.apache.lucene.spatial.prefix.TermQueryPrefixTreeStrategy; +import org.apache.lucene.spatial.prefix.tree.QuadPrefixTree; +import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; +import org.apache.lucene.spatial.query.SpatialArgs; +import org.apache.lucene.spatial.query.SpatialOperation; +import org.apache.lucene.spatial.vector.PointVectorStrategy; +import org.apache.lucene.util.LuceneTestCase; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.Collection; + +/** Checks that queries built by several spatial strategies are equal (with equal hash codes) for the same arguments and unequal for different arguments. */ public class QueryEqualsHashCodeTest extends LuceneTestCase { + + private final SpatialContext ctx = SpatialContext.GEO; + + /** Runs the equals/hashCode check against makeQuery of four strategies; the two prefix-tree strategies share one QuadPrefixTree built with ctx and 10. */ @Test + public void testEqualsHashCode() { + + final SpatialPrefixTree grid = new QuadPrefixTree(ctx,10); + final SpatialArgs args1 = makeArgs1(); + final SpatialArgs args2 = makeArgs2(); + + Collection<ObjGenerator> generators = new ArrayList<ObjGenerator>(); + generators.add(new ObjGenerator() { + @Override + public Object gen(SpatialArgs args) { + return new RecursivePrefixTreeStrategy(grid, "recursive_quad").makeQuery(args); + } + }); + generators.add(new ObjGenerator() { + @Override + public Object gen(SpatialArgs args) { + return new TermQueryPrefixTreeStrategy(grid, "termquery_quad").makeQuery(args); + } + }); + generators.add(new ObjGenerator() { + @Override + public Object gen(SpatialArgs args) { + return new PointVectorStrategy(ctx, "pointvector").makeQuery(args); + } + }); + generators.add(new ObjGenerator() { + @Override + public Object gen(SpatialArgs args) { + return new BBoxStrategy(ctx, "bbox").makeQuery(args); + } + }); + + for (ObjGenerator generator : generators) { + testStratQueryEqualsHashcode(args1, args2, generator); + } + } + + /** Two objects generated from args1 must be equal and share a hash code; an object generated from args2 must not equal the one generated from args1. */ private void testStratQueryEqualsHashcode(SpatialArgs args1, SpatialArgs args2, ObjGenerator generator) { + 
Object first = generator.gen(args1); + Object second = generator.gen(args1);//should be the same + assertEquals(first, second); + assertEquals(first.hashCode(), second.hashCode()); + second = generator.gen(args2);//now should be different + assertFalse(first.equals(second)); + } + + /** Intersects arguments over the rectangle made from (0, 0, 10, 10). */ private SpatialArgs makeArgs1() { + final Shape shape1 = ctx.makeRectangle(0, 0, 10, 10); + return new SpatialArgs(SpatialOperation.Intersects, shape1); + } + + /** Intersects arguments over the rectangle made from (0, 0, 20, 20), i.e. a different shape than makeArgs1. */ private SpatialArgs makeArgs2() { + final Shape shape2 = ctx.makeRectangle(0, 0, 20, 20); + return new SpatialArgs(SpatialOperation.Intersects, shape2); + } + + /** Produces the object under test (here, a strategy's query) for the given arguments. */ interface ObjGenerator { + Object gen(SpatialArgs args); + } + +}