Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTree.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTree.java (revision 1416685) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTree.java (revision ) @@ -156,7 +156,7 @@ } final Collection subCells = node.getSubCells(shape); if (node.getLevel() == detailLevel - 1) { - if (subCells.size() < node.getSubCellsSize()) { + if (subCells.size() < node.getSubCellsSize() || node.getLevel() == 0) { if (inclParents) result.add(node); for (Node subCell : subCells) { @@ -164,7 +164,7 @@ } result.addAll(subCells); } else {//a bottom level (i.e. detail level) optimization where all boxes intersect, so use parent cell. - node.setLeaf(); + node.setLeaf();//the cell may not be strictly within but its close result.add(node); } } else { Index: lucene/CHANGES.txt IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/CHANGES.txt (revision 1416685) +++ lucene/CHANGES.txt (revision ) @@ -64,6 +64,12 @@ even if the commitData is the only thing that changes. (Shai Erera, Michael McCandless) +* LUCENE-4585: Spatial PrefixTree based Strategies (either TermQuery or + RecursivePrefix based) MAY want to re-index if used for point data. If a + re-index is not done, then an indexed point is ~1/2 the smallest grid cell + larger and as such is slightly more likely to match a query shape. + (David Smiley) + New Features * LUCENE-4226: New experimental StoredFieldsFormat that compresses chunks of @@ -201,6 +207,11 @@ * LUCENE-4009: Improve TermsFilter.toString (Tim Costermans via Chris Male, Mike McCandless) + +* LUCENE-4585: Spatial RecursivePrefixTreeFilter had some bugs that only + occurred when non-point shapes were indexed. In rare circumstances, documents + with shapes near a query shape were erroneously considered a match. Also, it + wasn't possible to index a shape representing the entire globe. (David Smiley) Optimizations Index: lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialOperation.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialOperation.java (revision 1416685) +++ lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialOperation.java (revision ) @@ -17,6 +17,9 @@ * limitations under the License. */ +import com.spatial4j.core.shape.Shape; +import com.spatial4j.core.shape.SpatialRelation; + import java.io.Serializable; import java.util.ArrayList; import java.util.HashMap; @@ -25,14 +28,16 @@ import java.util.Map; /** - * A clause that compares a stored geometry to a supplied geometry. + * A clause that compares a stored geometry to a supplied geometry. For more + * explanation of each operation, consider looking at the source implementation + * of {@link #evaluate(com.spatial4j.core.shape.Shape, com.spatial4j.core.shape.Shape)}. * * @see * ESRIs docs on spatial relations * * @lucene.experimental */ -public class SpatialOperation implements Serializable { +public abstract class SpatialOperation implements Serializable { // Private registry private static final Map registry = new HashMap(); private static final List list = new ArrayList(); @@ -40,15 +45,55 @@ // Geometry Operations /** Bounding box of the *indexed* shape. */ - public static final SpatialOperation BBoxIntersects = new SpatialOperation("BBoxIntersects", true, false, false); + public static final SpatialOperation BBoxIntersects = new SpatialOperation("BBoxIntersects", true, false, false) { + @Override + public boolean evaluate(Shape indexedShape, Shape queryShape) { + return indexedShape.getBoundingBox().relate(queryShape).intersects(); + } + }; /** Bounding box of the *indexed* shape. */ - public static final SpatialOperation BBoxWithin = new SpatialOperation("BBoxWithin", true, false, false); - public static final SpatialOperation Contains = new SpatialOperation("Contains", true, true, false); - public static final SpatialOperation Intersects = new SpatialOperation("Intersects", true, false, false); - public static final SpatialOperation IsEqualTo = new SpatialOperation("IsEqualTo", false, false, false); - public static final SpatialOperation IsDisjointTo = new SpatialOperation("IsDisjointTo", false, false, false); - public static final SpatialOperation IsWithin = new SpatialOperation("IsWithin", true, false, true); - public static final SpatialOperation Overlaps = new SpatialOperation("Overlaps", true, false, true); + public static final SpatialOperation BBoxWithin = new SpatialOperation("BBoxWithin", true, false, false) { + @Override + public boolean evaluate(Shape indexedShape, Shape queryShape) { + return indexedShape.getBoundingBox().relate(queryShape) == SpatialRelation.WITHIN; + } + }; + public static final SpatialOperation Contains = new SpatialOperation("Contains", true, true, false) { + @Override + public boolean evaluate(Shape indexedShape, Shape queryShape) { + return indexedShape.hasArea() && indexedShape.relate(queryShape) == SpatialRelation.CONTAINS; + } + }; + public static final SpatialOperation Intersects = new SpatialOperation("Intersects", true, false, false) { + @Override + public boolean evaluate(Shape indexedShape, Shape queryShape) { + return indexedShape.relate(queryShape).intersects(); + } + }; + public static final SpatialOperation IsEqualTo = new SpatialOperation("IsEqualTo", false, false, false) { + @Override + public boolean evaluate(Shape indexedShape, Shape queryShape) { + return indexedShape.equals(queryShape); + } + }; + public static final SpatialOperation IsDisjointTo = new SpatialOperation("IsDisjointTo", false, false, false) { + @Override + public boolean evaluate(Shape indexedShape, Shape queryShape) { + return ! indexedShape.relate(queryShape).intersects(); + } + }; + public static final SpatialOperation IsWithin = new SpatialOperation("IsWithin", true, false, true) { + @Override + public boolean evaluate(Shape indexedShape, Shape queryShape) { + return queryShape.hasArea() && indexedShape.relate(queryShape) == SpatialRelation.WITHIN; + } + }; + public static final SpatialOperation Overlaps = new SpatialOperation("Overlaps", true, false, true) { + @Override + public boolean evaluate(Shape indexedShape, Shape queryShape) { + return queryShape.hasArea() && indexedShape.relate(queryShape).intersects(); + } + }; // Member variables private final boolean scoreIsMeaningful; @@ -90,6 +135,11 @@ return false; } + /** + * Returns whether the relationship between indexedShape and queryShape is + * satisfied by this operation. + */ + public abstract boolean evaluate(Shape indexedShape, Shape queryShape); // ================================================= Getters / Setters ============================================= Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java (revision 1416685) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java (revision ) @@ -228,16 +228,16 @@ class QuadCell extends Node { public QuadCell(String token) { - super(QuadPrefixTree.this, token); + super(token); } public QuadCell(String token, SpatialRelation shapeRel) { - super(QuadPrefixTree.this, token); + super(token); this.shapeRel = shapeRel; } QuadCell(byte[] bytes, int off, int len) { - super(QuadPrefixTree.this, bytes, off, len); + super(bytes, off, len); } @Override Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeFilter.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeFilter.java (revision 1416685) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeFilter.java (revision ) @@ -19,7 +19,11 @@ import com.spatial4j.core.shape.Shape; import com.spatial4j.core.shape.SpatialRelation; -import org.apache.lucene.index.*; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Filter; @@ -110,23 +114,28 @@ while(!cells.isEmpty()) { final Node cell = cells.removeFirst(); final BytesRef cellTerm = new BytesRef(cell.getTokenBytes()); - TermsEnum.SeekStatus seekStat = termsEnum.seekCeil(cellTerm); - if (seekStat == TermsEnum.SeekStatus.END) - break; - if (seekStat == TermsEnum.SeekStatus.NOT_FOUND) + if (!termsEnum.seekExact(cellTerm, true)) continue; if (cell.getLevel() == detailLevel || cell.isLeaf()) { docsEnum = termsEnum.docs(acceptDocs, docsEnum, 0); addDocs(docsEnum,bits); } else {//any other intersection - //If the next indexed term is the leaf marker, then add all of them + assert cell.getLevel() < detailLevel; //assertions help clarify logic + assert !cell.isLeaf(); + //If the next indexed term just adds a leaf marker ('+') to cell, + // then add all of those docs BytesRef nextCellTerm = termsEnum.next(); + if (nextCellTerm == null) + break; assert StringHelper.startsWith(nextCellTerm, cellTerm); scanCell = grid.getNode(nextCellTerm.bytes, nextCellTerm.offset, nextCellTerm.length, scanCell); - if (scanCell.isLeaf()) { + if (scanCell.getLevel() == cell.getLevel() && scanCell.isLeaf()) { docsEnum = termsEnum.docs(acceptDocs, docsEnum, 0); addDocs(docsEnum,bits); - termsEnum.next();//move pointer to avoid potential redundant addDocs() below + //increment pointer to avoid potential redundant addDocs() below + nextCellTerm = termsEnum.next(); + if (nextCellTerm == null) + break; } //Decide whether to continue to divide & conquer, or whether it's time to scan through terms beneath this cell. @@ -144,8 +153,13 @@ if (termLevel > detailLevel) continue; if (termLevel == detailLevel || scanCell.isLeaf()) { - //TODO should put more thought into implications of box vs point - Shape cShape = termLevel == grid.getMaxLevels() ? scanCell.getCenter() : scanCell.getShape(); + Shape cShape; + //if this cell represents a point, use the cell center vs the box + // (points never have isLeaf()) + if (termLevel == grid.getMaxLevels() && !scanCell.isLeaf()) + cShape = scanCell.getCenter(); + else + cShape = scanCell.getShape(); if(queryShape.relate(cShape) == SpatialRelation.DISJOINT) continue; Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/GeohashPrefixTree.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/GeohashPrefixTree.java (revision 1416685) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/GeohashPrefixTree.java (revision ) @@ -101,11 +101,11 @@ class GhCell extends Node { GhCell(String token) { - super(GeohashPrefixTree.this, token); + super(token); } GhCell(byte[] bytes, int off, int len) { - super(GeohashPrefixTree.this, bytes, off, len); + super(bytes, off, len); } @Override Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/Node.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/Node.java (revision 1416685) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/Node.java (revision ) @@ -44,11 +44,14 @@ private String token;//this is the only part of equality - protected SpatialRelation shapeRel;//set in getSubCells(filter), and via setLeaf(). - private SpatialPrefixTree spatialPrefixTree; + /** When set via getSubCells(filter), it is the relationship between this + * cell and the given shape filter. If set via setLeaf() (to WITHIN), it is + * meant to indicate no further sub-cells are going to be provided because + * maxLevels or a detailLevel is hit. It's always null for points. + */ + protected SpatialRelation shapeRel; - protected Node(SpatialPrefixTree spatialPrefixTree, String token) { - this.spatialPrefixTree = spatialPrefixTree; + protected Node(String token) { this.token = token; if (token.length() > 0 && token.charAt(token.length() - 1) == (char) LEAF_BYTE) { this.token = token.substring(0, token.length() - 1); @@ -59,8 +62,7 @@ getShape();//ensure any lazy instantiation completes to make this threadsafe } - protected Node(SpatialPrefixTree spatialPrefixTree, byte[] bytes, int off, int len) { - this.spatialPrefixTree = spatialPrefixTree; + protected Node(byte[] bytes, int off, int len) { this.bytes = bytes; this.b_off = off; this.b_len = len; @@ -78,11 +80,10 @@ } private void b_fixLeaf() { + //note that non-point shapes always have the maxLevels cell set with setLeaf if (bytes[b_off + b_len - 1] == LEAF_BYTE) { b_len--; setLeaf(); - } else if (getLevel() == spatialPrefixTree.getMaxLevels()) { - setLeaf(); } } @@ -90,6 +91,10 @@ return shapeRel; } + /** + * For points, this is always false. Otherwise this is true if there are no + * further cells with this prefix for the shape (always true at maxLevels). + */ public boolean isLeaf() { return shapeRel == SpatialRelation.WITHIN; } @@ -133,8 +138,14 @@ //public Cell getParent(); /** - * Like {@link #getSubCells()} but with the results filtered by a shape. If that shape is a {@link com.spatial4j.core.shape.Point} then it - * must call {@link #getSubCell(com.spatial4j.core.shape.Point)}; + * Like {@link #getSubCells()} but with the results filtered by a shape. If + * that shape is a {@link com.spatial4j.core.shape.Point} then it + * must call {@link #getSubCell(com.spatial4j.core.shape.Point)}. + * The returned cells should have their {@link Node#shapeRel} set to their + * relation with {@code shapeFilter} for non-point. As such, + * {@link org.apache.lucene.spatial.prefix.tree.Node#isLeaf()} should be + * accurate. + *

* Precondition: Never called when getLevel() == maxLevel. * * @param shapeFilter an optional filter for the returned cells.