Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTree.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTree.java (revision 1443751) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTree.java (revision ) @@ -116,88 +116,86 @@ } /** - * Gets the intersecting & including cells for the specified shape, without exceeding detail level. - * The result is a set of cells (no dups), sorted. Unmodifiable. + * Gets the intersecting cells for the specified shape, without exceeding + * detail level. If a cell is within the query shape then it's marked as a + * leaf and none of its children are added. *

- * This implementation checks if shape is a Point and if so uses an implementation that - * recursively calls {@link Node#getSubCell(com.spatial4j.core.shape.Point)}. Cell subclasses - * ideally implement that method with a quick implementation, otherwise, subclasses should - * override this method to invoke {@link #getNodesAltPoint(com.spatial4j.core.shape.Point, int, boolean)}. - * TODO consider another approach returning an iterator -- won't build up all cells in memory. + * This implementation checks if shape is a Point and if so returns {@link + * #getNodes(com.spatial4j.core.shape.Point, int, boolean)}. + * + * @param shape the shape; non-null + * @param detailLevel the maximum detail level to get cells for + * @param inclParents if true then all parent cells of leaves are returned + * too. The top world cell is never returned. + * @param simplify for non-point shapes, this will simply/aggregate sets of + * complete leaves in a cell to its parent, resulting in + * ~20-25% fewer cells. + * @return a set of cells (no dups), sorted, immutable, non-null */ - public List getNodes(Shape shape, int detailLevel, boolean inclParents) { + public List getNodes(Shape shape, int detailLevel, boolean inclParents, + boolean simplify) { + //TODO consider an on-demand iterator -- it won't build up all cells in memory. if (detailLevel > maxLevels) { throw new IllegalArgumentException("detailLevel > maxLevels"); } - - List cells; if (shape instanceof Point) { - //optimized point algorithm - final int initialCapacity = inclParents ? 1 + detailLevel : 1; - cells = new ArrayList(initialCapacity); - recursiveGetNodes(getWorldNode(), (Point) shape, detailLevel, true, cells); - assert cells.size() == initialCapacity; - } else { - cells = new ArrayList(inclParents ? 1024 : 512); - recursiveGetNodes(getWorldNode(), shape, detailLevel, inclParents, cells); + return getNodes((Point) shape, detailLevel, inclParents); } - if (inclParents) { - Node c = cells.remove(0);//remove getWorldNode() - assert c.getLevel() == 0; - } + List cells = new ArrayList(inclParents ? 4096 : 2048); + recursiveGetNodes(getWorldNode(), shape, detailLevel, inclParents, simplify, cells); return cells; } - private void recursiveGetNodes(Node node, Shape shape, int detailLevel, boolean inclParents, - Collection result) { - if (node.isLeaf()) {//cell is within shape - result.add(node); - return; + /** + * Returns true if node was added as a leaf. If it wasn't it recursively + * descends. + */ + private boolean recursiveGetNodes(Node node, Shape shape, int detailLevel, + boolean inclParents, boolean simplify, + List result) { + if (node.getLevel() == detailLevel) { + node.setLeaf();//FYI might already be a leaf } - final Collection subCells = node.getSubCells(shape); - if (node.getLevel() == detailLevel - 1) { - if (subCells.size() < node.getSubCellsSize() || node.getLevel() == 0) { - if (inclParents) + if (node.isLeaf()) { - result.add(node); + result.add(node); - for (Node subCell : subCells) { - subCell.setLeaf(); + return true; - } + } - result.addAll(subCells); - } else {//a bottom level (i.e. detail level) optimization where all boxes intersect, so use parent cell. - node.setLeaf();//the cell may not be strictly within but its close + if (inclParents && node.getLevel() != 0) - result.add(node); + result.add(node); - } - } else { - if (inclParents) { - result.add(node); - } + + Collection subCells = node.getSubCells(shape); + int leaves = 0; - for (Node subCell : subCells) { + for (Node subCell : subCells) { - recursiveGetNodes(subCell, shape, detailLevel, inclParents, result);//tail call + if (recursiveGetNodes(subCell, shape, detailLevel, inclParents, simplify, result)) + leaves++; - } + } - } - } + //can we simplify? + if (simplify && leaves == node.getSubCellsSize() && node.getLevel() != 0) { + //Optimization: substitute the parent as a leaf instead of adding all + // children as leaves - private void recursiveGetNodes(Node node, Point point, int detailLevel, boolean inclParents, - Collection result) { - if (inclParents) { + //remove the leaves + do { + result.remove(result.size() - 1);//remove last + } while (--leaves > 0); + //add node as the leaf + node.setLeaf(); + if (!inclParents) // otherwise it was already added up above - result.add(node); + result.add(node); + return true; } - final Node pCell = node.getSubCell(point); - if (node.getLevel() == detailLevel - 1) { - pCell.setLeaf(); - result.add(pCell); - } else { - recursiveGetNodes(pCell, point, detailLevel, inclParents, result);//tail call + return false; - } + } - } /** - * Subclasses might override {@link #getNodes(com.spatial4j.core.shape.Shape, int, boolean)} - * and check if the argument is a shape and if so, delegate - * to this implementation, which calls {@link #getNode(com.spatial4j.core.shape.Point, int)} and - * then calls {@link #getNode(String)} repeatedly if inclParents is true. + * A Point-optimized implementation of + * {@link #getNodes(com.spatial4j.core.shape.Shape, int, boolean)}. That + * method in facts calls this for points. + *

+ * This implementation depends on {@link #getNode(String)} being fast, as its + * called repeatedly when incPlarents is true. */ - protected final List getNodesAltPoint(Point p, int detailLevel, boolean inclParents) { + public List getNodes(Point p, int detailLevel, boolean inclParents) { Node cell = getNode(p, detailLevel); if (!inclParents) { return Collections.singletonList(cell); Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java (revision 1443751) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java (revision ) @@ -156,14 +156,6 @@ return new QuadCell(bytes, offset, len); } - @Override //for performance - public List getNodes(Shape shape, int detailLevel, boolean inclParents) { - if (shape instanceof Point) - return super.getNodesAltPoint((Point) shape, detailLevel, inclParents); - else - return super.getNodes(shape, detailLevel, inclParents); - } - private void build( double x, double y, Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixTreeStrategy.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixTreeStrategy.java (revision 1443751) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixTreeStrategy.java (revision ) @@ -78,12 +78,14 @@ public abstract class PrefixTreeStrategy extends SpatialStrategy { protected final SpatialPrefixTree grid; private final Map provider = new ConcurrentHashMap(); + protected final boolean simplifyIndexedCells; protected int defaultFieldValuesArrayLen = 2; protected double distErrPct = SpatialArgs.DEFAULT_DISTERRPCT;// [ 0 TO 0.5 ] - public PrefixTreeStrategy(SpatialPrefixTree grid, String fieldName) { + public PrefixTreeStrategy(SpatialPrefixTree grid, String fieldName, boolean simplifyIndexedCells) { super(grid.getSpatialContext(), fieldName); this.grid = grid; + this.simplifyIndexedCells = simplifyIndexedCells; } /** @@ -123,7 +125,7 @@ public Field[] createIndexableFields(Shape shape, double distErr) { int detailLevel = grid.getLevelForDistance(distErr); - List cells = grid.getNodes(shape, detailLevel, true);//true=intermediates cells + List cells = grid.getNodes(shape, detailLevel, true, simplifyIndexedCells);//intermediates cells //TODO is CellTokenStream supposed to be re-used somehow? see Uwe's comments: // http://code.google.com/p/lucene-spatial-playground/issues/detail?id=4 Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java (revision 1443751) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java (revision ) @@ -38,7 +38,8 @@ private int prefixGridScanLevel; public RecursivePrefixTreeStrategy(SpatialPrefixTree grid, String fieldName) { - super(grid, fieldName); + super(grid, fieldName, + true);//simplify indexed cells prefixGridScanLevel = grid.getMaxLevels() - 4;//TODO this default constant is dependent on the prefix grid size } Index: lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java (revision 1443751) +++ lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java (revision ) @@ -113,7 +113,7 @@ double distErrPct = ((PrefixTreeStrategy) strategy).getDistErrPct(); double distErr = SpatialArgs.calcDistanceFromErrPct(snapMe, distErrPct, ctx); int detailLevel = grid.getLevelForDistance(distErr); - List cells = grid.getNodes(snapMe, detailLevel, false); + List cells = grid.getNodes(snapMe, detailLevel, false, true); //calc bounding box of cells. double minX = Double.POSITIVE_INFINITY, maxX = Double.NEGATIVE_INFINITY; Index: lucene/spatial/src/test/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeTest.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/test/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeTest.java (revision 1443751) +++ lucene/spatial/src/test/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeTest.java (revision ) @@ -18,14 +18,24 @@ package org.apache.lucene.spatial.prefix.tree; import com.spatial4j.core.context.SpatialContext; +import com.spatial4j.core.shape.Point; import com.spatial4j.core.shape.Rectangle; import com.spatial4j.core.shape.Shape; - -import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.TextField; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.spatial.SpatialTestCase; +import org.apache.lucene.spatial.prefix.TermQueryPrefixTreeStrategy; +import org.apache.lucene.spatial.query.SpatialArgs; +import org.apache.lucene.spatial.query.SpatialOperation; import org.junit.Before; import org.junit.Test; -public class SpatialPrefixTreeTest extends LuceneTestCase { +public class SpatialPrefixTreeTest extends SpatialTestCase { //TODO plug in others and test them private SpatialContext ctx; @@ -36,11 +46,12 @@ public void setUp() throws Exception { super.setUp(); ctx = SpatialContext.GEO; - trie = new GeohashPrefixTree(ctx,4); } @Test public void testNodeTraverse() { + trie = new GeohashPrefixTree(ctx,4); + Node prevN = null; Node n = trie.getWorldNode(); assertEquals(0,n.getLevel()); @@ -57,4 +68,40 @@ assertTrue(prevNShape.getHeight() > sbox.getHeight()); } } + /** + * A PrefixTree pruning optimization gone bad. + * See UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/TermQueryPrefixTreeStrategy.java (revision 1443751) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/TermQueryPrefixTreeStrategy.java (revision ) @@ -43,7 +43,8 @@ public class TermQueryPrefixTreeStrategy extends PrefixTreeStrategy { public TermQueryPrefixTreeStrategy(SpatialPrefixTree grid, String fieldName) { - super(grid, fieldName); + super(grid, fieldName, + false);//do not simplify indexed cells } @Override @@ -54,7 +55,9 @@ Shape shape = args.getShape(); int detailLevel = grid.getLevelForDistance(args.resolveDistErr(ctx, distErrPct)); - List cells = grid.getNodes(shape, detailLevel, false); + List cells = grid.getNodes(shape, detailLevel, + false,//no parents + true);//simplify BytesRef[] terms = new BytesRef[cells.size()]; int i = 0; for (Node cell : cells) { Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/GeohashPrefixTree.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/GeohashPrefixTree.java (revision 1443751) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/GeohashPrefixTree.java (revision ) @@ -93,12 +93,6 @@ return new GhCell(bytes, offset, len); } - @Override - public List getNodes(Shape shape, int detailLevel, boolean inclParents) { - return shape instanceof Point ? super.getNodesAltPoint((Point) shape, detailLevel, inclParents) : - super.getNodes(shape, detailLevel, inclParents); - } - class GhCell extends Node { GhCell(String token) { super(token); Index: lucene/spatial/src/test/org/apache/lucene/spatial/prefix/JtsPolygonTest.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/test/org/apache/lucene/spatial/prefix/JtsPolygonTest.java (revision 1443751) +++ lucene/spatial/src/test/org/apache/lucene/spatial/prefix/JtsPolygonTest.java (revision ) @@ -18,9 +18,19 @@ */ import com.spatial4j.core.context.SpatialContextFactory; +import com.spatial4j.core.shape.Point; import com.spatial4j.core.shape.Shape; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.TextField; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TopDocs; import org.apache.lucene.spatial.StrategyTestCase; import org.apache.lucene.spatial.prefix.tree.GeohashPrefixTree; +import org.apache.lucene.spatial.prefix.tree.QuadPrefixTree; +import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; import org.apache.lucene.spatial.query.SpatialArgs; import org.apache.lucene.spatial.query.SpatialOperation; import org.junit.Test; @@ -68,6 +78,41 @@ SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects, shape); args.setDistErrPct(distErrPct); return args; + } + + /** + * A PrefixTree pruning optimization gone bad. + * See