Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTree.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTree.java (revision 1443751)
+++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTree.java (revision )
@@ -116,88 +116,86 @@
}
/**
- * Gets the intersecting & including cells for the specified shape, without exceeding detail level.
- * The result is a set of cells (no dups), sorted. Unmodifiable.
+ * Gets the intersecting cells for the specified shape, without exceeding
+ * detail level. If a cell is within the query shape then it's marked as a
+ * leaf and none of its children are added.
*
- * This implementation checks if shape is a Point and if so uses an implementation that
- * recursively calls {@link Node#getSubCell(com.spatial4j.core.shape.Point)}. Cell subclasses
- * ideally implement that method with a quick implementation, otherwise, subclasses should
- * override this method to invoke {@link #getNodesAltPoint(com.spatial4j.core.shape.Point, int, boolean)}.
- * TODO consider another approach returning an iterator -- won't build up all cells in memory.
+ * This implementation checks if shape is a Point and if so returns {@link
+ * #getNodes(com.spatial4j.core.shape.Point, int, boolean)}.
+ *
+ * @param shape the shape; non-null
+ * @param detailLevel the maximum detail level to get cells for
+ * @param inclParents if true then all parent cells of leaves are returned
+ * too. The top world cell is never returned.
+ * @param simplify for non-point shapes, this will simply/aggregate sets of
+ * complete leaves in a cell to its parent, resulting in
+ * ~20-25% fewer cells.
+ * @return a set of cells (no dups), sorted, immutable, non-null
*/
- public List getNodes(Shape shape, int detailLevel, boolean inclParents) {
+ public List getNodes(Shape shape, int detailLevel, boolean inclParents,
+ boolean simplify) {
+ //TODO consider an on-demand iterator -- it won't build up all cells in memory.
if (detailLevel > maxLevels) {
throw new IllegalArgumentException("detailLevel > maxLevels");
}
-
- List cells;
if (shape instanceof Point) {
- //optimized point algorithm
- final int initialCapacity = inclParents ? 1 + detailLevel : 1;
- cells = new ArrayList(initialCapacity);
- recursiveGetNodes(getWorldNode(), (Point) shape, detailLevel, true, cells);
- assert cells.size() == initialCapacity;
- } else {
- cells = new ArrayList(inclParents ? 1024 : 512);
- recursiveGetNodes(getWorldNode(), shape, detailLevel, inclParents, cells);
+ return getNodes((Point) shape, detailLevel, inclParents);
}
- if (inclParents) {
- Node c = cells.remove(0);//remove getWorldNode()
- assert c.getLevel() == 0;
- }
+ List cells = new ArrayList(inclParents ? 4096 : 2048);
+ recursiveGetNodes(getWorldNode(), shape, detailLevel, inclParents, simplify, cells);
return cells;
}
- private void recursiveGetNodes(Node node, Shape shape, int detailLevel, boolean inclParents,
- Collection result) {
- if (node.isLeaf()) {//cell is within shape
- result.add(node);
- return;
+ /**
+ * Returns true if node was added as a leaf. If it wasn't it recursively
+ * descends.
+ */
+ private boolean recursiveGetNodes(Node node, Shape shape, int detailLevel,
+ boolean inclParents, boolean simplify,
+ List result) {
+ if (node.getLevel() == detailLevel) {
+ node.setLeaf();//FYI might already be a leaf
}
- final Collection subCells = node.getSubCells(shape);
- if (node.getLevel() == detailLevel - 1) {
- if (subCells.size() < node.getSubCellsSize() || node.getLevel() == 0) {
- if (inclParents)
+ if (node.isLeaf()) {
- result.add(node);
+ result.add(node);
- for (Node subCell : subCells) {
- subCell.setLeaf();
+ return true;
- }
+ }
- result.addAll(subCells);
- } else {//a bottom level (i.e. detail level) optimization where all boxes intersect, so use parent cell.
- node.setLeaf();//the cell may not be strictly within but its close
+ if (inclParents && node.getLevel() != 0)
- result.add(node);
+ result.add(node);
- }
- } else {
- if (inclParents) {
- result.add(node);
- }
+
+ Collection subCells = node.getSubCells(shape);
+ int leaves = 0;
- for (Node subCell : subCells) {
+ for (Node subCell : subCells) {
- recursiveGetNodes(subCell, shape, detailLevel, inclParents, result);//tail call
+ if (recursiveGetNodes(subCell, shape, detailLevel, inclParents, simplify, result))
+ leaves++;
- }
+ }
- }
- }
+ //can we simplify?
+ if (simplify && leaves == node.getSubCellsSize() && node.getLevel() != 0) {
+ //Optimization: substitute the parent as a leaf instead of adding all
+ // children as leaves
- private void recursiveGetNodes(Node node, Point point, int detailLevel, boolean inclParents,
- Collection result) {
- if (inclParents) {
+ //remove the leaves
+ do {
+ result.remove(result.size() - 1);//remove last
+ } while (--leaves > 0);
+ //add node as the leaf
+ node.setLeaf();
+ if (!inclParents) // otherwise it was already added up above
- result.add(node);
+ result.add(node);
+ return true;
}
- final Node pCell = node.getSubCell(point);
- if (node.getLevel() == detailLevel - 1) {
- pCell.setLeaf();
- result.add(pCell);
- } else {
- recursiveGetNodes(pCell, point, detailLevel, inclParents, result);//tail call
+ return false;
- }
+ }
- }
/**
- * Subclasses might override {@link #getNodes(com.spatial4j.core.shape.Shape, int, boolean)}
- * and check if the argument is a shape and if so, delegate
- * to this implementation, which calls {@link #getNode(com.spatial4j.core.shape.Point, int)} and
- * then calls {@link #getNode(String)} repeatedly if inclParents is true.
+ * A Point-optimized implementation of
+ * {@link #getNodes(com.spatial4j.core.shape.Shape, int, boolean)}. That
+ * method in facts calls this for points.
+ *
+ * This implementation depends on {@link #getNode(String)} being fast, as its
+ * called repeatedly when incPlarents is true.
*/
- protected final List getNodesAltPoint(Point p, int detailLevel, boolean inclParents) {
+ public List getNodes(Point p, int detailLevel, boolean inclParents) {
Node cell = getNode(p, detailLevel);
if (!inclParents) {
return Collections.singletonList(cell);
Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java (revision 1443751)
+++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java (revision )
@@ -156,14 +156,6 @@
return new QuadCell(bytes, offset, len);
}
- @Override //for performance
- public List getNodes(Shape shape, int detailLevel, boolean inclParents) {
- if (shape instanceof Point)
- return super.getNodesAltPoint((Point) shape, detailLevel, inclParents);
- else
- return super.getNodes(shape, detailLevel, inclParents);
- }
-
private void build(
double x,
double y,
Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixTreeStrategy.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixTreeStrategy.java (revision 1443751)
+++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixTreeStrategy.java (revision )
@@ -78,12 +78,14 @@
public abstract class PrefixTreeStrategy extends SpatialStrategy {
protected final SpatialPrefixTree grid;
private final Map provider = new ConcurrentHashMap();
+ protected final boolean simplifyIndexedCells;
protected int defaultFieldValuesArrayLen = 2;
protected double distErrPct = SpatialArgs.DEFAULT_DISTERRPCT;// [ 0 TO 0.5 ]
- public PrefixTreeStrategy(SpatialPrefixTree grid, String fieldName) {
+ public PrefixTreeStrategy(SpatialPrefixTree grid, String fieldName, boolean simplifyIndexedCells) {
super(grid.getSpatialContext(), fieldName);
this.grid = grid;
+ this.simplifyIndexedCells = simplifyIndexedCells;
}
/**
@@ -123,7 +125,7 @@
public Field[] createIndexableFields(Shape shape, double distErr) {
int detailLevel = grid.getLevelForDistance(distErr);
- List cells = grid.getNodes(shape, detailLevel, true);//true=intermediates cells
+ List cells = grid.getNodes(shape, detailLevel, true, simplifyIndexedCells);//intermediates cells
//TODO is CellTokenStream supposed to be re-used somehow? see Uwe's comments:
// http://code.google.com/p/lucene-spatial-playground/issues/detail?id=4
Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java (revision 1443751)
+++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java (revision )
@@ -38,7 +38,8 @@
private int prefixGridScanLevel;
public RecursivePrefixTreeStrategy(SpatialPrefixTree grid, String fieldName) {
- super(grid, fieldName);
+ super(grid, fieldName,
+ true);//simplify indexed cells
prefixGridScanLevel = grid.getMaxLevels() - 4;//TODO this default constant is dependent on the prefix grid size
}
Index: lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java (revision 1443751)
+++ lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java (revision )
@@ -113,7 +113,7 @@
double distErrPct = ((PrefixTreeStrategy) strategy).getDistErrPct();
double distErr = SpatialArgs.calcDistanceFromErrPct(snapMe, distErrPct, ctx);
int detailLevel = grid.getLevelForDistance(distErr);
- List cells = grid.getNodes(snapMe, detailLevel, false);
+ List cells = grid.getNodes(snapMe, detailLevel, false, true);
//calc bounding box of cells.
double minX = Double.POSITIVE_INFINITY, maxX = Double.NEGATIVE_INFINITY;
Index: lucene/spatial/src/test/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeTest.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/test/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeTest.java (revision 1443751)
+++ lucene/spatial/src/test/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeTest.java (revision )
@@ -18,14 +18,24 @@
package org.apache.lucene.spatial.prefix.tree;
import com.spatial4j.core.context.SpatialContext;
+import com.spatial4j.core.shape.Point;
import com.spatial4j.core.shape.Rectangle;
import com.spatial4j.core.shape.Shape;
-
-import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.spatial.SpatialTestCase;
+import org.apache.lucene.spatial.prefix.TermQueryPrefixTreeStrategy;
+import org.apache.lucene.spatial.query.SpatialArgs;
+import org.apache.lucene.spatial.query.SpatialOperation;
import org.junit.Before;
import org.junit.Test;
-public class SpatialPrefixTreeTest extends LuceneTestCase {
+public class SpatialPrefixTreeTest extends SpatialTestCase {
//TODO plug in others and test them
private SpatialContext ctx;
@@ -36,11 +46,12 @@
public void setUp() throws Exception {
super.setUp();
ctx = SpatialContext.GEO;
- trie = new GeohashPrefixTree(ctx,4);
}
@Test
public void testNodeTraverse() {
+ trie = new GeohashPrefixTree(ctx,4);
+
Node prevN = null;
Node n = trie.getWorldNode();
assertEquals(0,n.getLevel());
@@ -57,4 +68,40 @@
assertTrue(prevNShape.getHeight() > sbox.getHeight());
}
}
+ /**
+ * A PrefixTree pruning optimization gone bad.
+ * See UTF-8
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/TermQueryPrefixTreeStrategy.java (revision 1443751)
+++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/TermQueryPrefixTreeStrategy.java (revision )
@@ -43,7 +43,8 @@
public class TermQueryPrefixTreeStrategy extends PrefixTreeStrategy {
public TermQueryPrefixTreeStrategy(SpatialPrefixTree grid, String fieldName) {
- super(grid, fieldName);
+ super(grid, fieldName,
+ false);//do not simplify indexed cells
}
@Override
@@ -54,7 +55,9 @@
Shape shape = args.getShape();
int detailLevel = grid.getLevelForDistance(args.resolveDistErr(ctx, distErrPct));
- List cells = grid.getNodes(shape, detailLevel, false);
+ List cells = grid.getNodes(shape, detailLevel,
+ false,//no parents
+ true);//simplify
BytesRef[] terms = new BytesRef[cells.size()];
int i = 0;
for (Node cell : cells) {
Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/GeohashPrefixTree.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/GeohashPrefixTree.java (revision 1443751)
+++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/GeohashPrefixTree.java (revision )
@@ -93,12 +93,6 @@
return new GhCell(bytes, offset, len);
}
- @Override
- public List getNodes(Shape shape, int detailLevel, boolean inclParents) {
- return shape instanceof Point ? super.getNodesAltPoint((Point) shape, detailLevel, inclParents) :
- super.getNodes(shape, detailLevel, inclParents);
- }
-
class GhCell extends Node {
GhCell(String token) {
super(token);
Index: lucene/spatial/src/test/org/apache/lucene/spatial/prefix/JtsPolygonTest.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/test/org/apache/lucene/spatial/prefix/JtsPolygonTest.java (revision 1443751)
+++ lucene/spatial/src/test/org/apache/lucene/spatial/prefix/JtsPolygonTest.java (revision )
@@ -18,9 +18,19 @@
*/
import com.spatial4j.core.context.SpatialContextFactory;
+import com.spatial4j.core.shape.Point;
import com.spatial4j.core.shape.Shape;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TopDocs;
import org.apache.lucene.spatial.StrategyTestCase;
import org.apache.lucene.spatial.prefix.tree.GeohashPrefixTree;
+import org.apache.lucene.spatial.prefix.tree.QuadPrefixTree;
+import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialOperation;
import org.junit.Test;
@@ -68,6 +78,41 @@
SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects, shape);
args.setDistErrPct(distErrPct);
return args;
+ }
+
+ /**
+ * A PrefixTree pruning optimization gone bad.
+ * See