diff --git a/lucene/benchmark/conf/spatial2.alg b/lucene/benchmark/conf/spatial2.alg new file mode 100644 index 0000000..6e968f1 --- /dev/null +++ b/lucene/benchmark/conf/spatial2.alg @@ -0,0 +1,97 @@ +#/** +# * Licensed to the Apache Software Foundation (ASF) under one or more +# * contributor license agreements. See the NOTICE file distributed with +# * this work for additional information regarding copyright ownership. +# * The ASF licenses this file to You under the Apache License, Version 2.0 +# * (the "License"); you may not use this file except in compliance with +# * the License. You may obtain a copy of the License at +# * +# * http://www.apache.org/licenses/LICENSE-2.0 +# * +# * Unless required by applicable law or agreed to in writing, software +# * distributed under the License is distributed on an "AS IS" BASIS, +# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# * See the License for the specific language governing permissions and +# * limitations under the License. +# */ +# ------------------------------------------------------------------------------------- +# Spatial search benchmark +# In order to use this, you'll need to first run 'ant geonames-files'. +# You may need more memory when running this: -Dtask.mem=1000M (5000M for direct) +# For docs on what options are available, see the javadocs. 
+ +# Benchmark existing geohash and quad implementations +# ------------------------------------------------------------------------------------- +### Spatial Context, Grid, Strategy config +#work.dir=work/geohash +#work.dir=work/quad +work.dir=work/flex +doc.maker=org.apache.lucene.benchmark.byTask.feeds.SpatialDocMaker +#spatial.prefixTree=geohash +#spatial.prefixTree=quad +spatial.prefixTree=flex +spatial.levelPattern=3,1*,3 +# 1m or better +spatial.maxDistErr = 0.000009 +spatial.docPointsOnly=true +spatial.pruneLeafyBranches=false +#spatial.distErrPct=.25 +codec.postingsFormat=Direct + +### Source & Doc +content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource +line.parser=org.apache.lucene.benchmark.byTask.feeds.GeonamesLineParser +docs.file=work/geonames/allCountries.txt +doc.tokenized=false + +### Directory +directory=FSDirectory +compound=false +merge.factor=10 +ram.flush.mb=64 +concurrent.merge.scheduler.max.thread.count=2 + +### Query +query.maker=org.apache.lucene.benchmark.byTask.feeds.SpatialFileQueryMaker +query.file=work/geonames/allCountries.txt +query.file.line.parser=org.apache.lucene.benchmark.byTask.feeds.GeonamesLineParser +#query.file.maxQueries=1000 + +# Next 3 props convert query points to circles with a random radius and then optionally bbox'es +query.spatial.radiusDegrees=0 +query.spatial.radiusDegreesRandPlusMinus=3 +query.spatial.bbox=true +query.spatial.prefixGridScanLevel=pgsl:-1:-2:-3:-4:-5:-6:-7:-8:-9:-10:1 +query.spatial.score=false + +### Misc + +log.step.AddDoc = 100000 + +{ "Populate" + ResetSystemErase + CreateIndex + #1 million docs, to speed up add, do it in parallel + [{ "MAddDocs" AddDoc} : 500000] : 4 + ForceMerge(1) + CommitIndex + CloseIndex +} : 1 + +#set above round to 0 on subsequent runs if not changing indexing but experimenting with search + +OpenReader +{"WarmJIT" Search > : 4000 +CloseReader + +{ "Rounds" + ResetSystemSoft + + OpenReader + Search + {"Queries" Search > : 4000 + CloseReader + + NewRound 
+} : 11 +RepSumByPrefRound Queries diff --git a/lucene/spatial/NOTE b/lucene/spatial/NOTE new file mode 100644 index 0000000..8cbd068 --- /dev/null +++ b/lucene/spatial/NOTE @@ -0,0 +1,2 @@ +'[junit4]' is not recognized as an internal or external command, +operable program or batch file. diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/FlexPrefixTree2D.java b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/FlexPrefixTree2D.java new file mode 100644 index 0000000..c653694 --- /dev/null +++ b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/FlexPrefixTree2D.java @@ -0,0 +1,631 @@ +package org.apache.lucene.spatial.prefix.tree; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.util.Map; + +import com.spatial4j.core.context.SpatialContext; +import com.spatial4j.core.shape.Point; +import com.spatial4j.core.shape.Rectangle; +import com.spatial4j.core.shape.Shape; +import com.spatial4j.core.shape.SpatialRelation; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.StringHelper; + + +public class FlexPrefixTree2D extends SpatialPrefixTree { + + /** + * Factory for creating {@link FlexPrefixTree2D} instances with useful defaults + */ + public static class Factory extends SpatialPrefixTreeFactory { + + public static final String LEVEL_PATTERN = "levelPattern"; + public static final String LEVEL_PATTERN_REGEX = "(\\d,)*(\\d\\*)(,\\d)*"; //only the number of cells to be repeated must be specified + @Override + protected void init(Map args, SpatialContext ctx) { + this.args = args; + this.ctx = ctx; + initNumberOfCellsPerLevel(); + initMaxLevels(); + } + + private void initNumberOfCellsPerLevel() { + + } + + @Override + protected int getLevelForDistance(double degrees) { + FlexPrefixTree2D grid = new FlexPrefixTree2D(ctx, MAX_LEVELS_POSSIBLE); + return grid.getLevelForDistance(degrees); + } + + @Override + protected SpatialPrefixTree newSPT() { + String levelPattern = args.get(LEVEL_PATTERN); + if (levelPattern != null && levelPattern.matches(LEVEL_PATTERN_REGEX)) { + int maxLevels=this.maxLevels != null ? 
this.maxLevels : MAX_LEVELS_POSSIBLE; + int []numberOfSubCells=null; + String []levels = levelPattern.split(","); + numberOfSubCells = new int[MAX_LEVELS_POSSIBLE]; + int []temp = new int[levels.length]; + int repeatLevel=levelPatternToIntArray(levels,temp); + int numberOfTimesTobeRepeated=maxLevels-(sum(temp)-temp[repeatLevel]); + System.arraycopy(temp,0,numberOfSubCells,0,repeatLevel); + for(int i=0;i> division); + } + + doubleToInt = Math.min((1 << maxLevels) / (bounds.getMaxY() - bounds.getMinY()), (1 << maxLevels) / (bounds.getMaxX() - bounds.getMinX())); + + intToDouble = Math.max((bounds.getMaxX() - bounds.getMinX()) / (1 << maxLevels), (bounds.getMaxY() - bounds.getMinY()) / (1 << maxLevels)); + + newOriginX = bounds.getMinX(); + newOriginY = bounds.getMinY(); + xMax = (int) Math.ceil(bounds.getMaxX() * doubleToInt); + yMax = (int) Math.ceil(bounds.getMaxX() * doubleToInt); + } + + private int getMaxLevelsFromPowersOfFour(int[] numberOfCellsAsExponentOfFour) { + int sum = 0; + for (int i=0;i (gridSizes[i]*intToDouble)) { + return i; + } + } + return maxLevels; + } + + @Override + public double getDistanceForLevel(int level) { + if (level < 1 || level > getMaxLevels()) + throw new IllegalArgumentException("Level must be in 1 to maxLevels range"); + //get the grid width and height for that level + double width = gridSizes[level] * intToDouble; + double height = gridSizes[level] * intToDouble; + //Use standard cartesian hypotenuse. For geospatial, this answer is larger + // than the correct one but it's okay to over-estimate. 
+ return Math.sqrt(width * width + height * height); + + } + + @Override + public FlexCell getWorldCell() { + return new CellStack(maxLevels, xMin, yMin).cells[0]; + } + + @Override + public Cell readCell(BytesRef term, Cell scratch) { + FlexCell cell = (FlexCell) scratch; + if (scratch == null) { + cell = getWorldCell(); + } + //First get the length of the term + int termLength = term.length; + + //We store at cell a cellstack len + leaf bytes + termLength -= term.bytes[term.offset + term.length - 1] == LEAF_BYTE ? 1 : 0; + + //Now from the cellstack obtain the correct numbered cell + FlexCell cells[] = cell.getCellStack().cells; + cells[termLength].reuseWithFreshTerm(term); + cell.getCellStack().invalidate(0); + return cells[termLength]; + } + + @Override + public String toString() { + StringBuilder subcells = new StringBuilder(); + for(int i=0;i 0 && cellStack.term.bytes[cellStack.term.offset + cellStack.term.length - 1] == LEAF_BYTE); + if (isLeaf) + cellStack.term.length--; + return this; + } + + private Cell reuse() { + cellStack.invalidate(cellLevel); + this.isShapeSet = false; + this.isLeaf = false; + this.shapeRel = null; + return this; + } + + + private SpatialRelation relateIntegerCoordinate(int int_min, int int_max, int ext_min, int ext_max) { + if (ext_min > int_max || ext_max < int_min) { + return SpatialRelation.DISJOINT; + } + + if (ext_min >= int_min && ext_max <= int_max) { + return SpatialRelation.CONTAINS; + } + + if (ext_min <= int_min && ext_max >= int_max) { + return SpatialRelation.WITHIN; + } + return SpatialRelation.INTERSECTS; + } + + protected SpatialRelation relateIntegerRectangle() { + int xMax = this.xMin + gridSizes[this.cellLevel]; + if (xMax < FlexPrefixTree2D.this.xMax) { + xMax -= 1; + } + int ymax = this.yMin + gridSizes[this.cellLevel]; + if (ymax < FlexPrefixTree2D.this.yMax) { + ymax -= 1; + } + SpatialRelation yIntersect = relateIntegerCoordinate(this.yMin, ymax, this.cellStack.shapeFilterYMin, 
this.cellStack.shapeFilterYMax); + if (yIntersect == SpatialRelation.DISJOINT) + return SpatialRelation.DISJOINT; + SpatialRelation xIntersect = relateIntegerCoordinate(this.xMin, xMax, this.cellStack.shapeFilterXMin, this.cellStack.shapeFilterXMax); + if (xIntersect == SpatialRelation.DISJOINT) + return SpatialRelation.DISJOINT; + if (xIntersect == yIntersect)//in agreement + return xIntersect; + if (this.cellStack.shapeFilterXMin == this.xMin && this.cellStack.shapeFilterXMax == xMax) + return yIntersect; + if (this.cellStack.shapeFilterYMin == this.yMin && this.cellStack.shapeFilterYMax == ymax) + return xIntersect; + return SpatialRelation.INTERSECTS; + } + + } + + + /** + * An Iterator for FlexCells. This iterator reuses cells at a level and iterates over the siblings + * initIter can be used to reuse the cell Iterator + */ + private class FlexPrefixTreeIterator extends CellIterator { + + private final BytesRef term; + private final int bytePos; + private final int endCellNumber; + private final FlexCell cell; + private int nextCellNumber; + private Shape shapeFilter; + + + protected FlexPrefixTreeIterator(FlexCell cell, BytesRef sharedTerm, int level) { + this.term = sharedTerm; + this.cell = cell; + if (level < maxLevels) { + this.endCellNumber = (1 <<( numberOfSubCellsAsExponentOfFour[level] + numberOfSubCellsAsExponentOfFour[level])) + 1; + } else { + this.endCellNumber = 0; + } + this.bytePos = level; + } + + //Inititalizes the Iterator, so that we can reuse the iterator + protected CellIterator init(Shape shapeFilter, int start) { + this.nextCell = null; + this.thisCell = null; + //Level 0 does not store a byte its byte pos is -1, but, in makeshape this is handled + //Level 1 stores its byte at index 0 + //this.bytePos = this.scratch.cellLevel-1; + this.shapeFilter = shapeFilter; + this.cell.cellStack.findIntegerBoundingBox(shapeFilter); + this.nextCellNumber = start; + return this; + } + + //Concatenates to the source BytesRef the given byte and places 
into te target + private void changeTailByte(byte b) { + term.bytes[term.offset + bytePos] = b; + } + + @Override + public boolean hasNext() { + thisCell = null; + if (nextCell != null)//calling hasNext twice in a row + return true; + while (levelHasUntraversedCell()) { + SpatialRelation rel = null; + nextCell = cell; + if (shapeFilter == null) { + return true; + } else { + FlexCell nextFlexCell = (FlexCell) nextCell; + nextFlexCell.cellStack.decode(nextFlexCell.cellLevel); + rel = getSpatialRelation(nextFlexCell); + if (rel.intersects()) { + nextCell.setShapeRel(rel); + if (rel == SpatialRelation.WITHIN) + nextCell.setLeaf(); // Since the relation is a within no further decomposition will be required + if (rel == SpatialRelation.CONTAINS) { + stopLevelIteration(); + } + return true; + } + } + } + return false; + } + + private SpatialRelation getSpatialRelation(FlexCell nextFlexCell) { + SpatialRelation rel = null; + if (!nextFlexCell.cellStack.shapeFilterBoundingBox.getCrossesDateLine()) { + rel = nextFlexCell.relateIntegerRectangle(); + if (!(shapeFilter instanceof Rectangle || shapeFilter instanceof Point) || (nextFlexCell.cellStack.shapeFilterBoundingBox.getCrossesDateLine()) || rel == SpatialRelation.WITHIN) + rel = null; + } + if (rel == null) { + rel = nextCell.getShape().relate(shapeFilter); + } + return rel; + } + + + private void stopLevelIteration() { + nextCellNumber = endCellNumber + 1; + } + + //Populates into scratch the next cell in z-order TODO Hilbert ordering + private boolean levelHasUntraversedCell() { + if (nextCellNumber > endCellNumber) { + nextCell = null; + return false; + } + this.cell.cellStack.term.length = this.cell.cellLevel; + //We must call this as we want the cell to invalidate its ShapeCache + changeTailByte((byte) nextCellNumber); + cell.reuse(); + ++nextCellNumber; + return true; + } + } + + /** + * A stack of flexCells with the following characteristics + * - Lazy decoding of cells + * - Cells from the same CellStack share 
BytesRef + */ + private class CellStack { + + protected final FlexCell cells[]; + protected int lastDecodedLevel = 0; + protected BytesRef term; + + //ShapeFilter bounding box and calculations + private int shapeFilterXMin; + private int shapeFilterXMax; + private int shapeFilterYMin; + private int shapeFilterYMax; + private Rectangle shapeFilterBoundingBox; + private Shape shapeFilter; + + public CellStack(int maxLevels, int xmin, int ymin) { + this.cells = new FlexCell[maxLevels + 1]; + term = new BytesRef(maxLevels + 1); //+1 For leaf and this byteRef will be shared within the stack + for (int level = maxLevels; level >= 0; --level) { + if (level != maxLevels) { + cells[level] = new FlexCell(cells[level + 1], this, level); + } else { + cells[level] = new FlexCell(null, this, level); + } + } + //? The xmin,ymin needs to be set for the top cell. From there its decoded lazily a level at a time + cells[0].setMinCornerCoordinates(xmin, ymin); + } + + private void findIntegerBoundingBox(Shape shapeFilter) { + if (shapeFilter != null && this.shapeFilter != shapeFilter) { // object equivalence? 
+ //TODO this remains same for a given FPT and given shape + shapeFilterBoundingBox = shapeFilter.getBoundingBox(); + this.shapeFilterXMax = (int) ((shapeFilterBoundingBox.getMaxX() - bounds.getMinX()) * doubleToInt); + this.shapeFilterXMin = (int) ((shapeFilterBoundingBox.getMinX() - bounds.getMinX()) * doubleToInt); + this.shapeFilterYMax = (int) ((shapeFilterBoundingBox.getMaxY() - bounds.getMinY()) * doubleToInt); + this.shapeFilterYMin = (int) ((shapeFilterBoundingBox.getMinY() - bounds.getMinY()) * doubleToInt); + this.shapeFilter = shapeFilter; + } + + } + + protected void decode(int cellLevel) { + int xmin; + int ymin; + int row; + int col; + int c; + int division; + //decode all cells from the last decoded cell to the desired cell + for (int i = lastDecodedLevel; i < cellLevel; i++) { + xmin = cells[i].xMin; + ymin = cells[i].yMin; + c = term.bytes[term.offset + i] - 2; + division = numberOfSubCellsAsExponentOfFour[i]; + col = (c >> division); + row = (c - (1 << division) * col); // Is this worthwhile? + xmin += gridSizes[i + 1] * col; + ymin += gridSizes[i + 1] * row; + cells[i + 1].setMinCornerCoordinates(xmin, ymin); + } + if (lastDecodedLevel < cellLevel) { + lastDecodedLevel = cellLevel; + } + } + + + /** + * Invalidates the decoding of a cell forcing decoding to happen again + * + * @param cellLevel the Cell whose decoding is to be done + */ + protected void invalidate(int cellLevel) { + lastDecodedLevel = Math.max(cellLevel - 1, 0); //Note: Cell at level 0 is always decoded. 
+ } + + } +} diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeFactory.java b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeFactory.java index f5a4bc4..f4992ed 100644 --- a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeFactory.java +++ b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeFactory.java @@ -52,6 +52,9 @@ public abstract class SpatialPrefixTreeFactory { instance = new GeohashPrefixTree.Factory(); else if ("quad".equalsIgnoreCase(cname)) instance = new QuadPrefixTree.Factory(); + else if("flex".equalsIgnoreCase(cname)){ + instance = new FlexPrefixTree2D.Factory(); + } else { try { Class c = classLoader.loadClass(cname); diff --git a/lucene/spatial/src/test/org/apache/lucene/spatial/DistanceStrategyTest.java b/lucene/spatial/src/test/org/apache/lucene/spatial/DistanceStrategyTest.java index f260502..3173357 100644 --- a/lucene/spatial/src/test/org/apache/lucene/spatial/DistanceStrategyTest.java +++ b/lucene/spatial/src/test/org/apache/lucene/spatial/DistanceStrategyTest.java @@ -26,6 +26,7 @@ import org.apache.lucene.document.FieldType; import org.apache.lucene.spatial.bbox.BBoxStrategy; import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy; import org.apache.lucene.spatial.prefix.TermQueryPrefixTreeStrategy; +import org.apache.lucene.spatial.prefix.tree.FlexPrefixTree2D; import org.apache.lucene.spatial.prefix.tree.GeohashPrefixTree; import org.apache.lucene.spatial.prefix.tree.QuadPrefixTree; import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; @@ -48,10 +49,16 @@ public class DistanceStrategyTest extends StrategyTestCase { SpatialPrefixTree grid; SpatialStrategy strategy; - grid = new QuadPrefixTree(ctx,25); + grid = new FlexPrefixTree2D(ctx,25); strategy = new RecursivePrefixTreeStrategy(grid, "recursive_quad"); ctorArgs.add(new Object[]{new Param(strategy)}); + grid = new 
FlexPrefixTree2D(ctx,25); + RecursivePrefixTreeStrategy rec_strategy = new RecursivePrefixTreeStrategy(grid, "recursive_flex"); + rec_strategy.setPruneLeafyBranches(false); + strategy = rec_strategy; + ctorArgs.add(new Object[]{new Param(strategy)}); + grid = new GeohashPrefixTree(ctx,12); strategy = new TermQueryPrefixTreeStrategy(grid, "termquery_geohash"); ctorArgs.add(new Object[]{new Param(strategy)}); diff --git a/lucene/spatial/src/test/org/apache/lucene/spatial/PortedSolr3Test.java b/lucene/spatial/src/test/org/apache/lucene/spatial/PortedSolr3Test.java index cc3fb02..cf7cab4 100644 --- a/lucene/spatial/src/test/org/apache/lucene/spatial/PortedSolr3Test.java +++ b/lucene/spatial/src/test/org/apache/lucene/spatial/PortedSolr3Test.java @@ -28,6 +28,7 @@ import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy; import org.apache.lucene.spatial.prefix.TermQueryPrefixTreeStrategy; +import org.apache.lucene.spatial.prefix.tree.FlexPrefixTree2D; import org.apache.lucene.spatial.prefix.tree.GeohashPrefixTree; import org.apache.lucene.spatial.prefix.tree.QuadPrefixTree; import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; @@ -62,6 +63,12 @@ public class PortedSolr3Test extends StrategyTestCase { strategy = new RecursivePrefixTreeStrategy(grid, "recursive_quad"); ctorArgs.add(new Object[]{new Param(strategy)}); + grid = new FlexPrefixTree2D(ctx,25); + RecursivePrefixTreeStrategy rec_strategy = new RecursivePrefixTreeStrategy(grid, "recursive_flex"); + rec_strategy.setPruneLeafyBranches(false); + strategy = rec_strategy; + ctorArgs.add(new Object[]{new Param(strategy)}); + grid = new GeohashPrefixTree(ctx,12); strategy = new TermQueryPrefixTreeStrategy(grid, "termquery_geohash"); ctorArgs.add(new Object[]{new Param(strategy)}); diff --git a/lucene/spatial/src/test/org/apache/lucene/spatial/QueryEqualsHashCodeTest.java 
b/lucene/spatial/src/test/org/apache/lucene/spatial/QueryEqualsHashCodeTest.java index 9eed512..aea2ba5 100644 --- a/lucene/spatial/src/test/org/apache/lucene/spatial/QueryEqualsHashCodeTest.java +++ b/lucene/spatial/src/test/org/apache/lucene/spatial/QueryEqualsHashCodeTest.java @@ -22,6 +22,7 @@ import com.spatial4j.core.shape.Shape; import org.apache.lucene.spatial.bbox.BBoxStrategy; import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy; import org.apache.lucene.spatial.prefix.TermQueryPrefixTreeStrategy; +import org.apache.lucene.spatial.prefix.tree.FlexPrefixTree2D; import org.apache.lucene.spatial.prefix.tree.GeohashPrefixTree; import org.apache.lucene.spatial.prefix.tree.QuadPrefixTree; import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; @@ -44,10 +45,12 @@ public class QueryEqualsHashCodeTest extends LuceneTestCase { final SpatialPrefixTree gridQuad = new QuadPrefixTree(ctx,10); final SpatialPrefixTree gridGeohash = new GeohashPrefixTree(ctx,10); + final SpatialPrefixTree gridFlex = new FlexPrefixTree2D(ctx,10); Collection strategies = new ArrayList<>(); strategies.add(new RecursivePrefixTreeStrategy(gridGeohash, "recursive_geohash")); strategies.add(new TermQueryPrefixTreeStrategy(gridQuad, "termquery_quad")); + strategies.add(new TermQueryPrefixTreeStrategy(gridFlex, "termquery_flex")); strategies.add(new PointVectorStrategy(ctx, "pointvector")); strategies.add(new BBoxStrategy(ctx, "bbox")); strategies.add(new SerializedDVStrategy(ctx, "serialized")); diff --git a/lucene/spatial/src/test/org/apache/lucene/spatial/StrategyTestCase.java b/lucene/spatial/src/test/org/apache/lucene/spatial/StrategyTestCase.java index 61a45b4..2a396ea 100644 --- a/lucene/spatial/src/test/org/apache/lucene/spatial/StrategyTestCase.java +++ b/lucene/spatial/src/test/org/apache/lucene/spatial/StrategyTestCase.java @@ -250,4 +250,8 @@ public abstract class StrategyTestCase extends SpatialTestCase { deleteAll();//clean up after ourselves } + public void 
testSomething(){ + + } + } diff --git a/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/JtsPolygonTest.java b/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/JtsPolygonTest.java index 6dbaa9b..b28e2f7 100644 --- a/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/JtsPolygonTest.java +++ b/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/JtsPolygonTest.java @@ -28,6 +28,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.spatial.StrategyTestCase; +import org.apache.lucene.spatial.prefix.tree.FlexPrefixTree2D; import org.apache.lucene.spatial.prefix.tree.GeohashPrefixTree; import org.apache.lucene.spatial.prefix.tree.QuadPrefixTree; import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; diff --git a/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/RandomSpatialOpFuzzyPrefixTreeTest.java b/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/RandomSpatialOpFuzzyPrefixTreeTest.java index e302554..7215446 100644 --- a/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/RandomSpatialOpFuzzyPrefixTreeTest.java +++ b/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/RandomSpatialOpFuzzyPrefixTreeTest.java @@ -34,7 +34,9 @@ import org.apache.lucene.search.Query; import org.apache.lucene.spatial.StrategyTestCase; import org.apache.lucene.spatial.prefix.tree.Cell; import org.apache.lucene.spatial.prefix.tree.CellIterator; +import org.apache.lucene.spatial.prefix.tree.FlexPrefixTree2D; import org.apache.lucene.spatial.prefix.tree.GeohashPrefixTree; +import org.apache.lucene.spatial.prefix.tree.LegacyCell; import org.apache.lucene.spatial.prefix.tree.QuadPrefixTree; import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; import org.apache.lucene.spatial.query.SpatialArgs; @@ -61,8 +63,10 @@ import static com.spatial4j.core.shape.SpatialRelation.DISJOINT; import static 
com.spatial4j.core.shape.SpatialRelation.INTERSECTS; import static com.spatial4j.core.shape.SpatialRelation.WITHIN; -/** Randomized PrefixTree test that considers the fuzziness of the - * results introduced by grid approximation. */ +/** + * Randomized PrefixTree test that considers the fuzziness of the + * results introduced by grid approximation. + */ public class RandomSpatialOpFuzzyPrefixTreeTest extends StrategyTestCase { static final int ITERATIONS = 10; @@ -71,16 +75,22 @@ public class RandomSpatialOpFuzzyPrefixTreeTest extends StrategyTestCase { private SpatialContext ctx2D; public void setupGrid(int maxLevels) throws IOException { - if (randomBoolean()) - setupQuadGrid(maxLevels); - else - setupGeohashGrid(maxLevels); + switch (1) { + case 1: + setupFlexGrid(maxLevels); + ((RecursivePrefixTreeStrategy) strategy).setPruneLeafyBranches(false); + break; + case 2: + setupQuadGrid(maxLevels); + ((RecursivePrefixTreeStrategy) strategy).setPruneLeafyBranches(randomBoolean()); + break; + case 3: + setupGeohashGrid(maxLevels); + ((RecursivePrefixTreeStrategy) strategy).setPruneLeafyBranches(randomBoolean()); + break; + } setupCtx2D(ctx); - //((PrefixTreeStrategy) strategy).setDistErrPct(0);//fully precise to grid - - ((RecursivePrefixTreeStrategy)strategy).setPruneLeafyBranches(randomBoolean()); - System.out.println("Strategy: " + strategy.toString()); } @@ -108,6 +118,24 @@ public class RandomSpatialOpFuzzyPrefixTreeTest extends StrategyTestCase { this.strategy = new RecursivePrefixTreeStrategy(grid, getClass().getSimpleName()); } + private void setupFlexGrid(int maxLevels) { + //non-geospatial makes this test a little easier (in gridSnap), and using boundary values 2^X raises + // the prospect of edge conditions we want to test, plus makes for simpler numbers (no decimals). 
+ SpatialContextFactory factory = new SpatialContextFactory(); + factory.geo = false; + factory.worldBounds = new RectangleImpl(0, 256, -128, 128, null); + this.ctx = factory.newSpatialContext(); + //A fairly shallow grid, and default 2.5% distErrPct + if (maxLevels == -1) + maxLevels = randomIntBetween(1, 8);//max 64k cells (4^8), also 256*256 + int[] numberOfCells = new int[maxLevels + 1]; + for (int i = 0; i < maxLevels; ++i) { + numberOfCells[i] = randomIntBetween(1, 3); + } + this.grid = new FlexPrefixTree2D(ctx, maxLevels); + this.strategy = new RecursivePrefixTreeStrategy(grid, getClass().getSimpleName()); + } + public void setupGeohashGrid(int maxLevels) { this.ctx = SpatialContext.GEO; //A fairly shallow grid, and default 2.5% distErrPct @@ -138,7 +166,9 @@ public class RandomSpatialOpFuzzyPrefixTreeTest extends StrategyTestCase { doTest(SpatialOperation.Contains); } - /** See LUCENE-5062, {@link ContainsPrefixTreeFilter#multiOverlappingIndexedShapes}. */ + /** + * See LUCENE-5062, {@link ContainsPrefixTreeFilter#multiOverlappingIndexedShapes}. + */ @Test public void testContainsPairOverlap() throws IOException { setupQuadGrid(3); @@ -164,7 +194,8 @@ public class RandomSpatialOpFuzzyPrefixTreeTest extends StrategyTestCase { assertTrue(searchResults.numFound == 0); } - @Test /** LUCENE-4916 */ + @Test + /** LUCENE-4916 */ public void testWithinLeafApproxRule() throws IOException { setupQuadGrid(2);//4x4 grid //indexed shape will simplify to entire right half (2 top cells) @@ -178,13 +209,13 @@ public class RandomSpatialOpFuzzyPrefixTreeTest extends StrategyTestCase { // from the query and thus not a match. assertTrue(executeQuery(strategy.makeQuery( new SpatialArgs(SpatialOperation.IsWithin, ctx.makeRectangle(38, 192, -72, 56)) - ), 1).numFound==0);//no-match + ), 1).numFound == 0);//no-match //this time the rect is a little bigger and is considered a match. It's a // an acceptable false-positive because of the grid approximation. 
assertTrue(executeQuery(strategy.makeQuery( new SpatialArgs(SpatialOperation.IsWithin, ctx.makeRectangle(38, 192, -72, 80)) - ), 1).numFound==1);//match + ), 1).numFound == 1);//match } @Test @@ -207,8 +238,8 @@ public class RandomSpatialOpFuzzyPrefixTreeTest extends StrategyTestCase { Collection shapes; if (shape instanceof ShapePair) { shapes = new ArrayList<>(2); - shapes.add(((ShapePair)shape).shape1); - shapes.add(((ShapePair)shape).shape2); + shapes.add(((ShapePair) shape).shape1); + shapes.add(((ShapePair) shape).shape2); } else { shapes = Collections.singleton(shape); } @@ -284,7 +315,9 @@ public class RandomSpatialOpFuzzyPrefixTreeTest extends StrategyTestCase { final Shape queryShape; switch (randomInt(10)) { - case 0: queryShape = randomPoint(); break; + case 0: + queryShape = randomPoint(); + break; // LUCENE-5549 //TODO debug: -Dtests.method=testWithin -Dtests.multiplier=3 -Dtests.seed=5F5294CE2E075A3E:AAD2F0F79288CA64 // case 1:case 2:case 3: @@ -292,11 +325,12 @@ public class RandomSpatialOpFuzzyPrefixTreeTest extends StrategyTestCase { // queryShape = randomShapePairRect(!biasContains);//invert biasContains for query side // break; // } - default: queryShape = randomRectangle(); + default: + queryShape = randomRectangle(); } final Shape queryShapeGS = gridSnap(queryShape); - final boolean opIsDisjoint = operation == SpatialOperation.IsDisjointTo; + final boolean opIsDisjoint = (operation == SpatialOperation.IsDisjointTo); //Generate truth via brute force: // We ensure true-positive matches (if the predicate on the raw shapes match @@ -349,6 +383,7 @@ public class RandomSpatialOpFuzzyPrefixTreeTest extends StrategyTestCase { String id = result.getId(); boolean removed = remainingExpectedIds.remove(id); if (!removed && (!opIsDisjoint && !secondaryIds.contains(id))) { + System.out.println(indexedShapes.get(id)); fail("Shouldn't match", id, indexedShapes, indexedShapesGS, queryShape); } } @@ -400,7 +435,12 @@ public class 
RandomSpatialOpFuzzyPrefixTreeTest extends StrategyTestCase { Cell cell = cells.next(); if (!cell.isLeaf()) continue; - cellShapes.add(cell.getShape()); + if (cell instanceof LegacyCell) { + cellShapes.add(cell.getShape()); + } else { + Rectangle rect = (Rectangle) cell.getShape(); + cellShapes.add(ctx.makeRectangle(rect.getMinX(), rect.getMaxX(), rect.getMinY(), rect.getMaxY())); + } } return new ShapeCollection<>(cellShapes, ctx).getBoundingBox(); } @@ -458,21 +498,35 @@ public class RandomSpatialOpFuzzyPrefixTreeTest extends StrategyTestCase { //See if the correct answer is actually Contains, when the indexed shapes are adjacent, // creating a larger shape that contains the input shape. boolean pairTouches = shape1.relate(shape2).intersects(); - if (!pairTouches) - return r; + if (!pairTouches) { + if (isAdjacent(shape1, shape2)) { + return CONTAINS; + } else return r; + } //test all 4 corners // Note: awkwardly, we use a non-geo context for this because in geo, -180 & +180 are the same place, which means - // that "other" might wrap the world horizontally and yet all it's corners could be in shape1 (or shape2) even - // though shape1 is only adjacent to the dateline. I couldn't think of a better way to handle this. - Rectangle oRect = (Rectangle)other; + // that "other" might wrap the world horizontally and yet all it's corners could be in shape1 (or shape2) even + // though shape1 is only adjacent to the dateline. I couldn't think of a better way to handle this. 
+ Rectangle oRect = (Rectangle) other; if (cornerContainsNonGeo(oRect.getMinX(), oRect.getMinY()) && cornerContainsNonGeo(oRect.getMinX(), oRect.getMaxY()) && cornerContainsNonGeo(oRect.getMaxX(), oRect.getMinY()) - && cornerContainsNonGeo(oRect.getMaxX(), oRect.getMaxY()) ) + && cornerContainsNonGeo(oRect.getMaxX(), oRect.getMaxY())) return CONTAINS; return r; } + private boolean isAdjacent(Shape shape1, Shape shape2) { + if (Math.nextAfter(shape1.getBoundingBox().getMaxX(), Double.POSITIVE_INFINITY) == shape2.getBoundingBox().getMinX() || Math.nextAfter(shape2.getBoundingBox().getMaxX(), Double.POSITIVE_INFINITY) == shape1.getBoundingBox().getMinX()) { + return true; + } + if (Math.nextAfter(shape1.getBoundingBox().getMaxY(), Double.POSITIVE_INFINITY) == shape2.getBoundingBox().getMinY() || Math.nextAfter(shape2.getBoundingBox().getMaxY(), Double.POSITIVE_INFINITY) == shape1.getBoundingBox().getMinY()) { + return true; + } + + return false; + } + private boolean cornerContainsNonGeo(double x, double y) { Shape pt = ctx2D.makePoint(x, y); return shape1_2D.relate(pt).intersects() || shape2_2D.relate(pt).intersects(); diff --git a/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/TestTermQueryPrefixGridStrategy.java b/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/TestTermQueryPrefixGridStrategy.java index 97c2690..bd79316 100644 --- a/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/TestTermQueryPrefixGridStrategy.java +++ b/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/TestTermQueryPrefixGridStrategy.java @@ -24,8 +24,12 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.StoredField; import org.apache.lucene.document.StringField; import org.apache.lucene.spatial.SpatialTestCase; +import org.apache.lucene.spatial.prefix.tree.FlexPrefixTree2D; import org.apache.lucene.spatial.prefix.tree.QuadPrefixTree; +import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; import 
org.apache.lucene.spatial.query.SpatialArgsParser; +import org.junit.Before; +import org.junit.BeforeClass; import org.junit.Test; import java.io.IOException; @@ -34,10 +38,25 @@ import java.util.Arrays; public class TestTermQueryPrefixGridStrategy extends SpatialTestCase { + @Override + @Before + public void setUp() throws Exception { + super.setUp(); + this.ctx = SpatialContext.GEO; + } + @Test - public void testNGramPrefixGridLosAngeles() throws IOException { - SpatialContext ctx = SpatialContext.GEO; - TermQueryPrefixTreeStrategy prefixGridStrategy = new TermQueryPrefixTreeStrategy(new QuadPrefixTree(ctx), "geo"); + public void testNGramPrefixGridLosAngeles_flex() throws IOException { + testNGramPrefixGridLosAngelesWithTrie(new FlexPrefixTree2D(ctx)); + } + + @Test + public void testNGramPrefixGridLosAngeles_quad() throws IOException { + testNGramPrefixGridLosAngelesWithTrie(new QuadPrefixTree(ctx)); + } + + private void testNGramPrefixGridLosAngelesWithTrie(SpatialPrefixTree trie) throws IOException { + TermQueryPrefixTreeStrategy prefixGridStrategy = new TermQueryPrefixTreeStrategy(trie, "geo"); Shape point = ctx.makePoint(-118.243680, 34.052230); diff --git a/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeTest.java b/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeTest.java index 9f53546..50aad6a 100644 --- a/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeTest.java +++ b/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeTest.java @@ -17,10 +17,13 @@ package org.apache.lucene.spatial.prefix.tree; * limitations under the License. 
*/ +import com.carrotsearch.randomizedtesting.annotations.Repeat; import com.spatial4j.core.context.SpatialContext; +import com.spatial4j.core.context.SpatialContextFactory; import com.spatial4j.core.shape.Point; import com.spatial4j.core.shape.Rectangle; import com.spatial4j.core.shape.Shape; +import com.spatial4j.core.shape.ShapeCollection; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Store; @@ -38,11 +41,14 @@ import org.junit.Test; import java.util.ArrayList; import java.util.List; +import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween; + public class SpatialPrefixTreeTest extends SpatialTestCase { //TODO plug in others and test them private SpatialContext ctx; private SpatialPrefixTree trie; + final int ITERATIONS = 10; @Override @Before @@ -53,7 +59,7 @@ public class SpatialPrefixTreeTest extends SpatialTestCase { @Test public void testCellTraverse() { - trie = new GeohashPrefixTree(ctx,4); + trie = new GeohashPrefixTree(ctx, 4); Cell prevC = null; Cell c = trie.getWorldCell(); @@ -66,9 +72,9 @@ public class SpatialPrefixTreeTest extends SpatialTestCase { while (subCellsIter.hasNext()) { subCells.add(subCellsIter.next()); } - c = subCells.get(random().nextInt(subCells.size()-1)); - - assertEquals(prevC.getLevel()+1,c.getLevel()); + c = subCells.get(random().nextInt(subCells.size() - 1)); + + assertEquals(prevC.getLevel() + 1, c.getLevel()); Rectangle prevNShape = (Rectangle) prevC.getShape(); Shape s = c.getShape(); Rectangle sbox = s.getBoundingBox(); @@ -76,6 +82,7 @@ public class SpatialPrefixTreeTest extends SpatialTestCase { assertTrue(prevNShape.getHeight() > sbox.getHeight()); } } + /** * A PrefixTree pruning optimization gone bad, applicable when optimize=true. * See