Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java (revision 1465678) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java (revision ) @@ -19,6 +19,7 @@ import com.spatial4j.core.shape.Shape; import org.apache.lucene.search.Filter; +import org.apache.lucene.spatial.DisjointSpatialFilter; import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; import org.apache.lucene.spatial.query.SpatialArgs; import org.apache.lucene.spatial.query.SpatialOperation; @@ -62,12 +63,14 @@ @Override public Filter makeFilter(SpatialArgs args) { + final SpatialOperation op = args.getOperation(); + if (op == SpatialOperation.IsDisjointTo) + return new DisjointSpatialFilter(this, args, getFieldName()); Shape shape = args.getShape(); int detailLevel = grid.getLevelForDistance(args.resolveDistErr(ctx, distErrPct)); final boolean hasIndexedLeaves = true; - final SpatialOperation op = args.getOperation(); if (op == SpatialOperation.Intersects) { return new IntersectsPrefixTreeFilter( shape, getFieldName(), grid, detailLevel, prefixGridScanLevel, Index: lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java (revision 1465679) +++ lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java (revision ) @@ -24,6 +24,10 @@ import com.spatial4j.core.shape.Shape; import com.spatial4j.core.shape.SpatialRelation; import com.spatial4j.core.shape.impl.RectangleImpl; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.document.StringField; import org.apache.lucene.search.Query; import org.apache.lucene.spatial.StrategyTestCase; import org.apache.lucene.spatial.prefix.tree.Cell; @@ -35,15 +39,22 @@ import org.junit.Test; import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; import java.util.Iterator; import java.util.LinkedHashMap; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; -import java.util.TreeSet; import static com.carrotsearch.randomizedtesting.RandomizedTest.randomInt; import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween; +import static com.spatial4j.core.shape.SpatialRelation.CONTAINS; +import static com.spatial4j.core.shape.SpatialRelation.DISJOINT; +import static com.spatial4j.core.shape.SpatialRelation.INTERSECTS; +import static com.spatial4j.core.shape.SpatialRelation.WITHIN; public class SpatialOpRecursivePrefixTreeTest extends StrategyTestCase { @@ -66,7 +77,7 @@ this.strategy = new RecursivePrefixTreeStrategy(grid, getClass().getSimpleName()); //((PrefixTreeStrategy) strategy).setDistErrPct(0);//fully precise to grid - System.out.println("Strategy: "+strategy.toString()); + System.out.println("Strategy: " + strategy.toString()); } @Test @@ -91,14 +102,21 @@ } @Test + @Repeat(iterations = 10) + public void testDisjoint() throws IOException { + mySetup(-1); + doTest(SpatialOperation.IsDisjointTo); + } + + @Test public void testWithinDisjointParts() throws IOException { mySetup(7); - //one shape comprised of two parts, quite separated apart - adoc("0", new ShapePair(ctx.makeRectangle(0, 10, -120, -100), ctx.makeRectangle(220, 240, 110, 125))); + adoc("0", new ShapePair(ctx.makeRectangle(0, 10, -120, -100), ctx.makeRectangle(220, 240, 110, 125), false)); commit(); //query surrounds only the second part of the indexed shape - Query query = strategy.makeQuery(new SpatialArgs(SpatialOperation.IsWithin, ctx.makeRectangle(210, 245, 105, 128))); + Query query = strategy.makeQuery(new SpatialArgs(SpatialOperation.IsWithin, + ctx.makeRectangle(210, 245, 105, 128))); SearchResults searchResults = executeQuery(query, 1); //we shouldn't find it because it's not completely within assertTrue(searchResults.numFound == 0); @@ -127,30 +145,73 @@ ), 1).numFound==1);//match } + //Override so we can index parts of a pair separately, resulting in the detailLevel + // being independent for each shape vs the whole thing + @Override + protected Document newDoc(String id, Shape shape) { + Document doc = new Document(); + doc.add(new StringField("id", id, Field.Store.YES)); + if (shape != null) { + Collection shapes; + if (shape instanceof ShapePair) { + shapes = new ArrayList<>(2); + shapes.add(((ShapePair)shape).shape1); + shapes.add(((ShapePair)shape).shape2); + } else { + shapes = Collections.singleton(shape); + } + for (Shape shapei : shapes) { + for (Field f : strategy.createIndexableFields(shapei)) { + doc.add(f); + } + } + if (storeShape) + doc.add(new StoredField(strategy.getFieldName(), ctx.toString(shape))); + } + return doc; + } + private void doTest(final SpatialOperation operation) throws IOException { + final boolean biasContains = (operation == SpatialOperation.Contains); + Map indexedShapes = new LinkedHashMap(); + Map indexedShapesGS = new LinkedHashMap(); final int numIndexedShapes = randomIntBetween(1, 6); for (int i = 0; i < numIndexedShapes; i++) { - String id = ""+i; + String id = "" + i; Shape indexedShape; - if (random().nextInt(4) == 0) { - indexedShape = new ShapePair( gridSnapp(randomRectangle()), gridSnapp(randomRectangle()) ); + Shape indexedShapeGS; //(grid-snapped) + int R = random().nextInt(12); + if (R == 0) {//1 in 10 + indexedShape = null; //no shape for this doc + indexedShapeGS = null; + } else if (R % 4 == 0) {//3 in 12 + //comprised of more than one shape + Rectangle shape1 = randomRectangle(); + Rectangle shape2 = randomRectangle(); + indexedShape = new ShapePair(shape1, shape2, biasContains); + indexedShapeGS = new ShapePair(gridSnap(shape1), gridSnap(shape2), biasContains); } else { - indexedShape = gridSnapp(randomRectangle()); + //just one shape + indexedShape = randomRectangle(); + indexedShapeGS = gridSnap(indexedShape); } indexedShapes.put(id, indexedShape); + indexedShapesGS.put(id, indexedShapeGS); + adoc(id, indexedShape); + if (random().nextInt(10) == 0) - commit(); - } + commit();//intermediate commit, produces extra segments - //delete some + } Iterator idIter = indexedShapes.keySet().iterator(); while (idIter.hasNext()) { String id = idIter.next(); if (random().nextInt(10) == 0) { deleteDoc(id); idIter.remove(); + indexedShapesGS.remove(id); } } @@ -160,35 +221,80 @@ for (int i = 0; i < numQueryShapes; i++) { int scanLevel = randomInt(grid.getMaxLevels()); ((RecursivePrefixTreeStrategy) strategy).setPrefixGridScanLevel(scanLevel); - Shape queryShape = gridSnapp(randomRectangle()); + final Shape queryShape = randomRectangle(); - //Generate truth via brute force - Set expectedIds = new TreeSet(); + final boolean DISJOINT = operation.equals(SpatialOperation.IsDisjointTo); + + //Generate truth via brute force: + // We really try to ensure true-positive matches (if the predicate on the raw shapes match + // then the search should find those same matches). + // approximations, false-positive matches + Set expectedIds = new LinkedHashSet();//true-positives + Set secondaryIds = new LinkedHashSet();//false-positives (unless disjoint) for (Map.Entry entry : indexedShapes.entrySet()) { - if (operation.evaluate(entry.getValue(), queryShape)) - expectedIds.add(entry.getKey()); + Shape indexedShapeCompare = entry.getValue(); + if (indexedShapeCompare == null) + continue; + Shape queryShapeCompare = queryShape; + String id = entry.getKey(); + if (operation.evaluate(indexedShapeCompare, queryShapeCompare)) { + expectedIds.add(id); + if (DISJOINT) { + //if no longer intersect after buffering them, for disjoint, remember this + indexedShapeCompare = indexedShapesGS.get(entry.getKey()); + queryShapeCompare = gridSnap(queryShape); + if (!operation.evaluate(indexedShapeCompare, queryShapeCompare)) + secondaryIds.add(id); - } + } + } else if (!DISJOINT) { + //buffer either the indexed or query shape (via gridSnap) and try again + if (operation.equals(SpatialOperation.Intersects)) { + indexedShapeCompare = indexedShapesGS.get(entry.getKey()); + queryShapeCompare = gridSnap(queryShape); + } else if (operation.equals(SpatialOperation.Contains)) { + indexedShapeCompare = indexedShapesGS.get(entry.getKey()); + } else if (operation.equals(SpatialOperation.IsWithin)) { + queryShapeCompare = gridSnap(queryShape); + } + if (operation.evaluate(indexedShapeCompare, queryShapeCompare)) + secondaryIds.add(id); + } + } //Search and verify results - Query query = strategy.makeQuery(new SpatialArgs(operation, queryShape)); + SpatialArgs args = new SpatialArgs(operation, queryShape); + Query query = strategy.makeQuery(args); SearchResults got = executeQuery(query, 100); - Set remainingExpectedIds = new TreeSet(expectedIds); - String msg = queryShape.toString()+" Expect: "+expectedIds; + Set remainingExpectedIds = new LinkedHashSet(expectedIds); for (SearchResult result : got.results) { String id = result.getId(); - Object removed = remainingExpectedIds.remove(id); - if (removed == null) { - fail("Shouldn't match " + id + " ("+ indexedShapes.get(id) +") in " + msg); + boolean removed = remainingExpectedIds.remove(id); + if (!removed && (!DISJOINT && !secondaryIds.contains(id))) { + fail("Shouldn't match", id, indexedShapes, indexedShapesGS, queryShape); } } + if (DISJOINT) + remainingExpectedIds.removeAll(secondaryIds); if (!remainingExpectedIds.isEmpty()) { - Shape firstFailedMatch = indexedShapes.get(remainingExpectedIds.iterator().next()); - fail("Didn't match " + firstFailedMatch + " in " + msg +" (of "+remainingExpectedIds.size()+")"); + String id = remainingExpectedIds.iterator().next(); + fail("Should have matched", id, indexedShapes, indexedShapesGS, queryShape); } } } - protected Rectangle gridSnapp(Shape snapMe) { + private void fail(String label, String id, Map indexedShapes, Map indexedShapesGS, Shape queryShape) { + System.err.println("Ig:" + indexedShapesGS.get(id) + " Qg:" + gridSnap(queryShape)); + fail(label + " I #" + id + ":" + indexedShapes.get(id) + " Q:" + queryShape); + } + + +// private Rectangle inset(Rectangle r) { +// //typically inset by 1 (whole numbers are easy to read) +// double d = Math.min(1.0, grid.getDistanceForLevel(grid.getMaxLevels()) / 4); +// return ctx.makeRectangle(r.getMinX() + d, r.getMaxX() - d, r.getMinY() + d, r.getMaxY() - d); +// } + + protected Rectangle gridSnap(Shape snapMe) { //The next 4 lines mimic PrefixTreeStrategy.createIndexableFields() double distErrPct = ((PrefixTreeStrategy) strategy).getDistErrPct(); double distErr = SpatialArgs.calcDistanceFromErrPct(snapMe, distErrPct, ctx); @@ -210,27 +316,57 @@ return ctx.makeRectangle(minX, maxX, minY, maxY); } - /** An aggregate of 2 shapes. Only implements what's necessary for the test here. - * TODO replace with Spatial4j trunk ShapeCollection. */ + /** + * An aggregate of 2 shapes. Only implements what's necessary for the test + * here. TODO replace with Spatial4j trunk ShapeCollection. + */ private class ShapePair implements Shape { - Shape shape1, shape2; + final Rectangle shape1, shape2; + final boolean biasContainsThenWithin;//a hack - public ShapePair(Shape shape1, Shape shape2) { + public ShapePair(Rectangle shape1, Rectangle shape2, boolean containsThenWithin) { this.shape1 = shape1; this.shape2 = shape2; + biasContainsThenWithin = containsThenWithin; } @Override public SpatialRelation relate(Shape other) { - //easy to observe is correct; not an optimal code path but this is a test - if (shape1.relate(other) == SpatialRelation.CONTAINS || shape2.relate(other) == SpatialRelation.CONTAINS) - return SpatialRelation.CONTAINS; - if (shape1.relate(other) == SpatialRelation.WITHIN && shape2.relate(other) == SpatialRelation.WITHIN) - return SpatialRelation.WITHIN; + SpatialRelation r = relateApprox(other); + if (r != INTERSECTS) + return r; + //See if the correct answer is actually Contains + Rectangle oRect = (Rectangle)other; + boolean pairTouches = shape1.relate(shape2).intersects(); + if (!pairTouches) + return r; + //test all 4 corners + if (relate(ctx.makePoint(oRect.getMinX(), oRect.getMinY())) == CONTAINS + && relate(ctx.makePoint(oRect.getMinX(), oRect.getMaxY())) == CONTAINS + && relate(ctx.makePoint(oRect.getMaxX(), oRect.getMinY())) == CONTAINS + && relate(ctx.makePoint(oRect.getMaxX(), oRect.getMaxY())) == CONTAINS) + return CONTAINS; + return r; + } + + private SpatialRelation relateApprox(Shape other) { + if (biasContainsThenWithin) { + if (shape1.relate(other) == CONTAINS || shape1.equals(other) + || shape2.relate(other) == CONTAINS || shape2.equals(other)) return CONTAINS; + + if (shape1.relate(other) == WITHIN && shape2.relate(other) == WITHIN) return WITHIN; + + } else { + if ((shape1.relate(other) == WITHIN || shape1.equals(other)) + && (shape2.relate(other) == WITHIN || shape2.equals(other))) return WITHIN; + + if (shape1.relate(other) == CONTAINS || shape2.relate(other) == CONTAINS) return CONTAINS; + } + if (shape1.relate(other).intersects() || shape2.relate(other).intersects()) - return SpatialRelation.INTERSECTS; - return SpatialRelation.DISJOINT; + return INTERSECTS;//might actually be 'CONTAINS' if these 2 are adjacent + return DISJOINT; } @Override @@ -251,6 +387,11 @@ @Override public Point getCenter() { throw new UnsupportedOperationException("TODO unimplemented");//TODO + } + + @Override + public String toString() { + return "ShapePair(" + shape1 + " , " + shape2 + ")"; } } Index: lucene/spatial/src/java/org/apache/lucene/spatial/DisjointSpatialFilter.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/DisjointSpatialFilter.java (revision ) +++ lucene/spatial/src/java/org/apache/lucene/spatial/DisjointSpatialFilter.java (revision ) @@ -0,0 +1,113 @@ +package org.apache.lucene.spatial; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.queries.ChainedFilter; +import org.apache.lucene.search.BitsFilteredDocIdSet; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.FieldCache; +import org.apache.lucene.search.Filter; +import org.apache.lucene.spatial.query.SpatialArgs; +import org.apache.lucene.spatial.query.SpatialOperation; +import org.apache.lucene.util.Bits; + +import java.io.IOException; + +/** + * A Spatial Filter implementing {@link SpatialOperation#IsDisjointTo} in terms + * of a {@link SpatialStrategy}'s support for {@link SpatialOperation#Intersects}. + * A document is considered disjoint if it has spatial data that does not + * intersect with the query shape. Another way of looking at this is that it's + * a way to invert a query shape. + * + * @lucene.experimental + */ +public class DisjointSpatialFilter extends Filter { + + private final String field;//maybe null + private final Filter intersectsFilter; + + /** + * + * @param strategy Needed to compute intersects + * @param args Used in spatial intersection + * @param field This field is used to determine which docs have spatial data via + * {@link org.apache.lucene.search.FieldCache#getDocsWithField(org.apache.lucene.index.AtomicReader, String)}. + * Passing null will assume all docs have spatial data. + */ + public DisjointSpatialFilter(SpatialStrategy strategy, SpatialArgs args, String field) { + this.field = field; + + // TODO consider making SpatialArgs cloneable + SpatialOperation origOp = args.getOperation();//copy so we can restore + args.setOperation(SpatialOperation.Intersects);//temporarily set to intersects + intersectsFilter = strategy.makeFilter(args); + args.setOperation(origOp);//restore so it looks like it was + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + DisjointSpatialFilter that = (DisjointSpatialFilter) o; + + if (field != null ? !field.equals(that.field) : that.field != null) + return false; + if (!intersectsFilter.equals(that.intersectsFilter)) return false; + + return true; + } + + @Override + public int hashCode() { + int result = field != null ? field.hashCode() : 0; + result = 31 * result + intersectsFilter.hashCode(); + return result; + } + + @Override + public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException { + Bits docsWithField; + if (field == null) { + docsWithField = null;//all docs + } else { + //NOTE By using the FieldCache we re-use a cache + // which is nice but loading it in this way might be slower than say using an + // intersects filter against the world bounds. So do we add a method to the + // strategy, perhaps? But the strategy can't cache it. + docsWithField = FieldCache.DEFAULT.getDocsWithField(context.reader(), field); + + final int maxDoc = context.reader().maxDoc(); + if (docsWithField.length() != maxDoc ) + throw new IllegalStateException("Bits length should be maxDoc ("+maxDoc+") but wasn't: "+docsWithField); + + if (docsWithField instanceof Bits.MatchNoBits) { + return null;//match nothing + } else if (docsWithField instanceof Bits.MatchAllBits) { + docsWithField = null;//all docs + } + } + + //not so much a chain but a way to conveniently invert the Filter + DocIdSet docIdSet = new ChainedFilter(new Filter[]{intersectsFilter}, ChainedFilter.ANDNOT).getDocIdSet(context, acceptDocs); + return BitsFilteredDocIdSet.wrap(docIdSet, docsWithField); + } + +}