Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java	(revision 1463203)
+++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java	(revision )
@@ -76,6 +76,8 @@
       return new WithinPrefixTreeFilter(
           shape, getFieldName(), grid, detailLevel, prefixGridScanLevel,
           -1);//-1 flag is slower but ensures correct results
+    } else if (op == SpatialOperation.Contains) {
+      return new ContainsPrefixTreeFilter(shape, getFieldName(), grid, detailLevel);
     }
     throw new UnsupportedSpatialOperation(op);
   }
Index: lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java	(revision 1463203)
+++ lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java	(revision )
@@ -82,6 +82,13 @@
   }
 
   @Test
+  @Repeat(iterations = 10)
+  public void testContains() throws IOException {
+    mySetup();
+    doTest(SpatialOperation.Contains);
+  }
+
+  @Test
   public void testWithinDisjointParts() throws IOException {
     this.ctx = new SpatialContext(false, null, new RectangleImpl(0, 256, -128, 128, null));
     //A fairly shallow grid, and default 2.5% distErrPct
@@ -211,7 +218,7 @@
 
     @Override
     public boolean hasArea() {
-      throw new UnsupportedOperationException("TODO unimplemented");//TODO
+      return true;
     }
 
     @Override
Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/ContainsPrefixTreeFilter.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/ContainsPrefixTreeFilter.java	(revision )
+++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/ContainsPrefixTreeFilter.java	(revision )
@@ -0,0 +1,259 @@
+package org.apache.lucene.spatial.prefix;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import com.spatial4j.core.shape.Shape;
+import com.spatial4j.core.shape.SpatialRelation;
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.spatial.prefix.tree.Cell;
+import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.SentinelIntSet;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+
+/**
+ * Finds docs where its indexed shape {@link org.apache.lucene.spatial.query.SpatialOperation#Contains
+ * CONTAINS} the query shape.
+ */
+public class ContainsPrefixTreeFilter extends AbstractPrefixTreeFilter {
+
+  public ContainsPrefixTreeFilter(Shape queryShape, String fieldName, SpatialPrefixTree grid, int detailLevel) {
+    super(queryShape, fieldName, grid, detailLevel);
+  }
+
+  @Override
+  public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+    return new ContainsVisitor(context, acceptDocs).visit(grid.getWorldCell(), acceptDocs);
+  }
+
+  private class ContainsVisitor extends BaseTermsEnumTraverser {
+
+    public ContainsVisitor(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+      super(context, acceptDocs);
+    }
+
+    BytesRef termBytes = new BytesRef();
+    Cell nextCell;//see getLeafDocs
+
+    /** This is the primary algorithm; recursive.  Returns null if finds none. */
+    private SmallDocSet visit(Cell cell, Bits acceptContains) throws IOException {
+
+      if (termsEnum == null)//signals all done
+        return null;
+
+      //Leaf docs match all query shape
+      SmallDocSet leafDocs = getLeafDocs(cell, acceptContains);
+
+      // Get the AND of all child results
+      SmallDocSet combinedSubResults = null;
+      Collection<Cell> subCells = cell.getSubCells(queryShape);
+      for (Cell subCell : subCells) {
+        if (!seekExact(subCell))
+          combinedSubResults = null;
+        else if (subCell.getLevel() == detailLevel)
+          combinedSubResults = getDocs(subCell, acceptContains);
+        else if (subCell.getShapeRel() == SpatialRelation.WITHIN)
+          combinedSubResults = getLeafDocs(subCell, acceptContains);
+        else
+          combinedSubResults = visit(subCell, acceptContains); //recursion
+
+        if (combinedSubResults == null)
+          break;
+        acceptContains = combinedSubResults;//has the 'AND' effect on next iteration
+      }
+
+      // Result: OR the leaf docs with AND of all child results
+      if (combinedSubResults != null) {
+        if (leafDocs == null)
+          return combinedSubResults;
+        return leafDocs.union(combinedSubResults);
+      }
+      return leafDocs;
+    }
+
+    private boolean seekExact(Cell cell) throws IOException {
+      assert new BytesRef(cell.getTokenBytes()).compareTo(termBytes) > 0;
+
+      termBytes.bytes = cell.getTokenBytes();
+      termBytes.length = termBytes.bytes.length;
+      return termsEnum.seekExact(termBytes, cell.getLevel() <= 2);
+    }
+
+    private SmallDocSet getDocs(Cell cell, Bits acceptContains) throws IOException {
+      assert new BytesRef(cell.getTokenBytes()).equals(termBytes);
+
+      return collectDocs(acceptContains);
+    }
+
+    private SmallDocSet getLeafDocs(Cell leafCell, Bits acceptContains) throws IOException {
+      assert new BytesRef(leafCell.getTokenBytes()).equals(termBytes);
+
+      BytesRef nextTerm = termsEnum.next();
+      if (nextTerm == null) {
+        termsEnum = null;//signals all done
+        return null;
+      }
+      nextCell = grid.getCell(nextTerm.bytes, nextTerm.offset, nextTerm.length, nextCell);
+      if (nextCell.getLevel() == leafCell.getLevel() && nextCell.isLeaf()) {
+        return collectDocs(acceptContains);
+      } else {
+        return null;
+      }
+    }
+
+    private SmallDocSet collectDocs(Bits acceptContains) throws IOException {
+      SmallDocSet set = null;
+
+      docsEnum = termsEnum.docs(acceptContains, docsEnum, DocsEnum.FLAG_NONE);
+      int docid;
+      while ((docid = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+        if (set == null) {
+          int size = termsEnum.docFreq();
+          if (size <= 0)
+            size = 16;
+          set = new SmallDocSet(size);
+        }
+        set.set(docid);
+      }
+      return set;
+    }
+
+  }//class ContainsVisitor
+
+  /** A hash based mutable set of docIds. If this were Solr code then we might
+   * use a combination of HashDocSet and SortedIntDocSet instead. */
+  private static class SmallDocSet extends DocIdSet implements Bits {
+
+    private final SentinelIntSet intSet;
+    private int maxInt = 0;
+
+    public SmallDocSet(int size) {
+      intSet = new SentinelIntSet(size, -1);
+    }
+
+    @Override
+    public boolean get(int index) {
+      return intSet.exists(index);
+    }
+
+    public void set(int index) {
+      intSet.put(index);
+      if (index > maxInt)
+        maxInt = index;
+    }
+
+    /** Largest docid. */
+    @Override
+    public int length() {
+      return maxInt;
+    }
+
+    /** Number of docids. */
+    public int size() {
+      return intSet.size();
+    }
+
+    /** NOTE: modifies and returns either "this" or "other" */
+    public SmallDocSet union(SmallDocSet other) {
+      SmallDocSet bigger;
+      SmallDocSet smaller;
+      if (other.intSet.size() > this.intSet.size()) {
+        bigger = other;
+        smaller = this;
+      } else {
+        bigger = this;
+        smaller = other;
+      }
+      //modify bigger
+      for (int v : smaller.intSet.keys) {
+        if (v == smaller.intSet.emptyVal)
+          continue;
+        bigger.set(v);
+      }
+      return bigger;
+    }
+
+    @Override
+    public Bits bits() throws IOException {
+      //if the # of docids is super small, return null since iteration is going
+      // to be faster
+      return size() > 4 ? this : null;
+    }
+
+    @Override
+    public DocIdSetIterator iterator() throws IOException {
+      if (size() == 0)
+        return null;
+      //copy the unsorted values to a new array then sort them
+      int d = 0;
+      final int[] docs = new int[intSet.size()];
+      for (int v : intSet.keys) {
+        if (v == intSet.emptyVal)
+          continue;
+        docs[d++] = v;
+      }
+      assert d == intSet.size();
+      final int size = d;
+
+      //sort them
+      Arrays.sort(docs, 0, size);
+
+      return new DocIdSetIterator() {
+        int idx = -1;
+
+        @Override
+        public int docID() {
+          if (idx >= 0 && idx < size)
+            return docs[idx];
+          else
+            return -1;
+        }
+
+        @Override
+        public int nextDoc() throws IOException {
+          if (++idx < size)
+            return docs[idx];
+          return NO_MORE_DOCS;
+        }
+
+        @Override
+        public int advance(int target) throws IOException {
+          //for this small set this is likely faster vs. a binary search
+          // into the sorted array
+          int doc;
+          while ((doc = nextDoc()) < target) {
+          }
+          return doc;
+        }
+
+        @Override
+        public long cost() {
+          return size;
+        }
+      };
+    }
+
+  }//class SmallDocSet
+
+}
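
Usage sketch (not part of the patch): a minimal, hypothetical example of how the new Contains predicate might be exercised through RecursivePrefixTreeStrategy once this change is applied. It assumes the Lucene 4.x / spatial4j API of this revision; the class name ContainsUsageSketch, the field name "geo", the geohash depth of 11, and the rectangles are illustrative only, and adding the document to an IndexWriter is not shown.

import com.spatial4j.core.context.SpatialContext;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.Query;
import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy;
import org.apache.lucene.spatial.prefix.tree.GeohashPrefixTree;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialOperation;

public class ContainsUsageSketch {

  public static Query containsQueryExample() {
    SpatialContext ctx = SpatialContext.GEO;
    SpatialPrefixTree grid = new GeohashPrefixTree(ctx, 11);
    RecursivePrefixTreeStrategy strategy = new RecursivePrefixTreeStrategy(grid, "geo");

    // Indexing side: add the strategy's fields for the document's shape
    // (a 20x20 degree rectangle); the Document would then go to an IndexWriter.
    Document doc = new Document();
    for (Field f : strategy.createIndexableFields(ctx.makeRectangle(-10, 10, -10, 10)))
      doc.add(f);

    // Query side: match docs whose indexed shape CONTAINS this smaller rectangle.
    // With this patch applied, makeFilter dispatches Contains to ContainsPrefixTreeFilter.
    SpatialArgs args = new SpatialArgs(SpatialOperation.Contains, ctx.makeRectangle(-1, 1, -1, 1));
    Filter filter = strategy.makeFilter(args);
    return new ConstantScoreQuery(filter);
  }
}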