Index: lucene/spatial/src/test/org/apache/lucene/spatial/prefix/HeatmapFacetCounterTest.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/test/org/apache/lucene/spatial/prefix/HeatmapFacetCounterTest.java (revision 1664352) +++ lucene/spatial/src/test/org/apache/lucene/spatial/prefix/HeatmapFacetCounterTest.java (revision ) @@ -31,7 +31,6 @@ import com.spatial4j.core.shape.Shape; import com.spatial4j.core.shape.SpatialRelation; import com.spatial4j.core.shape.impl.RectangleImpl; - import org.apache.lucene.search.Filter; import org.apache.lucene.search.FilteredQuery; import org.apache.lucene.search.MatchAllDocsQuery; @@ -235,7 +234,7 @@ // we use IntersectsPrefixTreeFilter directly so that we can specify the level to go to exactly. RecursivePrefixTreeStrategy strategy = (RecursivePrefixTreeStrategy) this.strategy; Filter filter = new IntersectsPrefixTreeFilter( - pt, strategy.getFieldName(), grid, facetLevel, grid.getMaxLevels(), !strategy.isPointsOnly()); + pt, strategy.getFieldName(), grid, facetLevel, grid.getMaxLevels()); final TotalHitCountCollector collector = new TotalHitCountCollector(); indexSearcher.search(new FilteredQuery(new MatchAllDocsQuery(), filter), collector); cellsValidated++; \ No newline at end of file Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java (revision 1664352) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java (revision ) @@ -17,14 +17,6 @@ * limitations under the License. */ -import com.spatial4j.core.context.SpatialContext; -import com.spatial4j.core.shape.Point; -import com.spatial4j.core.shape.Rectangle; -import com.spatial4j.core.shape.Shape; -import com.spatial4j.core.shape.SpatialRelation; - -import org.apache.lucene.util.BytesRef; - import java.io.PrintStream; import java.text.NumberFormat; import java.util.ArrayList; @@ -33,6 +25,13 @@ import java.util.List; import java.util.Locale; +import com.spatial4j.core.context.SpatialContext; +import com.spatial4j.core.shape.Point; +import com.spatial4j.core.shape.Rectangle; +import com.spatial4j.core.shape.Shape; +import com.spatial4j.core.shape.SpatialRelation; +import org.apache.lucene.util.BytesRef; + /** * A {@link SpatialPrefixTree} which uses a * quad tree in which an @@ -229,6 +228,9 @@ @Override protected QuadPrefixTree getGrid() { return QuadPrefixTree.this; } + + @Override + protected int getMaxLevels() { return maxLevels; } @Override protected Collection getSubCells() { Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/LegacyCell.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/LegacyCell.java (revision 1664352) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/LegacyCell.java (revision ) @@ -17,14 +17,14 @@ * limitations under the License. */ +import java.util.Collection; + import com.spatial4j.core.shape.Point; import com.spatial4j.core.shape.Shape; import com.spatial4j.core.shape.SpatialRelation; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.StringHelper; -import java.util.Collection; - /** The base for the original two SPT's: Geohash and Quad. Don't subclass this for new SPTs. * @lucene.internal */ //public for RPT pruneLeafyBranches code @@ -64,7 +64,7 @@ shape = null; this.bytes = bytes.bytes; this.b_off = bytes.offset; - this.b_len = bytes.length; + this.b_len = (short) bytes.length; readLeafAdjust(); } @@ -72,6 +72,8 @@ isLeaf = (b_len > 0 && bytes[b_off + b_len - 1] == LEAF_BYTE); if (isLeaf) b_len--; + if (getLevel() == getMaxLevels()) + isLeaf = true; } // @Override @@ -88,6 +90,8 @@ protected abstract SpatialPrefixTree getGrid(); + protected abstract int getMaxLevels(); + @Override public SpatialRelation getShapeRel() { return shapeRel; @@ -111,7 +115,7 @@ @Override public BytesRef getTokenBytesWithLeaf(BytesRef result) { result = getTokenBytesNoLeaf(result); - if (!isLeaf) + if (!isLeaf || getLevel() == getMaxLevels()) return result; if (result.bytes.length < result.offset + result.length + 1) { assert false : "Not supposed to happen; performance bug"; Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java (revision 1664352) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java (revision ) @@ -18,11 +18,11 @@ */ import java.util.ArrayList; +import java.util.Iterator; import java.util.List; import com.spatial4j.core.shape.Point; import com.spatial4j.core.shape.Shape; -import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.search.Filter; import org.apache.lucene.spatial.prefix.tree.Cell; import org.apache.lucene.spatial.prefix.tree.CellIterator; @@ -116,13 +116,13 @@ } @Override - protected TokenStream createTokenStream(Shape shape, int detailLevel) { + protected Iterator createCellIteratorToIndex(Shape shape, int detailLevel, Iterator reuse) { if (shape instanceof Point || !pruneLeafyBranches) - return super.createTokenStream(shape, detailLevel); + return super.createCellIteratorToIndex(shape, detailLevel, reuse); List cells = new ArrayList<>(4096); recursiveTraverseAndPrune(grid.getWorldCell(), shape, detailLevel, cells); - return new CellTokenStream().setCells(cells.iterator()); + return cells.iterator(); } /** Returns true if cell was added as a leaf. If it wasn't it recursively descends. */ @@ -174,10 +174,10 @@ if (op == SpatialOperation.Intersects) { return new IntersectsPrefixTreeFilter( - shape, getFieldName(), grid, detailLevel, prefixGridScanLevel, !pointsOnly); + shape, getFieldName(), grid, detailLevel, prefixGridScanLevel); } else if (op == SpatialOperation.IsWithin) { return new WithinPrefixTreeFilter( - shape, getFieldName(), grid, detailLevel, prefixGridScanLevel, !pointsOnly, + shape, getFieldName(), grid, detailLevel, prefixGridScanLevel, -1);//-1 flag is slower but ensures correct results } else if (op == SpatialOperation.Contains) { return new ContainsPrefixTreeFilter(shape, getFieldName(), grid, detailLevel, Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/WithinPrefixTreeFilter.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/WithinPrefixTreeFilter.java (revision 1664352) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/WithinPrefixTreeFilter.java (revision ) @@ -60,15 +60,15 @@ private final Shape bufferedQueryShape;//if null then the whole world /** - * See {@link AbstractVisitingPrefixTreeFilter#AbstractVisitingPrefixTreeFilter(com.spatial4j.core.shape.Shape, String, org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree, int, int, boolean)}. + * See {@link AbstractVisitingPrefixTreeFilter#AbstractVisitingPrefixTreeFilter(com.spatial4j.core.shape.Shape, String, org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree, int, int)}. * {@code queryBuffer} is the (minimum) distance beyond the query shape edge * where non-matching documents are looked for so they can be excluded. If * -1 is used then the whole world is examined (a good default for correctness). */ public WithinPrefixTreeFilter(Shape queryShape, String fieldName, SpatialPrefixTree grid, - int detailLevel, int prefixGridScanLevel, boolean hasIndexedLeaves, + int detailLevel, int prefixGridScanLevel, double queryBuffer) { - super(queryShape, fieldName, grid, detailLevel, prefixGridScanLevel, hasIndexedLeaves); + super(queryShape, fieldName, grid, detailLevel, prefixGridScanLevel); this.bufferedQueryShape = queryBuffer == -1 ? null : bufferShape(queryShape, queryBuffer); } @@ -94,11 +94,10 @@ @Override public String toString(String field) { return "WithinPrefixTreeFilter(" + - // TODO: print something about the shape? "fieldName=" + fieldName + "," + + "queryShape=" + queryShape + "," + "detailLevel=" + detailLevel + "," + - "prefixGridScanLevel=" + prefixGridScanLevel + "," + - "hasIndexedLeaves=" + hasIndexedLeaves + + "prefixGridScanLevel=" + prefixGridScanLevel + ")"; } @@ -152,7 +151,6 @@ return new VisitorTemplate(context, acceptDocs) { private FixedBitSet inside; private FixedBitSet outside; - private SpatialRelation visitRelation; @Override protected void start() { @@ -173,32 +171,26 @@ } @Override - protected boolean visit(Cell cell) throws IOException { + protected boolean visitPrefix(Cell cell) throws IOException { //cell.relate is based on the bufferedQueryShape; we need to examine what // the relation is against the queryShape - visitRelation = cell.getShape().relate(queryShape); - if (visitRelation == SpatialRelation.WITHIN) { + SpatialRelation visitRelation = cell.getShape().relate(queryShape); + if (cell.getLevel() == detailLevel) { + collectDocs(visitRelation.intersects() ? inside : outside); + return false; + } else if (visitRelation == SpatialRelation.WITHIN) { collectDocs(inside); return false; } else if (visitRelation == SpatialRelation.DISJOINT) { collectDocs(outside); return false; - } else if (cell.getLevel() == detailLevel) { - collectDocs(inside); - return false; } return true; } @Override protected void visitLeaf(Cell cell) throws IOException { - //visitRelation is declared as a field, populated by visit() so we don't recompute it. - // We have a specialized visitScanned() which doesn't call this. If we didn't, we would - // not be able to assume visitRelation is from a prior visit() call since in scanning, - // parent cells aren't visited. - assert detailLevel != cell.getLevel(); - assert visitRelation == cell.getShape().relate(queryShape); - if (allCellsIntersectQuery(cell, visitRelation)) + if (allCellsIntersectQuery(cell)) collectDocs(inside); else collectDocs(outside); @@ -207,9 +199,8 @@ /** Returns true if the provided cell, and all its sub-cells down to * detailLevel all intersect the queryShape. */ - private boolean allCellsIntersectQuery(Cell cell, SpatialRelation relate/*cell to query*/) { - if (relate == null) - relate = cell.getShape().relate(queryShape); + private boolean allCellsIntersectQuery(Cell cell) { + SpatialRelation relate = cell.getShape().relate(queryShape); if (cell.getLevel() == detailLevel) return relate.intersects(); if (relate == SpatialRelation.WITHIN) @@ -221,7 +212,7 @@ CellIterator subCells = cell.getNextLevelCells(null); while (subCells.hasNext()) { Cell subCell = subCells.next(); - if (!allCellsIntersectQuery(subCell, null))//recursion + if (!allCellsIntersectQuery(subCell))//recursion return false; } return true; @@ -229,12 +220,12 @@ @Override protected void visitScanned(Cell cell) throws IOException { - //slightly optimize over default impl; required for our 'visitRelation' field re-use above - if (allCellsIntersectQuery(cell, null)) { - collectDocs(inside); - } else { - collectDocs(outside); - } + visitLeaf(cell);//collects as we want, even if not a leaf +// if (cell.isLeaf()) { +// visitLeaf(cell); +// } else { +// visitPrefix(cell); +// } } }.getDocIdSet(); Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/TermQueryPrefixTreeStrategy.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/TermQueryPrefixTreeStrategy.java (revision 1664352) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/TermQueryPrefixTreeStrategy.java (revision ) @@ -20,6 +20,8 @@ import java.util.ArrayList; import java.util.List; +import com.spatial4j.core.shape.Point; +import com.spatial4j.core.shape.Shape; import org.apache.lucene.queries.TermsQuery; import org.apache.lucene.search.Filter; import org.apache.lucene.search.QueryWrapperFilter; @@ -32,9 +34,6 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; -import com.spatial4j.core.shape.Point; -import com.spatial4j.core.shape.Shape; - /** * A basic implementation of {@link PrefixTreeStrategy} using a large * {@link TermsQuery} of all the cells from @@ -56,6 +55,20 @@ } @Override + protected CellToBytesRefIterator newCellToBytesRefIterator() { + //Ensure we don't have leaves, as this strategy doesn't handle them. + return new CellToBytesRefIterator() { + @Override + public BytesRef next() { + if (!cellIter.hasNext()) { + return null; + } + return cellIter.next().getTokenBytesNoLeaf(bytesRef); + } + }; + } + + @Override public Filter makeFilter(SpatialArgs args) { final SpatialOperation op = args.getOperation(); if (op != SpatialOperation.Intersects) @@ -92,7 +105,8 @@ for (BytesRef byteRef : terms) { byteRef.bytes = masterBytes.bytes(); } - //unfortunately TermsFilter will needlessly sort & dedupe + //unfortunately TermsQuery will needlessly sort & dedupe + //TODO an automatonQuery might be faster? return new QueryWrapperFilter(new TermsQuery(getFieldName(), terms)); } Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/GeohashPrefixTree.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/GeohashPrefixTree.java (revision 1664352) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/GeohashPrefixTree.java (revision ) @@ -17,6 +17,10 @@ * limitations under the License. */ +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + import com.spatial4j.core.context.SpatialContext; import com.spatial4j.core.io.GeohashUtils; import com.spatial4j.core.shape.Point; @@ -24,10 +28,6 @@ import com.spatial4j.core.shape.Shape; import org.apache.lucene.util.BytesRef; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; - /** * A {@link SpatialPrefixTree} based on * Geohashes. @@ -104,7 +104,7 @@ GhCell(String geohash) { super(stringToBytesPlus1(geohash), 0, geohash.length()); this.geohash = geohash; - if (isLeaf()) + if (isLeaf() && getLevel() < getMaxLevels())//we don't have a leaf byte at max levels (an opt) this.geohash = geohash.substring(0, geohash.length() - 1); } @@ -114,6 +114,9 @@ @Override protected GeohashPrefixTree getGrid() { return GeohashPrefixTree.this; } + + @Override + protected int getMaxLevels() { return maxLevels; } @Override protected void readCell(BytesRef bytesRef) { Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/IntersectsPrefixTreeFilter.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/IntersectsPrefixTreeFilter.java (revision 1664352) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/IntersectsPrefixTreeFilter.java (revision ) @@ -39,8 +39,8 @@ public IntersectsPrefixTreeFilter(Shape queryShape, String fieldName, SpatialPrefixTree grid, int detailLevel, - int prefixGridScanLevel, boolean hasIndexedLeaves) { - super(queryShape, fieldName, grid, detailLevel, prefixGridScanLevel, hasIndexedLeaves); + int prefixGridScanLevel) { + super(queryShape, fieldName, grid, detailLevel, prefixGridScanLevel); } @Override @@ -68,7 +68,7 @@ } @Override - protected boolean visit(Cell cell) throws IOException { + protected boolean visitPrefix(Cell cell) throws IOException { if (cell.getShapeRel() == SpatialRelation.WITHIN || cell.getLevel() == detailLevel) { collectDocs(results); return false; @@ -87,11 +87,10 @@ @Override public String toString(String field) { return "IntersectsPrefixTreeFilter(" + - // TODO: print something about the shape? "fieldName=" + fieldName + "," + + "queryShape=" + queryShape + "," + "detailLevel=" + detailLevel + "," + - "prefixGridScanLevel=" + prefixGridScanLevel + "," + - "hasIndexedLeaves=" + hasIndexedLeaves + + "prefixGridScanLevel=" + prefixGridScanLevel + ")"; } Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/CellToBytesRefIterator.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/CellToBytesRefIterator.java (revision ) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/CellToBytesRefIterator.java (revision ) @@ -0,0 +1,50 @@ +package org.apache.lucene.spatial.prefix; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.Iterator; + +import org.apache.lucene.spatial.prefix.tree.Cell; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefIterator; + +/** + * A reset'able {@link org.apache.lucene.util.BytesRefIterator} wrapper around + * an {@link java.util.Iterator} of {@link org.apache.lucene.spatial.prefix.tree.Cell}s. + * + * @see PrefixTreeStrategy#newCellToBytesRefIterator() + * + * @lucene.internal + */ +public class CellToBytesRefIterator implements BytesRefIterator { + + protected Iterator cellIter; + protected BytesRef bytesRef = new BytesRef(); + + public void reset(Iterator cellIter) { + this.cellIter = cellIter; + } + + @Override + public BytesRef next() { + if (!cellIter.hasNext()) { + return null; + } + return cellIter.next().getTokenBytesWithLeaf(bytesRef); + } +} Index: lucene/spatial/src/test/org/apache/lucene/spatial/prefix/RandomSpatialOpFuzzyPrefixTreeTest.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/test/org/apache/lucene/spatial/prefix/RandomSpatialOpFuzzyPrefixTreeTest.java (revision 1664352) +++ lucene/spatial/src/test/org/apache/lucene/spatial/prefix/RandomSpatialOpFuzzyPrefixTreeTest.java (revision ) @@ -226,6 +226,7 @@ return doc; } + @SuppressWarnings("fallthrough") private void doTest(final SpatialOperation operation) throws IOException { //first show that when there's no data, a query will result in no results { Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/LegacyPrefixTree.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/LegacyPrefixTree.java (revision 1664352) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/LegacyPrefixTree.java (revision ) @@ -17,14 +17,14 @@ * limitations under the License. */ +import java.util.Arrays; + import com.spatial4j.core.context.SpatialContext; import com.spatial4j.core.shape.Point; import com.spatial4j.core.shape.Rectangle; import com.spatial4j.core.shape.Shape; import org.apache.lucene.util.BytesRef; -import java.util.Arrays; - /** The base for the original two SPT's: Geohash and Quad. Don't subclass this for new SPTs. * @lucene.internal */ abstract class LegacyPrefixTree extends SpatialPrefixTree { @@ -69,7 +69,7 @@ // subcells from the bytesRef in a loop. This avoids an O(N^2) encode, and we have O(N) instead. Cell cell = getCell((Point) shape, detailLevel); - assert !cell.isLeaf() && cell instanceof LegacyCell; + assert cell instanceof LegacyCell; BytesRef fullBytes = cell.getTokenBytesNoLeaf(null); //fill in reverse order to be sorted Cell[] cells = new Cell[detailLevel]; Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/NumberRangePrefixTreeStrategy.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/NumberRangePrefixTreeStrategy.java (revision 1664352) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/NumberRangePrefixTreeStrategy.java (revision ) @@ -19,14 +19,13 @@ import java.io.IOException; import java.util.Arrays; +import java.util.Iterator; import java.util.Map; import java.util.SortedMap; import java.util.TreeMap; import com.spatial4j.core.shape.Point; import com.spatial4j.core.shape.Shape; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.search.Filter; @@ -59,11 +58,9 @@ } @Override - public Field[] createIndexableFields(Shape shape) { + protected Iterator createCellIteratorToIndex(Shape shape, int detailLevel, Iterator reuse) { //levels doesn't actually matter; NumberRange based Shapes have their own "level". - TokenStream tokenStream = createTokenStream(shape, grid.getMaxLevels()); - Field field = new Field(getFieldName(), tokenStream, FIELD_TYPE); - return new Field[]{field}; + return super.createCellIteratorToIndex(shape, grid.getMaxLevels(), reuse); } /** Unsupported. */ Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/CellTokenStream.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/CellTokenStream.java (revision 1664352) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/CellTokenStream.java (revision ) @@ -17,102 +17,68 @@ * limitations under the License. */ +import java.io.IOException; + import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; -import org.apache.lucene.spatial.prefix.tree.Cell; import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeFactory; import org.apache.lucene.util.AttributeImpl; -import org.apache.lucene.util.AttributeReflector; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefIterator; -import java.io.IOException; -import java.util.Iterator; -import java.util.Objects; - /** * A TokenStream used internally by {@link org.apache.lucene.spatial.prefix.PrefixTreeStrategy}. * - * This is highly modelled after {@link org.apache.lucene.analysis.NumericTokenStream}. + * This is modelled after {@link org.apache.lucene.analysis.NumericTokenStream}. * - * If there is demand for it to be public; it could be made to be. - * * @lucene.internal + * TODO nocommit rename to BytesRefIteratorTokenStream */ class CellTokenStream extends TokenStream { - private interface CellTermAttribute extends Attribute { - Cell getCell(); - void setCell(Cell cell); - - //TODO one day deprecate this once we have better encodings - boolean getOmitLeafByte(); - void setOmitLeafByte(boolean b); - } - - // just a wrapper to prevent adding CTA - private static final class CellAttributeFactory extends AttributeFactory { + // just a wrapper to prevent adding CharTermAttribute + private static final class BRAttributeFactory extends AttributeFactory { private final AttributeFactory delegate; - CellAttributeFactory(AttributeFactory delegate) { + BRAttributeFactory(AttributeFactory delegate) { this.delegate = delegate; } @Override public AttributeImpl createAttributeInstance(Class attClass) { if (CharTermAttribute.class.isAssignableFrom(attClass)) - throw new IllegalArgumentException("CellTokenStream does not support CharTermAttribute."); + throw new IllegalArgumentException(getClass() + " does not support CharTermAttribute."); return delegate.createAttributeInstance(attClass); } } - private static final class CellTermAttributeImpl extends AttributeImpl - implements CellTermAttribute, TermToBytesRefAttribute { - private BytesRef bytes = new BytesRef(); - private Cell cell; - private boolean omitLeafByte;//false by default (whether there's a leaf byte or not) + private static final class BRTermToBytesRefAttributeImpl extends AttributeImpl + implements TermToBytesRefAttribute { + private final BytesRef bytes = new BytesRef(); - @Override - public Cell getCell() { - return cell; + void setBytesRef(BytesRef inputBytes) { + // shallow clone. this.bytesRef is final + bytes.bytes = inputBytes.bytes; + bytes.offset = inputBytes.offset; + bytes.length = inputBytes.length; } @Override - public boolean getOmitLeafByte() { - return omitLeafByte; - } - - @Override - public void setCell(Cell cell) { - this.cell = cell; - omitLeafByte = false;//reset - } - - @Override - public void setOmitLeafByte(boolean b) { - omitLeafByte = b; - } - - @Override public void clear() { - // this attribute has no contents to clear! - // we keep it untouched as it's fully controlled by outer class. + // we keep it untouched as it's fully controlled by the outer class. } @Override public void copyTo(AttributeImpl target) { - final CellTermAttribute a = (CellTermAttribute) target; - a.setCell(cell); - a.setOmitLeafByte(omitLeafByte); + final BRTermToBytesRefAttributeImpl a = (BRTermToBytesRefAttributeImpl) target; + a.setBytesRef(BytesRef.deepCopyOf(bytes)); } @Override public void fillBytesRef() { - if (omitLeafByte) - cell.getTokenBytesNoLeaf(bytes); - else - cell.getTokenBytesWithLeaf(bytes); + //nothing to do; it's populated by incrementToken } @Override @@ -121,21 +87,15 @@ } @Override - public void reflectWith(AttributeReflector reflector) { - fillBytesRef(); - reflector.reflect(TermToBytesRefAttribute.class, "bytes", BytesRef.deepCopyOf(bytes)); - } - - @Override - public CellTermAttributeImpl clone() { - final CellTermAttributeImpl clone = (CellTermAttributeImpl) super.clone(); - clone.bytes = BytesRef.deepCopyOf(bytes); + public BRTermToBytesRefAttributeImpl clone() { + final BRTermToBytesRefAttributeImpl clone = (BRTermToBytesRefAttributeImpl) super.clone(); + clone.setBytesRef(BytesRef.deepCopyOf(bytes)); return clone; } @Override public int hashCode() { - return Objects.hash(cell, omitLeafByte); + return bytes.hashCode(); } @Override @@ -143,66 +103,56 @@ if (this == obj) return true; if (obj == null) return false; if (getClass() != obj.getClass()) return false; - CellTermAttributeImpl other = (CellTermAttributeImpl) obj; - if (cell == null) { - if (other.cell != null) return false; - } else if (!cell.equals(other.cell)) return false; - if (omitLeafByte != other.omitLeafByte) return false; + BRTermToBytesRefAttributeImpl other = (BRTermToBytesRefAttributeImpl) obj; + if (!bytes.equals(other.bytes)) return false; return true; } } public CellTokenStream() { - this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY); + super(new BRAttributeFactory(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY)); + addAttributeImpl(new BRTermToBytesRefAttributeImpl());//because non-public constructor + bytesAtt = (BRTermToBytesRefAttributeImpl) addAttribute(TermToBytesRefAttribute.class); } - public CellTokenStream(AttributeFactory factory) { - super(new CellAttributeFactory(factory)); + public BytesRefIterator getBytesRefIterator() { + return bytesIter; } - public CellTokenStream setCells(Iterator iter) { - this.iter = iter; + public CellTokenStream setBytesRefIterator(BytesRefIterator iter) { + this.bytesIter = iter; return this; } @Override public void reset() throws IOException { - if (iter == null) - throw new IllegalStateException("call setCells() before usage"); - cellAtt.setCell(null); - cellAtt.setOmitLeafByte(false); + if (bytesIter == null) + throw new IllegalStateException("call setBytesRefIterator() before usage"); + bytesAtt.getBytesRef().length = 0; } - /** Outputs the token of a cell, and if it's a leaf, outputs it again with the leaf byte. */ @Override - public final boolean incrementToken() { - if (iter == null) - throw new IllegalStateException("call setCells() before usage"); + public final boolean incrementToken() throws IOException { + if (bytesIter == null) + throw new IllegalStateException("call setBytesRefIterator() before usage"); // this will only clear all other attributes in this TokenStream - clearAttributes(); + clearAttributes();//TODO but there should be no "other" attributes - if (cellAtt.getOmitLeafByte()) { - cellAtt.setOmitLeafByte(false); - return true; - } - //get next + // get next - if (!iter.hasNext()) + BytesRef bytes = bytesIter.next(); + if (bytes == null) { return false; - cellAtt.setCell(iter.next()); - if (cellAtt.getCell().isLeaf()) - cellAtt.setOmitLeafByte(true); + } else { + bytesAtt.setBytesRef(bytes); + //note: we don't bother setting posInc or type attributes. There's no point to it. - return true; - } + return true; + } - - { - addAttributeImpl(new CellTermAttributeImpl());//because non-public constructor } + //members - private final CellTermAttribute cellAtt = addAttribute(CellTermAttribute.class); + private final BRTermToBytesRefAttributeImpl bytesAtt; - //TODO support position increment, and TypeAttribute - - private Iterator iter = null; // null means not initialized + private BytesRefIterator bytesIter = null; // null means not initialized } Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/Cell.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/Cell.java (revision 1664352) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/Cell.java (revision ) @@ -51,7 +51,6 @@ * Some cells are flagged as leaves, which are indexed as such. A leaf cell is either within some * shape or it both intersects and the cell is at an accuracy threshold such that no smaller cells * for the shape will be represented. - * further cells with this prefix for the shape (always true at maxLevels). */ boolean isLeaf(); @@ -62,14 +61,14 @@ void setLeaf(); /** - * Returns the bytes for this cell, with a leaf byte if this is a leaf cell. + * Returns the bytes for this cell, with a leaf byte if this is a leaf cell. * The result param is used to save object allocation, though its bytes aren't used. * @param result where the result goes, or null to create new */ BytesRef getTokenBytesWithLeaf(BytesRef result); /** - * Returns the bytes for this cell, without leaf set. The bytes should sort before + * Returns the bytes for this cell, without a leaf set. The bytes should sort before * {@link #getTokenBytesWithLeaf(org.apache.lucene.util.BytesRef)}. * The result param is used to save object allocation, though its bytes aren't used. * @param result where the result goes, or null to create new Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/ContainsPrefixTreeFilter.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/ContainsPrefixTreeFilter.java (revision 1664352) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/ContainsPrefixTreeFilter.java (revision ) @@ -17,11 +17,14 @@ * limitations under the License. */ +import java.io.IOException; +import java.util.Arrays; + import com.spatial4j.core.shape.Shape; import com.spatial4j.core.shape.SpatialRelation; - import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.spatial.prefix.tree.Cell; @@ -32,9 +35,6 @@ import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.SentinelIntSet; -import java.io.IOException; -import java.util.Arrays; - /** * Finds docs where its indexed shape {@link org.apache.lucene.spatial.query.SpatialOperation#Contains * CONTAINS} the query shape. For use on {@link RecursivePrefixTreeStrategy}. @@ -43,11 +43,6 @@ */ public class ContainsPrefixTreeFilter extends AbstractPrefixTreeFilter { - /* - Future optimizations: - Instead of seekExact, use seekCeil with some leap-frogging, like Intersects does. - */ - /** * If the spatial data for a document is comprised of multiple overlapping or adjacent parts, * it might fail to match a query shape when doing the CONTAINS predicate when the sum of @@ -77,8 +72,8 @@ @Override public String toString(String field) { return "ContainsPrefixTreeFilter(" + - // TODO: print something about the shape? "fieldName=" + fieldName + "," + + "queryShape=" + queryShape + "," + "detailLevel=" + detailLevel + "," + "multiOverlappingIndexedShapes=" + multiOverlappingIndexedShapes + ")"; @@ -93,21 +88,21 @@ public ContainsVisitor(LeafReaderContext context, Bits acceptDocs) throws IOException { super(context, acceptDocs); + if (termsEnum != null) { + nextTerm();//advance to first - } + } + } - //The reused value of cell.getTokenBytesNoLeaf which is always then seek()'ed to. It's used in assertions too. - BytesRef termBytes = new BytesRef();//no leaf - Cell nextCell;//see getLeafDocs + BytesRef seekTerm = new BytesRef();//temp; see seek() + BytesRef thisTerm;//current term in termsEnum + Cell indexedCell;//the cell wrapper around thisTerm /** This is the primary algorithm; recursive. Returns null if finds none. */ private SmallDocSet visit(Cell cell, Bits acceptContains) throws IOException { - if (termsEnum == null)//signals all done + if (thisTerm == null)//signals all done return null; - // Leaf docs match all query shape - SmallDocSet leafDocs = getLeafDocs(cell, acceptContains); - // Get the AND of all child results (into combinedSubResults) SmallDocSet combinedSubResults = null; // Optimization: use null subCellsFilter when we know cell is within the query shape. @@ -119,68 +114,105 @@ CellIterator subCells = cell.getNextLevelCells(subCellsFilter); while (subCells.hasNext()) { Cell subCell = subCells.next(); - if (!seekExact(subCell)) + if (!seek(subCell)) { combinedSubResults = null; - else if (subCell.getLevel() == detailLevel) + } else if (subCell.getLevel() == detailLevel) { combinedSubResults = getDocs(subCell, acceptContains); - else if (!multiOverlappingIndexedShapes && - subCell.getShapeRel() == SpatialRelation.WITHIN) + } else if (!multiOverlappingIndexedShapes && + subCell.getShapeRel() == SpatialRelation.WITHIN) { combinedSubResults = getLeafDocs(subCell, acceptContains); - else - combinedSubResults = visit(subCell, acceptContains); //recursion + } else { + //OR the leaf docs with all child results + SmallDocSet leafDocs = getLeafDocs(subCell, acceptContains); + SmallDocSet subDocs = visit(subCell, acceptContains); //recursion + combinedSubResults = union(leafDocs, subDocs); + } if (combinedSubResults == null) break; acceptContains = combinedSubResults;//has the 'AND' effect on next iteration } - // Result: OR the leaf docs with AND of all child results - if (combinedSubResults != null) { - if (leafDocs == null) - return combinedSubResults; + return combinedSubResults; - return leafDocs.union(combinedSubResults);//union is 'or' - } + } - return leafDocs; - } - private boolean seekExact(Cell cell) throws IOException { - assert cell.getTokenBytesNoLeaf(null).compareTo(termBytes) > 0; - if (termsEnum == null) + private boolean seek(Cell cell) throws IOException { + if (thisTerm == null) return false; - termBytes = cell.getTokenBytesNoLeaf(termBytes); - assert assertCloneTermBytes(); //assertions look at termBytes later on - return termsEnum.seekExact(termBytes); + final int compare = indexedCell.compareToNoLeaf(cell); + if (compare > 0) { + return false;//leap-frog effect + } else if (compare == 0) { + return true; // already there! + } else {//compare > 0 + //seek! + seekTerm = cell.getTokenBytesNoLeaf(seekTerm); + final TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(seekTerm); + if (seekStatus == TermsEnum.SeekStatus.END) { + thisTerm = null;//all done + return false; - } + } - - private boolean assertCloneTermBytes() { - termBytes = BytesRef.deepCopyOf(termBytes); + thisTerm = termsEnum.term(); + indexedCell = grid.readCell(thisTerm, indexedCell); + if (seekStatus == TermsEnum.SeekStatus.FOUND) { - return true; - } + return true; + } + return indexedCell.isLeaf() && indexedCell.compareToNoLeaf(cell) == 0; + } + } + /** Get prefix & leaf docs at this cell. */ private SmallDocSet getDocs(Cell cell, Bits acceptContains) throws IOException { - assert cell.getTokenBytesNoLeaf(null).equals(termBytes); - - return collectDocs(acceptContains); + assert indexedCell.compareToNoLeaf(cell) == 0; + //called when we've reached detailLevel. + if (indexedCell.isLeaf()) {//only a leaf + SmallDocSet result = collectDocs(acceptContains); + nextTerm(); + return result; + } else { + SmallDocSet docsAtPrefix = collectDocs(acceptContains); + if (!nextTerm()) { + return docsAtPrefix; - } + } + //collect leaf too + if (indexedCell.isLeaf() && indexedCell.compareToNoLeaf(cell) == 0) { + SmallDocSet docsAtLeaf = collectDocs(acceptContains); + nextTerm(); + return union(docsAtPrefix, docsAtLeaf); + } else { + return docsAtPrefix; + } + } + } /** Gets docs on the leaf of the given cell, _if_ there is a leaf cell, otherwise null. */ private SmallDocSet getLeafDocs(Cell cell, Bits acceptContains) throws IOException { - assert cell.getTokenBytesNoLeaf(null).equals(termBytes); - - if (termsEnum == null) + assert indexedCell.compareToNoLeaf(cell) == 0; + //Advance past prefix if we're at a prefix; return null if no leaf + if (!indexedCell.isLeaf()) { + if (!nextTerm() || !indexedCell.isLeaf() || indexedCell.getLevel() != cell.getLevel()) { - return null; + return null; - BytesRef nextTerm = termsEnum.next(); - if (nextTerm == null) { - termsEnum = null;//signals all done - return null; - } + } - nextCell = grid.readCell(nextTerm, nextCell); - assert cell.isPrefixOf(nextCell); - if (nextCell.getLevel() == cell.getLevel() && nextCell.isLeaf()) { - return collectDocs(acceptContains); - } else { - return null; } + SmallDocSet result = collectDocs(acceptContains); + nextTerm(); + return result; + } + + private boolean nextTerm() throws IOException { + if ((thisTerm = termsEnum.next()) == null) + return false; + indexedCell = grid.readCell(thisTerm, indexedCell); + return true; + } + + private SmallDocSet union(SmallDocSet aSet, SmallDocSet bSet) { + if (bSet != null) { + if (aSet == null) + return bSet; + return aSet.union(bSet);//union is 'or' + } + return aSet; } private SmallDocSet collectDocs(Bits acceptContains) throws IOException { Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixTreeStrategy.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixTreeStrategy.java (revision 1664352) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixTreeStrategy.java (revision ) @@ -24,7 +24,6 @@ import com.spatial4j.core.shape.Point; import com.spatial4j.core.shape.Shape; -import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.index.IndexOptions; @@ -127,8 +126,8 @@ return pointsOnly; } - /** True if only indexed points shall be supported. There are no "leafs" in such a case. See - * {@link org.apache.lucene.spatial.prefix.IntersectsPrefixTreeFilter#hasIndexedLeaves}. */ + /** True if only indexed points shall be supported. There are no "leafs" in such a case, except those + * at maximum precision. */ public void setPointsOnly(boolean pointsOnly) { this.pointsOnly = pointsOnly; } @@ -142,23 +141,29 @@ /** * Turns {@link SpatialPrefixTree#getTreeCellIterator(Shape, int)} into a * {@link org.apache.lucene.analysis.TokenStream}. - * {@code simplifyIndexedCells} is an optional hint affecting non-point shapes: it will - * simply/aggregate sets of complete leaves in a cell to its parent, resulting in ~20-25% - * fewer cells. It will likely be removed in the future. */ public Field[] createIndexableFields(Shape shape, double distErr) { int detailLevel = grid.getLevelForDistance(distErr); - TokenStream tokenStream = createTokenStream(shape, detailLevel); + //TODO re-use TokenStream LUCENE-5776: Subclass Field, put cell iterator there, override tokenStream() + Iterator cells = createCellIteratorToIndex(shape, detailLevel, null); + CellToBytesRefIterator cellToBytesRefIterator = newCellToBytesRefIterator(); + cellToBytesRefIterator.reset(cells); + CellTokenStream tokenStream = new CellTokenStream(); + tokenStream.setBytesRefIterator(cellToBytesRefIterator); Field field = new Field(getFieldName(), tokenStream, FIELD_TYPE); return new Field[]{field}; } - protected TokenStream createTokenStream(Shape shape, int detailLevel) { + protected CellToBytesRefIterator newCellToBytesRefIterator() { + //subclasses could return one that never emits leaves, or does both, or who knows. + return new CellToBytesRefIterator(); + } + + protected Iterator createCellIteratorToIndex(Shape shape, int detailLevel, Iterator reuse) { if (pointsOnly && !(shape instanceof Point)) { throw new IllegalArgumentException("pointsOnly is true yet a " + shape.getClass() + " is given for indexing"); } - Iterator cells = grid.getTreeCellIterator(shape, detailLevel); - return new CellTokenStream().setCells(cells); + return grid.getTreeCellIterator(shape, detailLevel);//TODO should take a re-use iterator } /* Indexed, tokenized, not stored. */ Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/AbstractVisitingPrefixTreeFilter.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/AbstractVisitingPrefixTreeFilter.java (revision 1664352) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/AbstractVisitingPrefixTreeFilter.java (revision ) @@ -21,6 +21,7 @@ import java.util.Iterator; import com.spatial4j.core.shape.Shape; +import com.spatial4j.core.shape.SpatialRelation; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.DocIdSet; @@ -53,13 +54,11 @@ // least it would just make things more complicated. protected final int prefixGridScanLevel;//at least one less than grid.getMaxLevels() - protected final boolean hasIndexedLeaves; public AbstractVisitingPrefixTreeFilter(Shape queryShape, String fieldName, SpatialPrefixTree grid, - int detailLevel, int prefixGridScanLevel, boolean hasIndexedLeaves) { + int detailLevel, int prefixGridScanLevel) { super(queryShape, fieldName, grid, detailLevel); this.prefixGridScanLevel = Math.max(0, Math.min(prefixGridScanLevel, grid.getMaxLevels() - 1)); - this.hasIndexedLeaves = hasIndexedLeaves; assert detailLevel <= grid.getMaxLevels(); } @@ -90,7 +89,7 @@ * which by default finds the top cells that intersect {@code queryShape}. If * there isn't an indexed cell for a corresponding cell returned for this * method then it's short-circuited until it finds one, at which point - * {@link #visit(org.apache.lucene.spatial.prefix.tree.Cell)} is called. At + * {@link #visitPrefix(org.apache.lucene.spatial.prefix.tree.Cell)} is called. At * some depths, of the tree, the algorithm switches to a scanning mode that * calls {@link #visitScanned(org.apache.lucene.spatial.prefix.tree.Cell)} * for each leaf cell found. @@ -114,10 +113,6 @@ does act as a short-circuit. So maybe do some percent of the time or when the level is above some threshold. - * Once we don't have redundant non-leaves indexed with leaf cells (LUCENE-4942), we can - sometimes know to call next() instead of seek() if we're processing a leaf cell that - didn't have a corresponding non-leaf. - */ // @@ -125,10 +120,10 @@ // private VNode curVNode;//current pointer, derived from query shape - private BytesRef curVNodeTerm = new BytesRef();//curVNode.cell's term, without leaf - private Cell scanCell; + private BytesRef curVNodeTerm = new BytesRef();//curVNode.cell's term, without leaf. in main loop only private BytesRef thisTerm;//the result of termsEnum.term() + private Cell indexedCell;//Cell wrapper of thisTerm. Always updated when thisTerm is. public VisitorTemplate(LeafReaderContext context, Bits acceptDocs) throws IOException { super(context, acceptDocs); @@ -138,9 +133,9 @@ assert curVNode == null : "Called more than once?"; if (termsEnum == null) return null; - //advance - if ((thisTerm = termsEnum.next()) == null) - return null; // all done + if (!nextTerm()) {//advances + return null; + } curVNode = new VNode(null); curVNode.reset(grid.getWorldCell()); @@ -177,38 +172,54 @@ } //Seek to curVNode's cell (or skip if termsEnum has moved beyond) - curVNode.cell.getTokenBytesNoLeaf(curVNodeTerm); - int compare = thisTerm.compareTo(curVNodeTerm); + final int compare = indexedCell.compareToNoLeaf(curVNode.cell); if (compare > 0) { - // leap frog (termsEnum is beyond where we would otherwise seek) - assert ! context.reader().terms(fieldName).iterator(null).seekExact(curVNodeTerm) : "should be absent"; - } else { + // The indexed cell is after; continue loop to next query cell + continue; + } - if (compare < 0) { + if (compare < 0) { + // The indexed cell is before; seek ahead to query cell: - // Seek ! + // Seek ! + curVNode.cell.getTokenBytesNoLeaf(curVNodeTerm); - TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(curVNodeTerm); - if (seekStatus == TermsEnum.SeekStatus.END) - break; // all done - thisTerm = termsEnum.term(); + TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(curVNodeTerm); + if (seekStatus == TermsEnum.SeekStatus.END) + break; // all done + thisTerm = termsEnum.term(); + indexedCell = grid.readCell(thisTerm, indexedCell); - if (seekStatus == TermsEnum.SeekStatus.NOT_FOUND) { + if (seekStatus == TermsEnum.SeekStatus.NOT_FOUND) { - continue; // leap frog + // Did we find a leaf of the cell we were looking for or something after? + if (!indexedCell.isLeaf() || indexedCell.compareToNoLeaf(curVNode.cell) != 0) + continue; // The indexed cell is after; continue loop to next query cell - } - } + } + } - // Visit! - boolean descend = visit(curVNode.cell); - //advance - if ((thisTerm = termsEnum.next()) == null) - break; // all done - if (descend) - addIntersectingChildren(); + // indexedCell == queryCell (disregarding leaf). + // If indexedCell is a leaf then there's no prefix (prefix sorts before) -- just visit and continue + if (indexedCell.isLeaf()) { + visitLeaf(indexedCell);//nocommit or query cell? + if (!nextTerm()) break; + continue; } + // If a prefix (non-leaf) then visit; see if we descend. + final boolean descend = visitPrefix(curVNode.cell);//need to use curVNode.cell not indexedCell + if (!nextTerm()) break; + // Check for adjacent leaf with the same prefix + if (indexedCell.isLeaf() && indexedCell.getLevel() == curVNode.cell.getLevel()) { + visitLeaf(indexedCell);//nocommit or query cell? + if (!nextTerm()) break; + } + + if (descend) { + addIntersectingChildren(); + } + }//main loop return finish(); } - /** Called initially, and whenever {@link #visit(org.apache.lucene.spatial.prefix.tree.Cell)} + /** Called initially, and whenever {@link #visitPrefix(org.apache.lucene.spatial.prefix.tree.Cell)} * returns true. */ private void addIntersectingChildren() throws IOException { assert thisTerm != null; @@ -216,20 +227,6 @@ if (cell.getLevel() >= detailLevel) throw new IllegalStateException("Spatial logic error"); - //Check for adjacent leaf (happens for indexed non-point shapes) - if (hasIndexedLeaves && cell.getLevel() != 0) { - //If the next indexed term just adds a leaf marker to cell, - // then add all of those docs - scanCell = grid.readCell(thisTerm, scanCell); - assert curVNode.cell.isPrefixOf(scanCell) : "missing leaf or descendants"; - if (scanCell.getLevel() == cell.getLevel() && scanCell.isLeaf()) { - visitLeaf(scanCell); - //advance - if ((thisTerm = termsEnum.next()) == null) - return; // all done - } - } - //Decide whether to continue to divide & conquer, or whether it's time to // scan through terms beneath this cell. // Scanning is a performance optimization trade-off. @@ -269,24 +266,24 @@ * #visitScanned(org.apache.lucene.spatial.prefix.tree.Cell)}. */ protected void scan(int scanDetailLevel) throws IOException { - for ( ; - thisTerm != null; - thisTerm = termsEnum.next()) { - scanCell = grid.readCell(thisTerm, scanCell); - if (!curVNode.cell.isPrefixOf(scanCell)) - break; - - int termLevel = scanCell.getLevel(); - if (termLevel < scanDetailLevel) { - if (scanCell.isLeaf()) - visitScanned(scanCell); - } else if (termLevel == scanDetailLevel) { - if (!scanCell.isLeaf())//LUCENE-5529 - visitScanned(scanCell); + assert curVNode.cell.isPrefixOf(indexedCell); + do { + if (indexedCell.getLevel() == scanDetailLevel + || (indexedCell.getLevel() < scanDetailLevel && indexedCell.isLeaf())) { + visitScanned(indexedCell); } - }//term loop + //advance + if (!nextTerm()) break; + } while (curVNode.cell.isPrefixOf(indexedCell)); } + private boolean nextTerm() throws IOException { + if ((thisTerm = termsEnum.next()) == null) + return false; + indexedCell = grid.readCell(thisTerm, indexedCell); + return true; + } + /** Used for {@link VNode#children}. */ private class VNodeCellIterator implements Iterator { @@ -322,23 +319,21 @@ protected abstract DocIdSet finish() throws IOException; /** - * Visit an indexed non-leaf cell returned from - * {@link #findSubCellsToVisit(org.apache.lucene.spatial.prefix.tree.Cell)} - * that is also found in the index. - * It will also be called by the default implementation of - * {@link #visitScanned(org.apache.lucene.spatial.prefix.tree.Cell)} for - * cells at the bottom detail level. + * Visit an indexed non-leaf cell. The presence of a prefix cell implies + * there are leaf cells at further levels. The cell passed should have it's + * {@link org.apache.lucene.spatial.prefix.tree.Cell#getShapeRel()} set + * relative to the filtered shape. * * @param cell An intersecting cell; not a leaf. - * @return true to descend to more levels. It is an error to return true - * if cell.level == detailLevel + * @return true to descend to more levels. */ - protected abstract boolean visit(Cell cell) throws IOException; + protected abstract boolean visitPrefix(Cell cell) throws IOException; /** * Called when an indexed leaf cell is found. An - * indexed leaf cell means associated documents generally won't be found at - * further detail levels. + * indexed leaf cell usually means associated documents won't be found at + * further detail levels. However, if a document has + * multiple overlapping shapes at different resolutions, then this isn't true. */ protected abstract void visitLeaf(Cell cell) throws IOException; @@ -347,14 +342,16 @@ * might not even intersect the query shape, so be sure to check for that. * The default implementation will check that and if passes then call * {@link #visitLeaf(org.apache.lucene.spatial.prefix.tree.Cell)} or - * {@link #visit(org.apache.lucene.spatial.prefix.tree.Cell)}. + * {@link #visitPrefix(org.apache.lucene.spatial.prefix.tree.Cell)}. */ protected void visitScanned(Cell cell) throws IOException { - if (queryShape.relate(cell.getShape()).intersects()) { + final SpatialRelation relate = cell.getShape().relate(queryShape); + if (relate.intersects()) { + cell.setShapeRel(relate); if (cell.isLeaf()) { visitLeaf(cell); } else { - visit(cell); + visitPrefix(cell); } } } Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PointPrefixTreeFieldCacheProvider.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PointPrefixTreeFieldCacheProvider.java (revision 1664352) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PointPrefixTreeFieldCacheProvider.java (revision ) @@ -24,7 +24,8 @@ import org.apache.lucene.util.BytesRef; /** - * Implementation of {@link ShapeFieldCacheProvider} designed for {@link PrefixTreeStrategy}s that index points. + * Implementation of {@link ShapeFieldCacheProvider} designed for {@link PrefixTreeStrategy}s that index points + * (AND ONLY POINTS!). * * @lucene.internal */ @@ -40,8 +41,8 @@ @Override protected Point readShape(BytesRef term) { - scanCell = grid.readCell(term, scanCell);; - if (scanCell.getLevel() == grid.getMaxLevels() && !scanCell.isLeaf())//points are never flagged as leaf + scanCell = grid.readCell(term, scanCell); + if (scanCell.getLevel() == grid.getMaxLevels()) return scanCell.getShape().getCenter(); return null; } Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixTreeFacetCounter.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixTreeFacetCounter.java (revision 1664352) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixTreeFacetCounter.java (revision ) @@ -20,9 +20,9 @@ import java.io.IOException; import com.spatial4j.core.shape.Shape; -import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Filter; @@ -33,6 +33,15 @@ /** * Computes facets on cells for {@link org.apache.lucene.spatial.prefix.PrefixTreeStrategy}. + *

+ * NOTE: If for a given document and a given field using {@link org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy} + * multiple fields are indexed (i.e. multi-valued) and at least one of them is a non-point, then there is a possibility + * of double-counting the document in the facet results. Since each shape is independently turned into grid cells at + * a resolution chosen by the shape's size, it's possible they will be indexed at different resolutions. This means + * the document could be present in BOTH the postings for a cell as a prefix and as a leaf. To avoid this, when + * indexing (not search) use a single valued field with a {@link com.spatial4j.core.shape.ShapeCollection}, OR + * calculate a suitable level to index both and create a strategy instance with that level as it's maxLevel, and set + * distErrPct to 0 so that each shape will go to that level. * * @lucene.experimental */ @@ -114,8 +123,7 @@ //AbstractVisitingPrefixTreeFilter is a Lucene Filter. We don't need a filter; we use it for its great prefix-tree // traversal code. TODO consider refactoring if/when it makes sense (more use cases than this) - new AbstractVisitingPrefixTreeFilter(queryShape, strategy.getFieldName(), tree, facetLevel, scanLevel, - !strategy.isPointsOnly()) { + new AbstractVisitingPrefixTreeFilter(queryShape, strategy.getFieldName(), tree, facetLevel, scanLevel) { @Override public String toString(String field) { @@ -139,7 +147,7 @@ } @Override - protected boolean visit(Cell cell) throws IOException { + protected boolean visitPrefix(Cell cell) throws IOException { // At facetLevel... if (cell.getLevel() == facetLevel) { // Count docs