Index: dev-tools/idea/lucene/benchmark/src/benchmark.iml IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- dev-tools/idea/lucene/benchmark/src/benchmark.iml (revision 1535069) +++ dev-tools/idea/lucene/benchmark/src/benchmark.iml (revision ) @@ -24,6 +24,7 @@ + Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialFileQueryMaker.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialFileQueryMaker.java (revision ) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialFileQueryMaker.java (revision ) @@ -0,0 +1,120 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import com.spatial4j.core.shape.Shape; +import org.apache.lucene.benchmark.byTask.utils.Config; +import org.apache.lucene.queries.CustomScoreQuery; +import org.apache.lucene.queries.function.FunctionQuery; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.Filter; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryWrapperFilter; +import org.apache.lucene.spatial.SpatialStrategy; +import org.apache.lucene.spatial.query.SpatialArgs; +import org.apache.lucene.spatial.query.SpatialOperation; + +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +/** + * Reads spatial data from the body field of the docs produced by an internally created {@link LineDocSource}. + * Each body is parsed by {@link com.spatial4j.core.context.SpatialContext#readShape(String)} and then + * further manipulated via a configurable {@link SpatialDocMaker.ShapeConverter}. When using point + * data, it's likely you'll want to configure the shape converter so that the query shapes actually + * cover a region. The queries are all created and cached in advance. This query maker works in + * conjunction with {@link SpatialDocMaker}. See spatial.alg for a listing of options, in + * particular the options starting with "query.". 
+ */ +public class SpatialFileQueryMaker extends AbstractQueryMaker { + protected SpatialStrategy strategy; + protected double distErrPct;//NaN if not set + protected SpatialOperation operation; + protected boolean score; + + protected SpatialDocMaker.ShapeConverter shapeConverter; + + @Override + public void setConfig(Config config) throws Exception { + strategy = SpatialDocMaker.getSpatialStrategy(config.getRoundNumber()); + shapeConverter = SpatialDocMaker.makeShapeConverter(strategy, config, "query.spatial."); + + distErrPct = config.get("query.spatial.distErrPct", Double.NaN); + operation = SpatialOperation.get(config.get("query.spatial.predicate", "Intersects")); + score = config.get("query.spatial.score", false); + + super.setConfig(config);//call last, will call prepareQueries() + } + + @Override + protected Query[] prepareQueries() throws Exception { + final int maxQueries = config.get("query.file.maxQueries", 1000); + Config srcConfig = new Config(new Properties()); + srcConfig.set("docs.file", config.get("query.file", null)); + srcConfig.set("line.parser", config.get("query.file.line.parser", null)); + srcConfig.set("content.source.forever", "false"); + + List queries = new ArrayList<>(); + LineDocSource src = new LineDocSource(); + try { + src.setConfig(srcConfig); + src.resetInputs(); + DocData docData = new DocData(); + for (int i = 0; i < maxQueries; i++) { + docData = src.getNextDocData(docData); + Shape shape = SpatialDocMaker.makeShapeFromString(strategy, docData.getName(), docData.getBody()); + if (shape != null) { + shape = shapeConverter.convert(shape); + queries.add(makeQueryFromShape(shape)); + } else { + i--;//skip + } + } + } catch (NoMoreDataException e) { + //all-done + } finally { + src.close(); + } + return queries.toArray(new Query[queries.size()]); + } + + + protected Query makeQueryFromShape(Shape shape) { + SpatialArgs args = new SpatialArgs(operation, shape); + if (!Double.isNaN(distErrPct)) + args.setDistErrPct(distErrPct); + + if 
(score) { + ValueSource valueSource = strategy.makeDistanceValueSource(shape.getCenter()); + return new CustomScoreQuery(strategy.makeQuery(args), new FunctionQuery(valueSource)); + } else { + //strategy.makeQuery() could potentially score (isn't well defined) so instead we call + // makeFilter() and wrap + + Filter filter = strategy.makeFilter(args); + if (filter instanceof QueryWrapperFilter) { + return ((QueryWrapperFilter)filter).getQuery(); + } else { + return new ConstantScoreQuery(filter); + } + } + } + +} Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/GeonamesLineParser.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/GeonamesLineParser.java (revision ) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/GeonamesLineParser.java (revision ) @@ -0,0 +1,44 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * A line parser for Geonames.org data. + * See 'geoname' table. + * Requires {@link SpatialDocMaker}. 
+ */ +public class GeonamesLineParser extends LineDocSource.LineParser { + + /** This header will be ignored; the geonames format is fixed and doesn't have a header line. */ + public GeonamesLineParser(String[] header) { + super(header); + } + + @Override + public void parseLine(DocData docData, String line) {//fills docData's ID, name and body ("lat,lon") from one tab-separated row + String[] parts = line.split("\\t", 7);//no more than first 6 fields needed + + // Sample data line (tab-separated in the file, shown here with commas; parts[3] is empty in this row): + // 3578267, Morne du Vitet, Morne du Vitet, , 17.88333, -62.8, ... + // ID, Name, ASCII name (unused), Alternate names (unused), Lat, Lon, ... i.e. parts[0] .. parts[5] + + docData.setID(Integer.parseInt(parts[0]));//note: overwrites ID assigned by LineDocSource + docData.setName(parts[1]); + docData.setBody(parts[4]+","+parts[5]); // latitude , longitude + } +} Index: lucene/benchmark/build.xml IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/benchmark/build.xml (revision 1535069) +++ lucene/benchmark/build.xml (revision ) @@ -37,6 +37,8 @@ + + @@ -62,6 +64,25 @@ + + + + + + + + + + + + + + + + @@ -147,8 +168,10 @@ + + @@ -158,7 +181,8 @@ - + @@ -166,6 +190,7 @@ + @@ -256,7 +281,7 @@ - + Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialDocMaker.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialDocMaker.java (revision ) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialDocMaker.java (revision ) @@ -0,0 +1,207 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import com.spatial4j.core.context.SpatialContext; +import com.spatial4j.core.context.SpatialContextFactory; +import com.spatial4j.core.shape.Point; +import com.spatial4j.core.shape.Shape; +import org.apache.lucene.benchmark.byTask.utils.Config; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.spatial.SpatialStrategy; +import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy; +import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; +import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTreeFactory; + +import java.util.AbstractMap; +import java.util.HashMap; +import java.util.Map; +import java.util.Random; +import java.util.Set; + +/** + * Indexes spatial data according to a configured {@link SpatialStrategy} with optional + * shape transformation via a configured {@link ShapeConverter}. The converter can turn points into + * circles and bounding boxes, in order to vary the type of indexing performance tests. + * Unless it's subclass-ed to do otherwise, this class configures a {@link SpatialContext}, + * {@link SpatialPrefixTree}, and {@link RecursivePrefixTreeStrategy}. The Strategy is made + * available to a query maker via the static method {@link #getSpatialStrategy(int)}. + * See spatial.alg for a listing of spatial parameters, in particular those starting with "spatial." 
+ * and "doc.spatial". + */ +public class SpatialDocMaker extends DocMaker { + + public static final String SPATIAL_FIELD = "spatial"; + + //cache spatialStrategy by round number + private static Map spatialStrategyCache = new HashMap(); + + private SpatialStrategy strategy; + private ShapeConverter shapeConverter; + + /** + * Looks up the SpatialStrategy from the given round -- + * {@link org.apache.lucene.benchmark.byTask.utils.Config#getRoundNumber()}. It's an error + * if it wasn't created already for this round -- when SpatialDocMaker is initialized. + */ + public static SpatialStrategy getSpatialStrategy(int roundNumber) { + SpatialStrategy result = spatialStrategyCache.get(roundNumber); + if (result == null) { + throw new IllegalStateException("Strategy should have been init'ed by SpatialDocMaker by now"); + } + return result; + } + + /** + * Builds a SpatialStrategy from configuration options. + */ + protected SpatialStrategy makeSpatialStrategy(final Config config) { + //A Map view of Config that prefixes keys with "spatial." + Map configMap = new AbstractMap() { + @Override + public Set> entrySet() { + throw new UnsupportedOperationException(); + } + + @Override + public String get(Object key) { + return config.get("spatial." + key, null); + } + }; + + SpatialContext ctx = SpatialContextFactory.makeSpatialContext(configMap, null); + + //Some day the strategy might be initialized with a factory but such a factory + // is non-existent. 
+ return makeSpatialStrategy(config, configMap, ctx); + } + + protected SpatialStrategy makeSpatialStrategy(final Config config, Map configMap, + SpatialContext ctx) { + //A factory for the prefix tree grid + SpatialPrefixTree grid = SpatialPrefixTreeFactory.makeSPT(configMap, null, ctx); + + RecursivePrefixTreeStrategy strategy = new RecursivePrefixTreeStrategy(grid, SPATIAL_FIELD) { + { + //protected field + this.pointsOnly = config.get("spatial.docPointsOnly", false); + } + }; + + int prefixGridScanLevel = config.get("query.spatial.prefixGridScanLevel", -4); + if (prefixGridScanLevel < 0) + prefixGridScanLevel = grid.getMaxLevels() + prefixGridScanLevel; + strategy.setPrefixGridScanLevel(prefixGridScanLevel); + + double distErrPct = config.get("spatial.distErrPct", .025);//doc & query; a default + strategy.setDistErrPct(distErrPct); + return strategy; + } + + @Override + public void setConfig(Config config, ContentSource source) { + super.setConfig(config, source); + SpatialStrategy existing = spatialStrategyCache.get(config.getRoundNumber()); + if (existing == null) { + //new round; we need to re-initialize + strategy = makeSpatialStrategy(config); + spatialStrategyCache.put(config.getRoundNumber(), strategy); + //TODO remove previous round config? + shapeConverter = makeShapeConverter(strategy, config, "doc.spatial."); + System.out.println("Spatial Strategy: " + strategy); + } + } + + /** + * Optionally converts points to circles, and optionally bbox'es result. 
+ */ + public static ShapeConverter makeShapeConverter(final SpatialStrategy spatialStrategy, + Config config, String configKeyPrefix) { + //by default does no conversion + final double radiusDegrees = config.get(configKeyPrefix+"radiusDegrees", 0.0); + final double plusMinus = config.get(configKeyPrefix+"radiusDegreesRandPlusMinus", 0.0); + final boolean bbox = config.get(configKeyPrefix + "bbox", false); + + return new ShapeConverter() { + @Override + public Shape convert(Shape shape) { + if (shape instanceof Point && (radiusDegrees != 0.0 || plusMinus != 0.0)) { + Point point = (Point)shape; + double radius = radiusDegrees; + if (plusMinus > 0.0) { + Random random = new Random(point.hashCode());//use hashCode so it's reproducibly random + radius += random.nextDouble() * 2 * plusMinus - plusMinus; + radius = Math.abs(radius);//can happen if configured plusMinus > radiusDegrees + } + shape = spatialStrategy.getSpatialContext().makeCircle(point, radius); + } + if (bbox) + shape = shape.getBoundingBox(); + return shape; + } + }; + } + + /** Converts one shape to another. 
Created by + * {@link #makeShapeConverter(org.apache.lucene.spatial.SpatialStrategy, org.apache.lucene.benchmark.byTask.utils.Config, String)} */ + public interface ShapeConverter { + Shape convert(Shape shape); + } + + @Override + public Document makeDocument() throws Exception { + + DocState docState = getDocState(); + + Document doc = super.makeDocument(); + + // Set SPATIAL_FIELD from body + DocData docData = docState.docData; + // makeDocument() resets docState.getBody() so we can't look there; look in Document + String shapeStr = doc.getField(DocMaker.BODY_FIELD).stringValue(); + Shape shape = makeShapeFromString(strategy, docData.getName(), shapeStr); + if (shape != null) { + shape = shapeConverter.convert(shape); + //index + for (Field f : strategy.createIndexableFields(shape)) { + doc.add(f); + } + } + + return doc; + } + + public static Shape makeShapeFromString(SpatialStrategy strategy, String name, String shapeStr) { + if (shapeStr != null && shapeStr.length() > 0) { + try { + return strategy.getSpatialContext().readShape(shapeStr); + } catch (Exception e) {//InvalidShapeException TODO + System.err.println("Shape "+name+" wasn't parseable: "+e+" (skipping it)"); + return null; + } + } + return null; + } + + @Override + public Document makeDocument(int size) throws Exception { + //TODO consider abusing the 'size' notion to number of shapes per document + throw new UnsupportedOperationException(); + } +} Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html (revision 1535069) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html (revision ) @@ -469,9 +469,10 @@ its own queryMaker instance.
  • CommitIndex and - Optimize can be used to commit - changes to the index and/or optimize the index created thus - far. + ForceMerge can be used to commit + changes to the index then merge the index segments. The integer + parameter specifies how many segments to merge down to (default + 1).
  • WriteLineDoc prepares a 'line' file where each line holds a document with title, @@ -592,6 +593,9 @@
  • Doc deletion:
    • doc.delete.step
    +
  • + +
  • Spatial: Numerous; see spatial.alg
  • Task alternative packages: Index: dev-tools/maven/lucene/benchmark/pom.xml.template IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- dev-tools/maven/lucene/benchmark/pom.xml.template (revision 1535069) +++ dev-tools/maven/lucene/benchmark/pom.xml.template (revision ) @@ -80,6 +80,11 @@ ${project.version} + ${project.groupId} + lucene-spatial + ${project.version} + + com.ibm.icu icu4j Index: dev-tools/idea/lucene/spatial/spatial.iml IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- dev-tools/idea/lucene/spatial/spatial.iml (revision 1535069) +++ dev-tools/idea/lucene/spatial/spatial.iml (revision ) @@ -11,7 +11,7 @@ - + Index: lucene/benchmark/conf/spatial.alg IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/benchmark/conf/spatial.alg (revision ) +++ lucene/benchmark/conf/spatial.alg (revision ) @@ -0,0 +1,111 @@ +#/** +# * Licensed to the Apache Software Foundation (ASF) under one or more +# * contributor license agreements. See the NOTICE file distributed with +# * this work for additional information regarding copyright ownership. +# * The ASF licenses this file to You under the Apache License, Version 2.0 +# * (the "License"); you may not use this file except in compliance with +# * the License. You may obtain a copy of the License at +# * +# * http://www.apache.org/licenses/LICENSE-2.0 +# * +# * Unless required by applicable law or agreed to in writing, software +# * distributed under the License is distributed on an "AS IS" BASIS, +# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# * See the License for the specific language governing permissions and +# * limitations under the License. 
+# */ +# ------------------------------------------------------------------------------------- +# Spatial search benchmark +# In order to use this, you'll need to first run 'ant geonames-files'. +# You may need more memory when running this: -Dtask.mem=1000M +# For docs on what options are available, see the javadocs. + +### Spatial Context, Grid, Strategy config +doc.maker=org.apache.lucene.benchmark.byTask.feeds.SpatialDocMaker +# SpatialContext: see SpatialContextFactory.makeSpatialContext +#spatial.spatialContextFactory=com.spatial4j.core.context.jts.JtsSpatialContextFactory +#spatial.geo=true +#spatial.distCalculator=haversine +#spatial.worldBounds=... +# Spatial Grid: (PrefixTree) see SpatialPrefixTreeFactory.makeSPT +#spatial.prefixTree=geohash (or quad) +#spatial.maxLevels=11 +#spatial.maxDistErr (in degrees) to compute maxLevels -- defaults to 1 meter's worth +# RecursivePrefixTreeStrategy: +spatial.docPointsOnly=true +#spatial.distErrPct=.025 +#query.spatial.prefixGridScanLevel=-4 + +### Source & Doc +content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource +line.parser=org.apache.lucene.benchmark.byTask.feeds.GeonamesLineParser +docs.file=work/geonames/allCountries.txt +doc.tokenized=false +# Next 3 props convert doc points to circles with a random radius and then optionally bbox'es +#doc.spatial.radiusDegrees=0.0 +#doc.spatial.radiusDegreesRandPlusMinus=0.0 +#doc.spatial.bbox=false + +### Directory +directory=FSDirectory +#directory=RamDirectory +compound=false +merge.factor=10 +ram.flush.mb=64 +concurrent.merge.scheduler.max.thread.count=2 + +### Query +query.maker=org.apache.lucene.benchmark.byTask.feeds.SpatialFileQueryMaker +query.file=work/geonames/allCountries.txt +query.file.line.parser=org.apache.lucene.benchmark.byTask.feeds.GeonamesLineParser +query.file.maxQueries=1000 +# Next 3 props convert query points to circles with a random radius and then optionally bbox'es +query.spatial.radiusDegrees=0 
+query.spatial.radiusDegreesRandPlusMinus=3 +query.spatial.bbox=false + +query.spatial.score=false +#query.spatial.predicate=Intersects +# (defaults to spatial.distErrPct) +query.spatial.distErrPct=qDistErrPct:0.0:0.025:0.1:0.5 + +### Misc + +log.step.AddDoc = 100000 +task.max.depth.log=1 + +# ------------------------------------------------------------------------------------- + +{ "Populate" + ResetSystemErase + CreateIndex + #1 million docs + [{ "MAddDocs" AddDoc} : 250000] : 4 + ForceMerge(1) + CommitIndex + CloseIndex + + RepSumByPref MAddDocs +} : 1 +#set above round to 0 on subsequent runs if not changing indexing but experimenting with search + +OpenReader +{"WarmJIT" Search > : 4000 +CloseReader + +{ "Rounds" + ResetSystemSoft + + OpenReader + Search + {"RealQueries" Search > : 2000 + CloseReader + + NewRound +} : 4 + + +#RepSumByName +RepSumByPrefRound RealQueries + +