Index: dev-tools/idea/lucene/benchmark/src/benchmark.iml
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- dev-tools/idea/lucene/benchmark/src/benchmark.iml (revision 1535069)
+++ dev-tools/idea/lucene/benchmark/src/benchmark.iml (revision )
@@ -24,6 +24,7 @@
+
Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialFileQueryMaker.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialFileQueryMaker.java (revision )
+++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialFileQueryMaker.java (revision )
@@ -0,0 +1,120 @@
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import com.spatial4j.core.shape.Shape;
+import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.queries.CustomScoreQuery;
+import org.apache.lucene.queries.function.FunctionQuery;
+import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.QueryWrapperFilter;
+import org.apache.lucene.spatial.SpatialStrategy;
+import org.apache.lucene.spatial.query.SpatialArgs;
+import org.apache.lucene.spatial.query.SpatialOperation;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Properties;
+
+/**
+ * Builds spatial queries from shapes read out of a line file via an internally created
+ * {@link LineDocSource}. The body of each line doc is parsed by
+ * {@link com.spatial4j.core.context.SpatialContext#readShape(String)} and then
+ * further manipulated via a configurable {@link SpatialDocMaker.ShapeConverter}. When using point
+ * data, it's likely you'll want to configure the shape converter so that the query shapes actually
+ * cover a region. The queries are all created and cached in advance. This query maker works in
+ * conjunction with {@link SpatialDocMaker}. See spatial.alg for a listing of options, in
+ * particular the options starting with "query.".
+ */
+public class SpatialFileQueryMaker extends AbstractQueryMaker {
+  /** Strategy looked up from {@link SpatialDocMaker#getSpatialStrategy(int)} for the config's round. */
+  protected SpatialStrategy strategy;
+  /** Value of "query.spatial.distErrPct"; NaN if not set, in which case it is not applied to the args. */
+  protected double distErrPct;
+  /** Spatial predicate from "query.spatial.predicate" (defaults to "Intersects"). */
+  protected SpatialOperation operation;
+  /** Value of "query.spatial.score" (defaults to false); selects the scoring branch of
+   * {@link #makeQueryFromShape(Shape)}. */
+  protected boolean score;
+
+  /** Converter applied to every parsed shape before a query is made from it. */
+  protected SpatialDocMaker.ShapeConverter shapeConverter;
+
+  /**
+   * Reads the "query.spatial.*" options and the round's strategy, then delegates to the superclass.
+   *
+   * @param config benchmark configuration
+   * @throws Exception propagated from the superclass
+   */
+  @Override
+  public void setConfig(Config config) throws Exception {
+    strategy = SpatialDocMaker.getSpatialStrategy(config.getRoundNumber());
+    shapeConverter = SpatialDocMaker.makeShapeConverter(strategy, config, "query.spatial.");
+
+    distErrPct = config.get("query.spatial.distErrPct", Double.NaN);
+    operation = SpatialOperation.get(config.get("query.spatial.predicate", "Intersects"));
+    score = config.get("query.spatial.score", false);
+
+    super.setConfig(config);//call last, will call prepareQueries()
+  }
+
+  /**
+   * Reads up to "query.file.maxQueries" (default 1000) parseable shapes from "query.file" and
+   * turns each one into a query. Lines whose shape can't be made are skipped and do not count
+   * toward the maximum. Reading stops early when the source runs out of data.
+   *
+   * @return the prepared queries, possibly fewer than the configured maximum
+   * @throws Exception if the line source fails for a reason other than running out of data
+   */
+  @Override
+  protected Query[] prepareQueries() throws Exception {
+    final int maxQueries = config.get("query.file.maxQueries", 1000);
+    Config srcConfig = new Config(new Properties());
+    srcConfig.set("docs.file", config.get("query.file", null));
+    srcConfig.set("line.parser", config.get("query.file.line.parser", null));
+    srcConfig.set("content.source.forever", "false");
+
+    List<Query> queries = new ArrayList<>();
+    LineDocSource src = new LineDocSource();
+    try {
+      src.setConfig(srcConfig);
+      src.resetInputs();
+      DocData docData = new DocData();
+      //only successfully parsed shapes advance the count
+      while (queries.size() < maxQueries) {
+        docData = src.getNextDocData(docData);
+        Shape shape = SpatialDocMaker.makeShapeFromString(strategy, docData.getName(), docData.getBody());
+        if (shape == null) {
+          continue;//skip
+        }
+        queries.add(makeQueryFromShape(shapeConverter.convert(shape)));
+      }
+    } catch (NoMoreDataException ignored) {
+      //all-done: the file held fewer usable shapes than maxQueries
+    } finally {
+      src.close();
+    }
+    return queries.toArray(new Query[queries.size()]);
+  }
+
+
+  /**
+   * Makes a query for the given shape using the configured predicate.
+   *
+   * @param shape the (already converted) query shape
+   * @return a distance-scored query when scoring is enabled, otherwise a query built from the
+   *         strategy's filter
+   */
+  protected Query makeQueryFromShape(Shape shape) {
+    SpatialArgs args = new SpatialArgs(operation, shape);
+    if (!Double.isNaN(distErrPct)) {
+      args.setDistErrPct(distErrPct);
+    }
+
+    if (score) {
+      ValueSource valueSource = strategy.makeDistanceValueSource(shape.getCenter());
+      return new CustomScoreQuery(strategy.makeQuery(args), new FunctionQuery(valueSource));
+    } else {
+      //strategy.makeQuery() could potentially score (isn't well defined) so instead we call
+      // makeFilter() and wrap
+
+      Filter filter = strategy.makeFilter(args);
+      if (filter instanceof QueryWrapperFilter) {
+        return ((QueryWrapperFilter)filter).getQuery();
+      } else {
+        return new ConstantScoreQuery(filter);
+      }
+    }
+  }
+
+}
Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/GeonamesLineParser.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/GeonamesLineParser.java (revision )
+++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/GeonamesLineParser.java (revision )
@@ -0,0 +1,44 @@
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Line parser for tab-separated Geonames.org data (the 'geoname' table).
+ * Intended to be used together with {@link SpatialDocMaker}.
+ */
+public class GeonamesLineParser extends LineDocSource.LineParser {
+
+  /**
+   * @param header ignored by this parser; the geonames format is fixed and doesn't have a
+   *               header line. It is only passed on to the superclass.
+   */
+  public GeonamesLineParser(String[] header) {
+    super(header);
+  }
+
+  /**
+   * Fills {@code docData} from one geonames line: the ID from field 0, the name from field 1
+   * and the body as "latitude,longitude" from fields 4 and 5.
+   *
+   * Sample data line (tab separated in the real file):
+   *   3578267, Morne du Vitet, Morne du Vitet, 17.88333, -62.8, ...
+   *   ID, Name, Alternate name (unused), Lat, Lon, ...
+   */
+  @Override
+  public void parseLine(DocData docData, String line) {
+    // Only the first 6 fields are needed; with a limit of 7 the remainder of the line stays
+    // unsplit in the last element.
+    final String[] fields = line.split("\\t", 7);
+
+    // Note: this overwrites the ID assigned by LineDocSource.
+    final int geonameId = Integer.parseInt(fields[0]);
+    docData.setID(geonameId);
+
+    docData.setName(fields[1]);
+
+    final String latitude = fields[4];
+    final String longitude = fields[5];
+    docData.setBody(latitude + "," + longitude);
+  }
+}
Index: lucene/benchmark/build.xml
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/benchmark/build.xml (revision 1535069)
+++ lucene/benchmark/build.xml (revision )
@@ -37,6 +37,8 @@
+
+
@@ -62,6 +64,25 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -147,8 +168,10 @@
+
+
@@ -158,7 +181,8 @@
-
+
@@ -166,6 +190,7 @@
+
@@ -256,7 +281,7 @@
-
+
Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialDocMaker.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialDocMaker.java (revision )
+++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialDocMaker.java (revision )
@@ -0,0 +1,207 @@
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import com.spatial4j.core.context.SpatialContext;
+import com.spatial4j.core.context.SpatialContextFactory;
+import com.spatial4j.core.shape.Point;
+import com.spatial4j.core.shape.Shape;
+import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.spatial.SpatialStrategy;
+import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy;
+import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
+import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTreeFactory;
+
+import java.util.AbstractMap;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+
+/**
+ * Indexes spatial data according to a configured {@link SpatialStrategy} with optional
+ * shape transformation via a configured {@link ShapeConverter}. The converter can turn points into
+ * circles and bounding boxes, in order to vary the type of indexing performance tests.
+ * Unless it's subclassed to do otherwise, this class configures a {@link SpatialContext},
+ * {@link SpatialPrefixTree}, and {@link RecursivePrefixTreeStrategy}. The strategy is made
+ * available to a query maker via the static method {@link #getSpatialStrategy(int)}.
+ * See spatial.alg for a listing of spatial parameters, in particular those starting with "spatial."
+ * and "doc.spatial".
+ */
+public class SpatialDocMaker extends DocMaker {
+
+  /** Field name handed to the {@link RecursivePrefixTreeStrategy} created by this class. */
+  public static final String SPATIAL_FIELD = "spatial";
+
+  //cache spatialStrategy by round number
+  private static final Map<Integer, SpatialStrategy> spatialStrategyCache =
+      new HashMap<Integer, SpatialStrategy>();
+
+  private SpatialStrategy strategy;
+  private ShapeConverter shapeConverter;
+
+  /**
+   * Looks up the SpatialStrategy from the given round --
+   * {@link org.apache.lucene.benchmark.byTask.utils.Config#getRoundNumber()}. It's an error
+   * if it wasn't created already for this round -- when SpatialDocMaker is initialized.
+   *
+   * @param roundNumber the round whose strategy is wanted
+   * @return the cached strategy, never null
+   * @throws IllegalStateException if no strategy has been cached for that round
+   */
+  public static SpatialStrategy getSpatialStrategy(int roundNumber) {
+    SpatialStrategy result = spatialStrategyCache.get(roundNumber);
+    if (result == null) {
+      throw new IllegalStateException("Strategy should have been init'ed by SpatialDocMaker by now");
+    }
+    return result;
+  }
+
+  /**
+   * Builds a SpatialStrategy from configuration options.
+   *
+   * @param config benchmark configuration; keys are looked up with a "spatial." prefix
+   * @return the strategy made by {@link #makeSpatialStrategy(Config, Map, SpatialContext)}
+   */
+  protected SpatialStrategy makeSpatialStrategy(final Config config) {
+    //A Map view of Config that prefixes keys with "spatial."
+    // Only get() is supported; anything that needs entrySet() will throw.
+    Map<String, String> configMap = new AbstractMap<String, String>() {
+      @Override
+      public Set<Map.Entry<String, String>> entrySet() {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public String get(Object key) {
+        return config.get("spatial." + key, null);
+      }
+    };
+
+    SpatialContext ctx = SpatialContextFactory.makeSpatialContext(configMap, null);
+
+    //Some day the strategy might be initialized with a factory but such a factory
+    // is non-existent.
+    return makeSpatialStrategy(config, configMap, ctx);
+  }
+
+  /**
+   * Builds a {@link RecursivePrefixTreeStrategy} on {@link #SPATIAL_FIELD} over a prefix tree
+   * made from {@code configMap}.
+   *
+   * @param config    benchmark configuration; read for "spatial.docPointsOnly",
+   *                  "query.spatial.prefixGridScanLevel" and "spatial.distErrPct"
+   * @param configMap "spatial."-prefixed view of the config, given to the prefix tree factory
+   * @param ctx       the spatial context to build the prefix tree for
+   * @return the configured strategy
+   */
+  protected SpatialStrategy makeSpatialStrategy(final Config config, Map<String, String> configMap,
+                                                SpatialContext ctx) {
+    //A factory for the prefix tree grid
+    SpatialPrefixTree grid = SpatialPrefixTreeFactory.makeSPT(configMap, null, ctx);
+
+    RecursivePrefixTreeStrategy strategy = new RecursivePrefixTreeStrategy(grid, SPATIAL_FIELD) {
+      {
+        //protected field
+        this.pointsOnly = config.get("spatial.docPointsOnly", false);
+      }
+    };
+
+    int prefixGridScanLevel = config.get("query.spatial.prefixGridScanLevel", -4);
+    if (prefixGridScanLevel < 0) {
+      //a negative value is relative to the grid's max levels
+      prefixGridScanLevel = grid.getMaxLevels() + prefixGridScanLevel;
+    }
+    strategy.setPrefixGridScanLevel(prefixGridScanLevel);
+
+    double distErrPct = config.get("spatial.distErrPct", .025);//doc & query; a default
+    strategy.setDistErrPct(distErrPct);
+    return strategy;
+  }
+
+  /**
+   * Configures this doc maker and makes sure it has a strategy and shape converter for the
+   * config's round. The first call for a round creates and caches the strategy; a call that
+   * finds a strategy already cached for the round adopts it, so that this instance is never
+   * left without one (or with one from another round).
+   */
+  @Override
+  public void setConfig(Config config, ContentSource source) {
+    super.setConfig(config, source);
+    SpatialStrategy existing = spatialStrategyCache.get(config.getRoundNumber());
+    if (existing == null) {
+      //new round; we need to re-initialize
+      strategy = makeSpatialStrategy(config);
+      spatialStrategyCache.put(config.getRoundNumber(), strategy);
+      //TODO remove previous round config?
+      shapeConverter = makeShapeConverter(strategy, config, "doc.spatial.");
+      System.out.println("Spatial Strategy: " + strategy);
+    } else if (strategy != existing) {
+      //the round's strategy was cached by an earlier call that didn't set this instance's fields
+      strategy = existing;
+      shapeConverter = makeShapeConverter(strategy, config, "doc.spatial.");
+    }
+  }
+
+  /**
+   * Optionally converts points to circles, and optionally bbox'es result.
+   *
+   * @param spatialStrategy supplies the spatial context used to make circles
+   * @param config          benchmark configuration
+   * @param configKeyPrefix prefix for the keys "radiusDegrees", "radiusDegreesRandPlusMinus"
+   *                        and "bbox"
+   * @return a converter; with the defaults (0.0, 0.0, false) it returns shapes unchanged
+   */
+  public static ShapeConverter makeShapeConverter(final SpatialStrategy spatialStrategy,
+                                                  Config config, String configKeyPrefix) {
+    //by default does no conversion
+    final double radiusDegrees = config.get(configKeyPrefix+"radiusDegrees", 0.0);
+    final double plusMinus = config.get(configKeyPrefix+"radiusDegreesRandPlusMinus", 0.0);
+    final boolean bbox = config.get(configKeyPrefix + "bbox", false);
+
+    return new ShapeConverter() {
+      @Override
+      public Shape convert(Shape shape) {
+        if (shape instanceof Point && (radiusDegrees != 0.0 || plusMinus != 0.0)) {
+          Point point = (Point)shape;
+          double radius = radiusDegrees;
+          if (plusMinus > 0.0) {
+            Random random = new Random(point.hashCode());//use hashCode so it's reproducibly random
+            radius += random.nextDouble() * 2 * plusMinus - plusMinus;
+            radius = Math.abs(radius);//can happen if configured plusMinus > radiusDegrees
+          }
+          shape = spatialStrategy.getSpatialContext().makeCircle(point, radius);
+        }
+        if (bbox) {
+          shape = shape.getBoundingBox();
+        }
+        return shape;
+      }
+    };
+  }
+
+  /** Converts one shape to another. Created by
+   * {@link #makeShapeConverter(org.apache.lucene.spatial.SpatialStrategy, org.apache.lucene.benchmark.byTask.utils.Config, String)} */
+  public interface ShapeConverter {
+    Shape convert(Shape shape);
+  }
+
+  /**
+   * Makes the document via the superclass, then parses the body field's string value as a
+   * shape, converts it and adds the strategy's indexable fields for it. Documents whose shape
+   * can't be made are returned without spatial fields.
+   */
+  @Override
+  public Document makeDocument() throws Exception {
+
+    DocState docState = getDocState();
+
+    Document doc = super.makeDocument();
+
+    // Set SPATIAL_FIELD from body
+    DocData docData = docState.docData;
+    //   makeDocument() resets docState.getBody() so we can't look there; look in Document
+    String shapeStr = doc.getField(DocMaker.BODY_FIELD).stringValue();
+    Shape shape = makeShapeFromString(strategy, docData.getName(), shapeStr);
+    if (shape != null) {
+      shape = shapeConverter.convert(shape);
+      //index
+      for (Field f : strategy.createIndexableFields(shape)) {
+        doc.add(f);
+      }
+    }
+
+    return doc;
+  }
+
+  /**
+   * Parses a shape string with the strategy's spatial context.
+   *
+   * @param strategy supplies the spatial context that reads the shape
+   * @param name     only used in the message printed when parsing fails
+   * @param shapeStr the text to parse; may be null or empty
+   * @return the shape, or null if {@code shapeStr} is null/empty or wasn't parseable (in which
+   *         case a message is printed to System.err)
+   */
+  public static Shape makeShapeFromString(SpatialStrategy strategy, String name, String shapeStr) {
+    if (shapeStr != null && shapeStr.length() > 0) {
+      try {
+        return strategy.getSpatialContext().readShape(shapeStr);
+      } catch (Exception e) {//InvalidShapeException TODO
+        System.err.println("Shape "+name+" wasn't parseable: "+e+"  (skipping it)");
+        return null;
+      }
+    }
+    return null;
+  }
+
+  /** Not supported by this doc maker; always throws {@link UnsupportedOperationException}. */
+  @Override
+  public Document makeDocument(int size) throws Exception {
+    //TODO consider abusing the 'size' notion to number of shapes per document
+    throw new UnsupportedOperationException();
+  }
+}
Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html (revision 1535069)
+++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html (revision )
@@ -469,9 +469,10 @@
its own queryMaker instance.
CommitIndex and
- Optimize can be used to commit
- changes to the index and/or optimize the index created thus
- far.
+ ForceMerge can be used to commit
+ changes to the index then merge the index segments. The integer
+ parameter specifies how many segments to merge down to (default
+ 1).
WriteLineDoc prepares a 'line'
file where each line holds a document with title,
@@ -592,6 +593,9 @@
Doc deletion:
doc.delete.step
+
+
+
Spatial: Numerous; see spatial.alg
Task alternative packages:
Index: dev-tools/maven/lucene/benchmark/pom.xml.template
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- dev-tools/maven/lucene/benchmark/pom.xml.template (revision 1535069)
+++ dev-tools/maven/lucene/benchmark/pom.xml.template (revision )
@@ -80,6 +80,11 @@
${project.version}
+ ${project.groupId}
+ lucene-spatial
+ ${project.version}
+
+ com.ibm.icuicu4j
Index: dev-tools/idea/lucene/spatial/spatial.iml
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- dev-tools/idea/lucene/spatial/spatial.iml (revision 1535069)
+++ dev-tools/idea/lucene/spatial/spatial.iml (revision )
@@ -11,7 +11,7 @@
-
+
Index: lucene/benchmark/conf/spatial.alg
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/benchmark/conf/spatial.alg (revision )
+++ lucene/benchmark/conf/spatial.alg (revision )
@@ -0,0 +1,111 @@
+#/**
+# * Licensed to the Apache Software Foundation (ASF) under one or more
+# * contributor license agreements. See the NOTICE file distributed with
+# * this work for additional information regarding copyright ownership.
+# * The ASF licenses this file to You under the Apache License, Version 2.0
+# * (the "License"); you may not use this file except in compliance with
+# * the License. You may obtain a copy of the License at
+# *
+# * http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+# -------------------------------------------------------------------------------------
+# Spatial search benchmark
+# In order to use this, you'll need to first run 'ant geonames-files'.
+# You may need more memory when running this: -Dtask.mem=1000M
+# For docs on what options are available, see the javadocs.
+
+### Spatial Context, Grid, Strategy config
+doc.maker=org.apache.lucene.benchmark.byTask.feeds.SpatialDocMaker
+# SpatialContext: see SpatialContextFactory.makeSpatialContext
+#spatial.spatialContextFactory=com.spatial4j.core.context.jts.JtsSpatialContextFactory
+#spatial.geo=true
+#spatial.distCalculator=haversine
+#spatial.worldBounds=...
+# Spatial Grid: (PrefixTree) see SpatialPrefixTreeFactory.makeSPT
+#spatial.prefixTree=geohash (or quad)
+#spatial.maxLevels=11
+#spatial.maxDistErr (in degrees) to compute maxLevels -- defaults to 1 meter's worth
+# RecursivePrefixTreeStrategy:
+spatial.docPointsOnly=true
+#spatial.distErrPct=.25
+#query.spatial.prefixGridScanLevel=-4
+
+### Source & Doc
+content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource
+line.parser=org.apache.lucene.benchmark.byTask.feeds.GeonamesLineParser
+docs.file=work/geonames/allCountries.txt
+doc.tokenized=false
+# Next 3 props convert doc points to circles with a random radius and then optionally bbox'es
+#doc.spatial.radiusDegrees=0.0
+#doc.spatial.radiusDegreesRandPlusMinus=0.0
+#doc.spatial.bbox=false
+
+### Directory
+directory=FSDirectory
+#directory=RamDirectory
+compound=false
+merge.factor=10
+ram.flush.mb=64
+concurrent.merge.scheduler.max.thread.count=2
+
+### Query
+query.maker=org.apache.lucene.benchmark.byTask.feeds.SpatialFileQueryMaker
+query.file=work/geonames/allCountries.txt
+query.file.line.parser=org.apache.lucene.benchmark.byTask.feeds.GeonamesLineParser
+query.file.maxQueries=1000
+# Next 3 props convert query points to circles with a random radius and then optionally bbox'es
+query.spatial.radiusDegrees=0
+query.spatial.radiusDegreesRandPlusMinus=3
+query.spatial.bbox=false
+
+query.spatial.score=false
+#query.spatial.predicate=Intersects
+# (defaults to spatial.distErrPct)
+query.spatial.distErrPct=qDistErrPct:0.0:0.025:0.1:0.5
+
+### Misc
+
+log.step.AddDoc = 100000
+task.max.depth.log=1
+
+# -------------------------------------------------------------------------------------
+
+{ "Populate"
+ ResetSystemErase
+ CreateIndex
+ #1 million docs
+ [{ "MAddDocs" AddDoc} : 250000] : 4
+ ForceMerge(1)
+ CommitIndex
+ CloseIndex
+
+ RepSumByPref MAddDocs
+} : 1
+#set above round to 0 on subsequent runs if not changing indexing but experimenting with search
+
+OpenReader
+{"WarmJIT" Search > : 4000
+CloseReader
+
+{ "Rounds"
+ ResetSystemSoft
+
+ OpenReader
+ Search
+ {"RealQueries" Search > : 2000
+ CloseReader
+
+ NewRound
+} : 4
+
+
+#RepSumByName
+RepSumByPrefRound RealQueries
+
+