Index: dev-tools/idea/lucene/benchmark/src/benchmark.iml
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- dev-tools/idea/lucene/benchmark/src/benchmark.iml (revision 1525374)
+++ dev-tools/idea/lucene/benchmark/src/benchmark.iml (revision )
@@ -24,6 +24,7 @@
+
Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialFileQueryMaker.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialFileQueryMaker.java (revision )
+++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialFileQueryMaker.java (revision )
@@ -0,0 +1,124 @@
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import com.spatial4j.core.shape.Shape;
+import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.QueryWrapperFilter;
+import org.apache.lucene.spatial.SpatialStrategy;
+import org.apache.lucene.spatial.query.SpatialArgs;
+import org.apache.lucene.spatial.query.SpatialOperation;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Properties;
+
+/**
+ * Makes spatial queries from shapes read out of a line file ({@code query.file}), using the
+ * {@link SpatialStrategy} returned by {@link SpatialDocMaker#getSpatialStrategy(Config)}.
+ * Settings read here: {@code query.file}, {@code query.file.line.parser},
+ * {@code query.file.maxQueries} and the {@code query.spatial.*} options.
+ */
+public class SpatialFileQueryMaker extends AbstractQueryMaker {
+  protected SpatialStrategy strategy;
+  protected double distErrPct;//NaN if not set
+  protected SpatialOperation operation;
+  protected boolean score;
+
+  private SpatialDocMaker.ShapeConverter shapeConverter;
+
+  /** Reads the {@code query.spatial.*} options, then delegates to the superclass. */
+  @Override
+  public void setConfig(Config config) throws Exception {
+    strategy = SpatialDocMaker.getSpatialStrategy(config);
+    shapeConverter = SpatialDocMaker.makeShapeConverter(strategy, config, "query.spatial.");
+
+    distErrPct = config.get("query.spatial.distErrPct", Double.NaN);
+    operation = SpatialOperation.get(config.get("query.spatial.predicate", "Intersects"));
+    score = config.get("query.spatial.score", false);
+
+    //call last -- presumably this is what triggers prepareQueries(); TODO confirm
+    super.setConfig(config);
+  }
+
+  /**
+   * Builds at most {@code query.file.maxQueries} (default 1000) queries, one per parseable
+   * shape in {@code query.file}. Lines that yield no shape are skipped and not counted.
+   */
+  @Override
+  protected Query[] prepareQueries() throws Exception {
+    final int maxQueries = config.get("query.file.maxQueries", 1000);
+    Config srcConfig = new Config(new Properties());
+    setIfConfigured(srcConfig, "docs.file", config.get("query.file", null));
+    setIfConfigured(srcConfig, "line.parser", config.get("query.file.line.parser", null));
+    srcConfig.set("content.source.forever", "false");
+
+    List<Query> queries = new ArrayList<>();
+    LineDocSource src = new LineDocSource();
+    try {
+      src.setConfig(srcConfig);
+      src.resetInputs();
+      DocData docData = new DocData();
+      //the list only grows for usable shapes, so unparseable lines don't count toward the limit
+      while (queries.size() < maxQueries) {
+        docData = src.getNextDocData(docData);
+        Shape shape = SpatialDocMaker.makeShapeFromString(strategy, docData.getName(), docData.getBody());
+        if (shape != null) {
+          queries.add(makeQueryFromShape(shapeConverter.convert(shape)));
+        }
+      }
+    } catch (NoMoreDataException e) {
+      //all-done: the source ran out before maxQueries was reached
+    } finally {
+      src.close();
+    }
+    return queries.toArray(new Query[queries.size()]);
+  }
+
+  /**
+   * Sets {@code name} on {@code target} only when a value is present. An unset optional
+   * property stays unset instead of being stored as null, which java.util.Properties rejects
+   * (assumes Config.set stores into the Properties it was constructed with -- TODO confirm).
+   */
+  private static void setIfConfigured(Config target, String name, String value) throws Exception {
+    if (value != null) {
+      target.set(name, value);
+    }
+  }
+
+  /** Wraps the shape in {@link SpatialArgs} and builds a scoring or a non-scoring query. */
+  private Query makeQueryFromShape(Shape shape) {
+    SpatialArgs args = new SpatialArgs(operation, shape);
+    if (!Double.isNaN(distErrPct)) {
+      args.setDistErrPct(distErrPct);
+    }
+    if (score) {
+      //Queries generally score
+      return strategy.makeQuery(args);
+    }
+    //Filters do not score
+    Filter filter = strategy.makeFilter(args);
+    if (filter instanceof QueryWrapperFilter) {
+      return ((QueryWrapperFilter)filter).getQuery();
+    }
+    return new ConstantScoreQuery(filter);
+  }
+}
Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/GeonamesLineParser.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/GeonamesLineParser.java (revision )
+++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/GeonamesLineParser.java (revision )
@@ -0,0 +1,60 @@
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A line parser for Geonames.org data.
+ * See 'geoname' table.
+ * Requires {@link SpatialDocMaker}.
+ */
+public class GeonamesLineParser extends LineDocSource.LineParser {
+
+  /** Leading tab-separated columns that are read: geonameid through longitude. */
+  private static final int NUM_FIELDS_NEEDED = 6;
+
+  /** This header will be ignored; the geonames format is fixed and doesn't have a header line. */
+  public GeonamesLineParser(String[] header) {
+    super(header);
+  }
+
+  /**
+   * Copies the id, name and position of one geonames record into {@code docData}; the body
+   * is set to {@code "latitude,longitude"}.
+   *
+   * @throws IllegalArgumentException if the line has fewer than six tab-separated columns,
+   *         or (as its subclass NumberFormatException) if the id column is not an integer
+   */
+  @Override
+  public void parseLine(DocData docData, String line) {
+    // Split on a literal tab rather than the regex "\\t": the result is the same, and a
+    // one-character non-meta separator takes String.split's non-regex fast path.
+    String[] parts = line.split("\t", NUM_FIELDS_NEEDED + 1);//the unused remainder lands in the last slot
+    if (parts.length < NUM_FIELDS_NEEDED) {
+      throw new IllegalArgumentException("Expected at least " + NUM_FIELDS_NEEDED
+          + " tab-separated fields but found " + parts.length + ": " + line);
+    }
+
+    // Leading columns of a geonames record, by index:
+    //   0 geonameid, 1 name, 2 asciiname, 3 alternatenames, 4 latitude, 5 longitude, ...
+    // e.g. id 3578267, name "Morne du Vitet", latitude 17.88333, longitude -62.8
+
+    docData.setID(Integer.parseInt(parts[0]));//note: overwrites ID assigned by LineDocSource
+    docData.setName(parts[1]);
+    docData.setBody(parts[4]+","+parts[5]); // latitude , longitude
+  }
+}
Index: lucene/benchmark/build.xml
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/benchmark/build.xml (revision 1525374)
+++ lucene/benchmark/build.xml (revision )
@@ -37,6 +37,8 @@
+
+
@@ -62,6 +64,22 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -147,8 +165,10 @@
+
+
@@ -158,7 +178,8 @@
-
+
@@ -166,6 +187,7 @@
+
@@ -256,7 +278,7 @@
-
+
Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialDocMaker.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialDocMaker.java (revision )
+++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialDocMaker.java (revision )
@@ -0,0 +1,218 @@
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import com.spatial4j.core.context.SpatialContext;
+import com.spatial4j.core.context.SpatialContextFactory;
+import com.spatial4j.core.shape.Point;
+import com.spatial4j.core.shape.Shape;
+import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.spatial.SpatialStrategy;
+import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy;
+import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
+import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTreeFactory;
+
+import java.util.AbstractMap;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+
+/**
+ * A {@link DocMaker} that reads a shape from each document's body and indexes it under
+ * {@link #SPATIAL_FIELD}, using a {@link SpatialStrategy} built from the benchmark
+ * configuration by {@link #getSpatialStrategy(Config)}.
+ */
+public class SpatialDocMaker extends DocMaker {
+
+  public static final String SPATIAL_FIELD = "spatial";
+
+  //cache spatialStrategy by round number
+  private static final Map<Integer,SpatialStrategy> spatialStrategyCache =
+      new HashMap<Integer,SpatialStrategy>();
+
+  private SpatialStrategy strategy;
+  private ShapeConverter shapeConverter;
+
+  /**
+   * Builds a SpatialStrategy from configuration options. Subsequent calls will return
+   * the same strategy instance for the given round --
+   * {@link org.apache.lucene.benchmark.byTask.utils.Config#getRoundNumber()}.
+   * The cache is a plain HashMap and this method is not synchronized.
+   */
+  public static SpatialStrategy getSpatialStrategy(final Config config) {
+    final int round = config.getRoundNumber();
+    SpatialStrategy result = spatialStrategyCache.get(round);
+    if (result == null) {
+      result = initSpatialStrategy(config);
+      System.out.println("Spatial Strategy: " + result);
+      spatialStrategyCache.put(round, result);
+    }
+    return result;
+  }
+
+  /**
+   * Creates a {@link RecursivePrefixTreeStrategy} on {@link #SPATIAL_FIELD}. The spatial
+   * context and the prefix tree are configured from the {@code spatial.*} properties.
+   * Also read: {@code doc.spatial.pointsOnly} (default false),
+   * {@code query.spatial.prefixGridScanLevel} (default -4; a negative value is relative to
+   * the grid's max levels) and {@code spatial.distErrPct} (default .025).
+   */
+  private static SpatialStrategy initSpatialStrategy(final Config config) {
+    //A Map view of Config that prefixes keys with "spatial."
+    Map<String,String> configMap = new AbstractMap<String,String>() {
+      @Override
+      public Set<Map.Entry<String,String>> entrySet() {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public String get(Object key) {
+        return config.get("spatial." + key, null);
+      }
+    };
+
+    SpatialContext ctx = SpatialContextFactory.makeSpatialContext(configMap, null);
+
+    SpatialPrefixTree grid = SpatialPrefixTreeFactory.makeSPT(configMap, null, ctx);
+
+    //Some day the below might be initialized with a factory but such a factory
+    // is non-existent.
+
+    RecursivePrefixTreeStrategy strategy = new RecursivePrefixTreeStrategy(grid, SPATIAL_FIELD) {
+      {
+        //protected field
+        pointsOnly = config.get("doc.spatial.pointsOnly", false);
+      }
+    };
+
+    int prefixGridScanLevel = config.get("query.spatial.prefixGridScanLevel", -4);
+    if (prefixGridScanLevel < 0) {
+      prefixGridScanLevel = grid.getMaxLevels() + prefixGridScanLevel;
+    }
+    strategy.setPrefixGridScanLevel(prefixGridScanLevel);
+
+    double distErrPct = config.get("spatial.distErrPct", .025);//doc & query
+    strategy.setDistErrPct(distErrPct);
+    return strategy;
+  }
+
+  /** Looks up this round's strategy and builds the {@code doc.spatial.*} shape converter. */
+  @Override
+  public void setConfig(Config config, ContentSource source) {
+    super.setConfig(config, source);
+    strategy = getSpatialStrategy(config);
+    shapeConverter = makeShapeConverter(strategy, config, "doc.spatial.");
+  }
+
+  /**
+   * Makes a converter that configurably turns points into circles and optionally replaces
+   * the result with its bounding box. By default it does no conversion.
+   * Properties read, each prefixed with {@code configKeyPrefix}: {@code radiusDegrees}
+   * (default 0.0; a point becomes a circle only when this is &gt; 0),
+   * {@code radiusDegreesRandPlusMinus} (default 0.0; when &gt; 0 the radius varies by up to
+   * this amount either way) and {@code bbox} (default false).
+   */
+  public static ShapeConverter makeShapeConverter(final SpatialStrategy spatialStrategy,
+                                                  Config config, String configKeyPrefix) {
+    //by default does no conversion
+    final double radiusDegrees = config.get(configKeyPrefix+"radiusDegrees", 0.0);
+    final double plusMinus = config.get(configKeyPrefix+"radiusDegreesRandPlusMinus", 0.0);
+    final boolean bbox = config.get(configKeyPrefix + "bbox", false);
+
+    return new ShapeConverter() {
+      @Override
+      public Shape convert(Shape shape) {
+        if (shape instanceof Point && radiusDegrees > 0.0) {
+          Point point = (Point)shape;
+          double radius = radiusDegrees;
+          if (plusMinus > 0.0) {
+            Random random = new Random(point.hashCode());//use hashCode so it's reproducibly random
+            radius += random.nextDouble() * 2 * plusMinus - plusMinus;
+            radius = Math.abs(radius);//can happen if configured plusMinus > radiusDegrees
+          }
+          shape = spatialStrategy.getSpatialContext().makeCircle(point, radius);
+        }
+        if (bbox) {
+          shape = shape.getBoundingBox();
+        }
+        return shape;
+      }
+    };
+  }
+
+  /** Converts a parsed shape into the shape that is actually indexed or queried. */
+  public interface ShapeConverter {
+    Shape convert(Shape shape);
+  }
+
+  /**
+   * Makes the document as the superclass does, then adds the spatial fields for the shape
+   * parsed from the document's body. No spatial field is added when there is no body or it
+   * isn't parseable as a shape.
+   */
+  @Override
+  public Document makeDocument() throws Exception {
+
+    DocState docState = getDocState();
+
+    Document doc = super.makeDocument();
+
+    // Set SPATIAL_FIELD from body
+    DocData docData = docState.docData;
+    // makeDocument() resets docState.getBody() so we can't look there; look in Document.
+    // Document.get() returns null when there is no body field instead of failing on a null field.
+    String shapeStr = doc.get(DocMaker.BODY_FIELD);
+    Shape shape = makeShapeFromString(strategy, docData.getName(), shapeStr);
+    if (shape != null) {
+      shape = shapeConverter.convert(shape);
+      //index
+      for (Field f : strategy.createIndexableFields(shape)) {
+        doc.add(f);
+      }
+    }
+
+    return doc;
+  }
+
+  /**
+   * Parses {@code shapeStr} with the strategy's spatial context.
+   *
+   * @param name only used to identify the shape in the warning printed on failure
+   * @return the shape, or null if {@code shapeStr} is null, empty or not parseable
+   */
+  public static Shape makeShapeFromString(SpatialStrategy strategy, String name, String shapeStr) {
+    if (shapeStr != null && shapeStr.length() > 0) {
+      try {
+        return strategy.getSpatialContext().readShape(shapeStr);
+      } catch (Exception e) {//InvalidShapeException TODO
+        System.err.println("Shape "+name+" wasn't parseable: "+e+" (skipping it)");
+        return null;
+      }
+    }
+    return null;
+  }
+
+  @Override
+  public Document makeDocument(int size) throws Exception {
+    //TODO consider abusing the 'size' notion to number of shapes per document
+    throw new UnsupportedOperationException();
+  }
+}
Index: dev-tools/maven/lucene/benchmark/pom.xml.template
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- dev-tools/maven/lucene/benchmark/pom.xml.template (revision 1525374)
+++ dev-tools/maven/lucene/benchmark/pom.xml.template (revision )
@@ -80,6 +80,11 @@
${project.version}
+ ${project.groupId}
+ lucene-spatial
+ ${project.version}
+
+
com.ibm.icu
icu4j
Index: dev-tools/idea/lucene/spatial/spatial.iml
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- dev-tools/idea/lucene/spatial/spatial.iml (revision 1525374)
+++ dev-tools/idea/lucene/spatial/spatial.iml (revision )
@@ -11,7 +11,7 @@
-
+
Index: lucene/benchmark/conf/spatial.alg
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/benchmark/conf/spatial.alg (revision )
+++ lucene/benchmark/conf/spatial.alg (revision )
@@ -0,0 +1,96 @@
+#/**
+# * Licensed to the Apache Software Foundation (ASF) under one or more
+# * contributor license agreements. See the NOTICE file distributed with
+# * this work for additional information regarding copyright ownership.
+# * The ASF licenses this file to You under the Apache License, Version 2.0
+# * (the "License"); you may not use this file except in compliance with
+# * the License. You may obtain a copy of the License at
+# *
+# * http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+# -------------------------------------------------------------------------------------
+# Spatial search benchmark
+# In order to use this, you'll need to first run 'ant geonames-files'.
+# You may need more memory when running this: -Dtask.mem=1000M
+# For docs on what options are available, see the javadocs.
+
+### Spatial Strategy config
+doc.maker=org.apache.lucene.benchmark.byTask.feeds.SpatialDocMaker
+#quad or geohash
+spatial.prefixTree=geohash
+#spatial.distErrPct=.25
+#query.spatial.prefixGridScanLevel=-4
+#spatial.maxLevels=11
+#spatial.maxDistErr (in degrees) to compute maxLevels -- defaults to 1 meter's worth
+doc.spatial.pointsOnly=true
+
+### Source & Doc
+content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource
+line.parser=org.apache.lucene.benchmark.byTask.feeds.GeonamesLineParser
+docs.file=work/geonames/allCountries.txt
+doc.tokenized=false
+
+
+### Directory
+#directory=FSDirectory
+directory=RamDirectory
+compound=false #faster
+merge.factor=10
+ram.flush.mb=64
+concurrent.merge.scheduler.max.thread.count=2
+
+### Query
+query.maker=org.apache.lucene.benchmark.byTask.feeds.SpatialFileQueryMaker
+query.file=work/geonames/allCountries.txt
+query.file.line.parser=org.apache.lucene.benchmark.byTask.feeds.GeonamesLineParser
+query.file.maxQueries=1000
+query.spatial.radiusDegrees=10
+query.spatial.bbox=false
+query.spatial.score=false
+#query.spatial.predicate=Intersects
+#query.spatial.distErrPct (defaults to spatial.distErrPct)
+
+### Misc
+
+# tasks at this depth or less print a message when they start (see task.max.depth.log)
+log.step.AddDoc = 100000
+task.max.depth.log=1
+
+# -------------------------------------------------------------------------------------
+
+
+
+ResetSystemErase
+
+{ "Populate"
+ CreateIndex
+ [{ "MAddDocs" AddDoc} : 250000] : 4
+ ForceMerge(1)
+ CommitIndex
+ CloseIndex
+}
+
+RepSumByPref MAddDocs
+
+{ "Round"
+ OpenReader
+ {"WarmQueries" Search > : 1000
+ {"RealQueries" Search > : 4000
+ CloseReader
+
+ ResetInputs
+ RepSumByName
+ NewRound
+} : 1
+
+
+#RepSumByName
+RepSumByPrefRound RealQueries
+
+