Index: dev-tools/idea/lucene/benchmark/src/benchmark.iml IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- dev-tools/idea/lucene/benchmark/src/benchmark.iml (revision 1525374) +++ dev-tools/idea/lucene/benchmark/src/benchmark.iml (revision ) @@ -24,6 +24,7 @@ + Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialFileQueryMaker.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialFileQueryMaker.java (revision ) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialFileQueryMaker.java (revision ) @@ -0,0 +1,109 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import com.spatial4j.core.shape.Shape; +import org.apache.lucene.benchmark.byTask.utils.Config; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.Filter; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryWrapperFilter; +import org.apache.lucene.spatial.SpatialStrategy; +import org.apache.lucene.spatial.query.SpatialArgs; +import org.apache.lucene.spatial.query.SpatialOperation; + +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; +
/**
 * A query maker that reads shapes from a line file and turns each one into a spatial query
 * against the strategy returned by {@link SpatialDocMaker#getSpatialStrategy(Config)}.
 * <p>
 * Configuration properties read by this class:
 * <ul>
 *   <li><code>query.file</code> -- the line file to read shapes from</li>
 *   <li><code>query.file.line.parser</code> -- the line parser used for that file</li>
 *   <li><code>query.file.maxQueries</code> -- upper bound on the number of queries (default 1000)</li>
 *   <li><code>query.spatial.predicate</code> -- name of the {@link SpatialOperation}
 *       (default "Intersects")</li>
 *   <li><code>query.spatial.distErrPct</code> -- only applied to the query when set</li>
 *   <li><code>query.spatial.score</code> -- whether to build a scoring query (default false)</li>
 * </ul>
 * Shape conversion is configured through
 * {@link SpatialDocMaker#makeShapeConverter(SpatialStrategy, Config, String)} using the
 * <code>query.spatial.</code> key prefix.
 */
public class SpatialFileQueryMaker extends AbstractQueryMaker {
  protected SpatialStrategy strategy;
  protected double distErrPct;//NaN if not set
  protected SpatialOperation operation;
  protected boolean score;

  private SpatialDocMaker.ShapeConverter shapeConverter;

  /**
   * Initialises the spatial strategy, shape converter and query options from <code>config</code>
   * and then delegates to the superclass.
   */
  @Override
  public void setConfig(Config config) throws Exception {
    strategy = SpatialDocMaker.getSpatialStrategy(config);
    shapeConverter = SpatialDocMaker.makeShapeConverter(strategy, config, "query.spatial.");

    distErrPct = config.get("query.spatial.distErrPct", Double.NaN);
    operation = SpatialOperation.get(config.get("query.spatial.predicate", "Intersects"));
    score = config.get("query.spatial.score", false);

    // Must be called last -- presumably the superclass relies on the fields initialised above
    // (NOTE(review): confirm against AbstractQueryMaker).
    super.setConfig(config);
  }

  /**
   * Reads up to <code>query.file.maxQueries</code> parseable shapes from <code>query.file</code>
   * and builds one query per shape. Lines whose shape cannot be parsed are skipped and do not
   * count towards the maximum; reading stops early when the file is exhausted.
   *
   * @return the queries, in file order
   */
  @Override
  protected Query[] prepareQueries() throws Exception {
    final int maxQueries = config.get("query.file.maxQueries", 1000);

    // Feed the query file through a LineDocSource using its own, single-pass configuration.
    Config srcConfig = new Config(new Properties());
    srcConfig.set("docs.file", config.get("query.file", null));
    srcConfig.set("line.parser", config.get("query.file.line.parser", null));
    srcConfig.set("content.source.forever", "false");

    List<Query> queries = new ArrayList<>();
    LineDocSource src = new LineDocSource();
    try {
      src.setConfig(srcConfig);
      src.resetInputs();
      DocData docData = new DocData();
      while (queries.size() < maxQueries) {
        docData = src.getNextDocData(docData);
        Shape shape =
            SpatialDocMaker.makeShapeFromString(strategy, docData.getName(), docData.getBody());
        if (shape == null) {
          continue;//skip; unparseable or empty shapes don't count towards maxQueries
        }
        queries.add(makeQueryFromShape(shapeConverter.convert(shape)));
      }
    } catch (NoMoreDataException ignored) {
      //all-done: the file held fewer than maxQueries usable shapes
    } finally {
      src.close();
    }
    return queries.toArray(new Query[queries.size()]);
  }

  /**
   * Builds the query for one shape using the configured operation. When scoring is disabled the
   * strategy's filter is used, unwrapped if it is a {@link QueryWrapperFilter} and otherwise
   * wrapped in a {@link ConstantScoreQuery}.
   */
  private Query makeQueryFromShape(Shape shape) {
    SpatialArgs args = new SpatialArgs(operation, shape);
    if (!Double.isNaN(distErrPct)) {
      args.setDistErrPct(distErrPct);
    }

    if (score) {
      //Queries generally score
      return strategy.makeQuery(args);
    }

    //Filters do not score
    Filter filter = strategy.makeFilter(args);
    if (filter instanceof QueryWrapperFilter) {
      return ((QueryWrapperFilter) filter).getQuery();
    }
    return new ConstantScoreQuery(filter);
  }

}
Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/GeonamesLineParser.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/GeonamesLineParser.java (revision ) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/GeonamesLineParser.java (revision ) @@ -0,0 +1,44 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * A line parser for Geonames.org data. + * See 'geoname' table. + * Requires {@link SpatialDocMaker}. + */ +public class GeonamesLineParser extends LineDocSource.LineParser { + + /** This header will be ignored; the geonames format is fixed and doesn't have a header line. */ + public GeonamesLineParser(String[] header) { + super(header); + } + + @Override + public void parseLine(DocData docData, String line) { + String[] parts = line.split("\\t", 7);//no more than first 6 fields needed + + // Sample data line: + // 3578267, Morne du Vitet, Morne du Vitet, 17.88333, -62.8, ... + // ID, Name, Alternate name (unused), Lat, Lon, ... 
+ + docData.setID(Integer.parseInt(parts[0]));//note: overwrites ID assigned by LineDocSource + docData.setName(parts[1]); + docData.setBody(parts[4]+","+parts[5]); // latitude , longitude + } +} Index: lucene/benchmark/build.xml IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/benchmark/build.xml (revision 1525374) +++ lucene/benchmark/build.xml (revision ) @@ -37,6 +37,8 @@ + + @@ -62,6 +64,22 @@ + + + + + + + + + + + + + + + @@ -147,8 +165,10 @@ + + @@ -158,7 +178,8 @@ - + @@ -166,6 +187,7 @@ + @@ -256,7 +278,7 @@ - + Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialDocMaker.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialDocMaker.java (revision ) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialDocMaker.java (revision ) @@ -0,0 +1,186 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import com.spatial4j.core.context.SpatialContext; +import com.spatial4j.core.context.SpatialContextFactory; +import com.spatial4j.core.shape.Point; +import com.spatial4j.core.shape.Shape; +import org.apache.lucene.benchmark.byTask.utils.Config; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.spatial.SpatialStrategy; +import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy; +import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; +import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTreeFactory; + +import java.util.AbstractMap; +import java.util.HashMap; +import java.util.Map; +import java.util.Random; +import java.util.Set; +
/**
 * A doc maker that parses the document body as a shape and adds the fields produced by a
 * configured {@link SpatialStrategy} for it.
 * <p>
 * Configuration properties read by this class:
 * <ul>
 *   <li><code>spatial.*</code> -- handed, with the <code>spatial.</code> prefix removed, to
 *       {@link SpatialContextFactory} and {@link SpatialPrefixTreeFactory}</li>
 *   <li><code>spatial.distErrPct</code> -- strategy default for documents and queries
 *       (default .025)</li>
 *   <li><code>doc.spatial.pointsOnly</code> -- default false</li>
 *   <li><code>query.spatial.prefixGridScanLevel</code> -- default -4; a negative value is
 *       added to the grid's maximum number of levels</li>
 *   <li><code>doc.spatial.radiusDegrees</code>, <code>doc.spatial.radiusDegreesRandPlusMinus</code>,
 *       <code>doc.spatial.bbox</code> -- see
 *       {@link #makeShapeConverter(SpatialStrategy, Config, String)}</li>
 * </ul>
 */
public class SpatialDocMaker extends DocMaker {

  public static final String SPATIAL_FIELD = "spatial";

  //cache spatialStrategy by round number
  // NOTE(review): plain HashMap with no synchronization; confirm it is never populated
  // concurrently.
  private static final Map<Integer,SpatialStrategy> spatialStrategyCache = new HashMap<>();

  private SpatialStrategy strategy;
  private ShapeConverter shapeConverter;

  /**
   * Builds a SpatialStrategy from configuration options. Subsequent calls will return
   * the same strategy instance for the given round --
   * {@link org.apache.lucene.benchmark.byTask.utils.Config#getRoundNumber()}.
   */
  public static SpatialStrategy getSpatialStrategy(final Config config) {
    SpatialStrategy result = spatialStrategyCache.get(config.getRoundNumber());
    if (result == null) {
      result = initSpatialStrategy(config);
      System.out.println("Spatial Strategy: " + result);
      spatialStrategyCache.put(config.getRoundNumber(), result);
    }
    return result;
  }

  /** Creates a new {@link RecursivePrefixTreeStrategy} on {@link #SPATIAL_FIELD} from config. */
  private static SpatialStrategy initSpatialStrategy(final Config config) {
    //A Map view of Config that prefixes keys with "spatial."
    Map<String,String> configMap = new AbstractMap<String,String>() {
      @Override
      public Set<Map.Entry<String,String>> entrySet() {
        // Only get() is supported by this view.
        throw new UnsupportedOperationException();
      }

      @Override
      public String get(Object key) {
        return config.get("spatial." + key, null);
      }
    };

    SpatialContext ctx = SpatialContextFactory.makeSpatialContext(configMap, null);

    SpatialPrefixTree grid = SpatialPrefixTreeFactory.makeSPT(configMap, null, ctx);

    //Some day the below might be initialized with a factory but such a factory
    // is non-existent.

    RecursivePrefixTreeStrategy strategy = new RecursivePrefixTreeStrategy(grid, SPATIAL_FIELD) {
      {
        //protected field
        pointsOnly = config.get("doc.spatial.pointsOnly", false);
      }
    };

    // A negative scan level is relative to the grid's maximum number of levels.
    int prefixGridScanLevel = config.get("query.spatial.prefixGridScanLevel", -4);
    if (prefixGridScanLevel < 0) {
      prefixGridScanLevel = grid.getMaxLevels() + prefixGridScanLevel;
    }
    strategy.setPrefixGridScanLevel(prefixGridScanLevel);

    double distErrPct = config.get("spatial.distErrPct", .025);//doc & query
    strategy.setDistErrPct(distErrPct);
    return strategy;
  }

  /** Obtains the round's spatial strategy and the <code>doc.spatial.</code> shape converter. */
  @Override
  public void setConfig(Config config, ContentSource source) {
    super.setConfig(config, source);
    strategy = getSpatialStrategy(config);
    shapeConverter = makeShapeConverter(strategy, config, "doc.spatial.");
  }

  /**
   * Configurably converts points to circles, and optionally bbox'es result.
   * <p>
   * Keys read, each prefixed with <code>configKeyPrefix</code>:
   * <ul>
   *   <li><code>radiusDegrees</code> -- when greater than 0, a point becomes a circle of this
   *       radius (default 0.0, i.e. no conversion)</li>
   *   <li><code>radiusDegreesRandPlusMinus</code> -- when greater than 0, the radius is varied
   *       by up to plus or minus this amount (default 0.0)</li>
   *   <li><code>bbox</code> -- when true, the shape is replaced by its bounding box
   *       (default false)</li>
   * </ul>
   */
  public static ShapeConverter makeShapeConverter(final SpatialStrategy spatialStrategy,
                                                  Config config, String configKeyPrefix) {
    //by default does no conversion
    final double radiusDegrees = config.get(configKeyPrefix + "radiusDegrees", 0.0);
    final double plusMinus = config.get(configKeyPrefix + "radiusDegreesRandPlusMinus", 0.0);
    final boolean bbox = config.get(configKeyPrefix + "bbox", false);

    return new ShapeConverter() {
      @Override
      public Shape convert(Shape shape) {
        if (shape instanceof Point && radiusDegrees > 0.0) {
          Point point = (Point) shape;
          double radius = radiusDegrees;
          if (plusMinus > 0.0) {
            Random random = new Random(point.hashCode());//use hashCode so it's reproducibly random
            radius += random.nextDouble() * 2 * plusMinus - plusMinus;
            radius = Math.abs(radius);//can happen if configured plusMinus > radiusDegrees
          }
          shape = spatialStrategy.getSpatialContext().makeCircle(point, radius);
        }
        if (bbox) {
          shape = shape.getBoundingBox();
        }
        return shape;
      }
    };
  }

  /** Converts a parsed shape into the shape that is actually indexed or queried. */
  public interface ShapeConverter {
    Shape convert(Shape shape);
  }

  /**
   * Builds the document via the superclass, then parses its body field as a shape and, when
   * that succeeds, adds the strategy's indexable fields for the converted shape.
   */
  @Override
  public Document makeDocument() throws Exception {

    DocState docState = getDocState();

    Document doc = super.makeDocument();

    // Set SPATIAL_FIELD from body
    DocData docData = docState.docData;
    // makeDocument() resets docState.getBody() so we can't look there; look in Document
    String shapeStr = doc.getField(DocMaker.BODY_FIELD).stringValue();
    Shape shape = makeShapeFromString(strategy, docData.getName(), shapeStr);
    if (shape != null) {
      shape = shapeConverter.convert(shape);
      //index
      for (Field f : strategy.createIndexableFields(shape)) {
        doc.add(f);
      }
    }

    return doc;
  }

  /**
   * Parses <code>shapeStr</code> with the strategy's spatial context.
   *
   * @param name used only in the message printed when parsing fails
   * @return the shape, or null if <code>shapeStr</code> is null, empty or unparseable
   */
  public static Shape makeShapeFromString(SpatialStrategy strategy, String name, String shapeStr) {
    if (shapeStr != null && shapeStr.length() > 0) {
      try {
        return strategy.getSpatialContext().readShape(shapeStr);
      } catch (Exception e) {//InvalidShapeException TODO
        System.err.println("Shape "+name+" wasn't parseable: "+e+" (skipping it)");
        return null;
      }
    }
    return null;
  }

  /** Not supported. */
  @Override
  public Document makeDocument(int size) throws Exception {
    //TODO consider abusing the 'size' notion to number of shapes per document
    throw new UnsupportedOperationException();
  }
}
Index: dev-tools/maven/lucene/benchmark/pom.xml.template IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- dev-tools/maven/lucene/benchmark/pom.xml.template (revision 1525374) +++ dev-tools/maven/lucene/benchmark/pom.xml.template (revision ) @@ -80,6 +80,11 @@ ${project.version} + ${project.groupId} + lucene-spatial + ${project.version} + + com.ibm.icu icu4j Index: dev-tools/idea/lucene/spatial/spatial.iml IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- dev-tools/idea/lucene/spatial/spatial.iml (revision 1525374) +++ dev-tools/idea/lucene/spatial/spatial.iml (revision ) @@ -11,7 +11,7 @@ - + Index: lucene/benchmark/conf/spatial.alg IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/benchmark/conf/spatial.alg (revision ) +++ lucene/benchmark/conf/spatial.alg (revision ) @@ -0,0 +1,96 @@ +#/** +# * Licensed to the Apache Software Foundation (ASF) under one or more +# * contributor license agreements. See the NOTICE file distributed with +# * this work for additional information regarding copyright ownership. +# * The ASF licenses this file to You under the Apache License, Version 2.0 +# * (the "License"); you may not use this file except in compliance with +# * the License. 
You may obtain a copy of the License at +# * +# * http://www.apache.org/licenses/LICENSE-2.0 +# * +# * Unless required by applicable law or agreed to in writing, software +# * distributed under the License is distributed on an "AS IS" BASIS, +# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# * See the License for the specific language governing permissions and +# * limitations under the License. +# */ +# ------------------------------------------------------------------------------------- +# Spatial search benchmark +# In order to use this, you'll need to first run 'ant geonames-files'. +# You may need more memory when running this: -Dtask.mem=1000M +# For docs on what options are available, see the javadocs. + +### Spatial Strategy config +doc.maker=org.apache.lucene.benchmark.byTask.feeds.SpatialDocMaker +#quad or geohash +spatial.prefixTree=geohash +#spatial.distErrPct=.25 +#query.spatial.prefixGridScanLevel=-4 +#spatial.maxLevels=11 +#spatial.maxDistErr (in degrees) to compute maxLevels -- defaults to 1 meter's worth +doc.spatial.pointsOnly=true + +### Source & Doc +content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource +line.parser=org.apache.lucene.benchmark.byTask.feeds.GeonamesLineParser +docs.file=work/geonames/allCountries.txt +doc.tokenized=false + + +### Directory +#directory=FSDirectory +directory=RamDirectory +compound=false #faster +merge.factor=10 +ram.flush.mb=64 +concurrent.merge.scheduler.max.thread.count=2 + +### Query +query.maker=org.apache.lucene.benchmark.byTask.feeds.SpatialFileQueryMaker +query.file=work/geonames/allCountries.txt +query.file.line.parser=org.apache.lucene.benchmark.byTask.feeds.GeonamesLineParser +query.file.maxQueries=1000 +query.spatial.radiusDegrees=10 +query.spatial.bbox=false +query.spatial.score=false +#query.spatial.predicate=Intersects +#query.spatial.distErrPct (defaults to spatial.distErrPct) + +### Misc + +# task at this depth or less would print when they start 
+log.step.AddDoc = 100000 +task.max.depth.log=1 + +# ------------------------------------------------------------------------------------- + + + +ResetSystemErase + +{ "Populate" + CreateIndex + [{ "MAddDocs" AddDoc} : 250000] : 4 + ForceMerge(1) + CommitIndex + CloseIndex +} + +RepSumByPref MAddDocs + +{ "Round" + OpenReader + {"WarmQueries" Search > : 1000 + {"RealQueries" Search > : 4000 + CloseReader + + ResetInputs + RepSumByName + NewRound +} : 1 + + +#RepSumByName +RepSumByPrefRound RealQueries + +