Details
-
Improvement
-
Status: Open
-
Major
-
Resolution: Unresolved
-
Jena 4.4.0
-
None
Description
The execution times of equivalent SPARQL queries using Path Alternative and UNION differ considerably. Example:
SELECT * WHERE {?a <http://example.org/property1>|<http://example.org/property2> ?b}
SELECT * WHERE {{?a <http://example.org/property1> ?b } UNION { ?a <http://example.org/property2> ?b}}
Java MWE with 100000 bindings for each alternative:
import org.apache.jena.query.Query; import org.apache.jena.query.QueryExecutionFactory; import org.apache.jena.query.QueryFactory; import org.apache.jena.rdf.model.Model; import org.apache.jena.rdf.model.ModelFactory; import org.apache.jena.rdf.model.Property; import org.apache.jena.rdf.model.ResourceFactory; import org.junit.jupiter.api.Test; public class PathAlternativeVsUnion { @Test public void compare() { Model model = ModelFactory.createDefaultModel(); int scale = 100000; Property property1 = ResourceFactory.createProperty("http://example.org/property1"); Property property2 = ResourceFactory.createProperty("http://example.org/property2"); for (int i = 0; i < scale; i++) { model.createResource("http://example.org/r" + i).addProperty(property1, ResourceFactory.createResource("http://example.org/r" + (scale + i))); model.createResource("http://example.org/r" + (scale * 2 + i)).addProperty(property2, ResourceFactory.createResource("http://example.org/r" + (scale * 3 + i))); } Query pathAlternativeQuery = QueryFactory.create("SELECT * WHERE {?a <" + property1 + ">|<" + property2 + "> ?b}"); Query unionQuery = QueryFactory .create("SELECT * WHERE {{?a <" + property1 + "> ?b } UNION { ?a <" + property2 + "> ?b}}"); // warm up pathAlternativeQuery QueryExecutionFactory.create(pathAlternativeQuery, model).execSelect().forEachRemaining(qs -> { /* consume but do nothing */}); // measure pathAlternativeQuery long start = System.currentTimeMillis(); QueryExecutionFactory.create(pathAlternativeQuery, model).execSelect().forEachRemaining(qs -> { /* consume but do nothing */}); long finish = System.currentTimeMillis(); System.out.println("Time pathAlternativeQuery: " + (finish - start) + " ms"); // warm up unionQuery QueryExecutionFactory.create(unionQuery, model).execSelect().forEachRemaining(qs -> { /* consume but do nothing */}); // measure unionQuery start = System.currentTimeMillis(); QueryExecutionFactory.create(unionQuery, model).execSelect().forEachRemaining(qs -> { 
/* consume but do nothing */}); finish = System.currentTimeMillis(); System.out.println("Time unionQuery: " + (finish - start) + " ms"); } }
Result:
Time pathAlternativeQuery: 10940 ms Time unionQuery: 145 ms
Is it possible to add an automatic execution plan optimization for this case to Apache Jena?