Index: oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java =================================================================== --- oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java (revision 1597712) +++ oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java (working copy) @@ -16,13 +16,23 @@ */ package org.apache.jackrabbit.oak.plugins.index.solr.query; +import java.io.IOException; import java.util.Collection; +import java.util.Collections; +import java.util.Deque; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Set; import javax.annotation.CheckForNull; +import com.google.common.collect.AbstractIterator; +import com.google.common.collect.Queues; +import com.google.common.collect.Sets; import org.apache.jackrabbit.JcrConstants; import org.apache.jackrabbit.oak.api.PropertyValue; import org.apache.jackrabbit.oak.plugins.index.aggregate.NodeAggregator; import org.apache.jackrabbit.oak.plugins.index.solr.configuration.OakSolrConfiguration; +import org.apache.jackrabbit.oak.query.QueryEngineSettings; import org.apache.jackrabbit.oak.query.QueryImpl; import org.apache.jackrabbit.oak.query.fulltext.FullTextAnd; import org.apache.jackrabbit.oak.query.fulltext.FullTextExpression; @@ -30,6 +40,7 @@ import org.apache.jackrabbit.oak.query.fulltext.FullTextTerm; import org.apache.jackrabbit.oak.query.fulltext.FullTextVisitor; import org.apache.jackrabbit.oak.spi.query.Cursor; +import org.apache.jackrabbit.oak.spi.query.Cursors; import org.apache.jackrabbit.oak.spi.query.Filter; import org.apache.jackrabbit.oak.spi.query.IndexRow; import org.apache.jackrabbit.oak.spi.query.PropertyValues; @@ -38,14 +49,15 @@ import org.apache.jackrabbit.oak.spi.state.NodeState; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrServer; -import org.apache.solr.client.solrj.SolrServerException; -import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.jackrabbit.oak.commons.PathUtils.getAncestorPath; +import static org.apache.jackrabbit.oak.commons.PathUtils.getDepth; import static org.apache.jackrabbit.oak.commons.PathUtils.getName; +import static org.apache.jackrabbit.oak.commons.PathUtils.getParentPath; /** * A Solr based {@link QueryIndex} @@ -107,8 +119,9 @@ StringBuilder queryBuilder = new StringBuilder(); - if (filter.getFullTextConstraint() != null) { - queryBuilder.append(getFullTextQuery(filter.getFullTextConstraint())); + FullTextExpression ft = filter.getFullTextConstraint(); + if (ft != null) { + queryBuilder.append(getFullTextQuery(ft)); queryBuilder.append(' '); } else if (filter.getFulltextConditions() != null) { Collection fulltextConditions = filter.getFulltextConditions(); @@ -324,6 +337,39 @@ return fullTextString.toString(); } + /** + * Get the set of relative paths of a full-text condition. For example, for + * the condition "contains(a/b, 'hello') and contains(c/d, 'world'), the set + * { "a", "c" } is returned. If there are no relative properties, then one + * entry is returned (the empty string). If there is no expression, then an + * empty set is returned. + * + * @param ft the full-text expression + * @return the set of relative paths (possibly empty) + */ + private static Set getRelativePaths(FullTextExpression ft) { + final HashSet relPaths = new HashSet(); + ft.accept(new FullTextVisitor.FullTextVisitorBase() { + + @Override + public boolean visit(FullTextTerm term) { + String p = term.getPropertyName(); + if (p == null) { + relPaths.add(""); + } else if (p.startsWith("../") || p.startsWith("./")) { + throw new IllegalArgumentException("Relative parent is not supported:" + p); + } else if (getDepth(p) > 1) { + String parent = getParentPath(p); + relPaths.add(parent); + } else { + relPaths.add(""); + } + return true; + } + }); + return relPaths; + } + private boolean isSupportedHttpRequest(String nativeQueryString) { // the query string starts with ${supported-handler.selector}? return nativeQueryString.matches("(mlt|query|select|get)\\\\?.*"); @@ -331,7 +377,7 @@ private void setDefaults(SolrQuery solrQuery) { solrQuery.setParam("q.op", "AND"); - solrQuery.setParam("fl", "* score"); + solrQuery.setParam("fl", configuration.getPathField() + " score"); String catchAllField = configuration.getCatchAllField(); if (catchAllField != null && catchAllField.length() > 0) { solrQuery.setParam("df", catchAllField); @@ -367,107 +413,198 @@ } @Override - public Cursor query(Filter filter, NodeState root) { - if (log.isDebugEnabled()) { - log.debug("converting filter {}", filter); - } + public Cursor query(final Filter filter, NodeState root) { Cursor cursor; try { - SolrQuery query = getQuery(filter); - if (log.isDebugEnabled()) { - log.debug("sending query {}", query); - } - QueryResponse queryResponse = solrServer.query(query); - if (log.isDebugEnabled()) { - log.debug("getting response {}", queryResponse); - } - cursor = new SolrCursor(queryResponse, query); + final Set relPaths = filter.getFullTextConstraint() != null ? getRelativePaths(filter.getFullTextConstraint()) : Collections.emptySet(); + final String parent = relPaths.size() == 0 ? "" : relPaths.iterator().next(); + // we only restrict non-full-text conditions if there is + // no relative property in the full-text constraint + final int parentDepth = getDepth(parent); + + + cursor = new SolrRowCursor(new AbstractIterator() { + private final Set seenPaths = Sets.newHashSet(); + private final Deque queue = Queues.newArrayDeque(); + private SolrDocument lastDoc; + public int offset = 0; + + @Override + protected SolrResultRow computeNext() { + while (!queue.isEmpty() || loadDocs()) { + return queue.remove(); + } + return endOfData(); + } + + private SolrResultRow convertToRow(SolrDocument doc) throws IOException { + String path = String.valueOf(doc.getFieldValue(configuration.getPathField())); + if (path != null) { + if ("".equals(path)) { + path = "/"; + } + if (!parent.isEmpty()) { + path = getAncestorPath(path, parentDepth); + // avoid duplicate entries + if (seenPaths.contains(path)) { + return null; + } + seenPaths.add(path); + } + + float score = 0f; + Object scoreObj = doc.get("score"); + if (scoreObj != null) { + score = (Float) scoreObj; + } + return new SolrResultRow(path, score, doc); + } + return null; + } + + /** + * Loads the Solr documents in batches + * @return true if any document is loaded + */ + private boolean loadDocs() { + SolrDocument lastDocToRecord = null; + + try { + if (log.isDebugEnabled()) { + log.debug("converting filter {}", filter); + } + SolrQuery query = getQuery(filter); + if (lastDoc != null) { + offset++; + int newOffset = offset * configuration.getRows(); + query.setParam("start", String.valueOf(newOffset)); + } + if (log.isDebugEnabled()) { + log.debug("sending query {}", query); + } + SolrDocumentList docs = solrServer.query(query).getResults(); + + if (log.isDebugEnabled()) { + log.debug("getting docs {}", docs); + } + + for (SolrDocument doc : docs) { + SolrResultRow row = convertToRow(doc); + if (row != null) { + queue.add(row); + } + lastDocToRecord = doc; + } + } catch (Exception e) { + if (log.isWarnEnabled()) { + log.warn("query via {} failed.", solrServer, e); + } + } + if (lastDocToRecord != null) { + this.lastDoc = lastDocToRecord; + } + + return !queue.isEmpty(); + } + + }, filter.getQueryEngineSettings()); } catch (Exception e) { throw new RuntimeException(e); } return cursor; } + static class SolrResultRow { + final String path; + final double score; + SolrDocument doc; - private class SolrCursor implements Cursor { + SolrResultRow(String path, double score) { + this.path = path; + this.score = score; + } - private SolrDocumentList results; + SolrResultRow(String path, double score, SolrDocument doc) { + this.path = path; + this.score = score; + this.doc = doc; + } - private SolrQuery query; + @Override + public String toString() { + return String.format("%s (%1.2f)", path, score); + } + } - private int counter; - private int offset; + /** + * A cursor over Solr results. The result includes the path and the jcr:score pseudo-property as returned by Solr, + * plus, eventually, the returned stored values if {@link org.apache.solr.common.SolrDocument} is included in the + * {@link org.apache.jackrabbit.oak.plugins.index.solr.query.SolrQueryIndex.SolrResultRow}. + */ + static class SolrRowCursor implements Cursor { - public SolrCursor(QueryResponse queryResponse, SolrQuery query) { - this.results = queryResponse.getResults(); - this.counter = 0; - this.offset = 0; - this.query = query; + private final Cursor pathCursor; + SolrResultRow currentRow; + + SolrRowCursor(final Iterator it, QueryEngineSettings settings) { + Iterator pathIterator = new Iterator() { + + @Override + public boolean hasNext() { + return it.hasNext(); + } + + @Override + public String next() { + currentRow = it.next(); + return currentRow.path; + } + + @Override + public void remove() { + it.remove(); + } + + }; + pathCursor = new Cursors.PathCursor(pathIterator, true, settings); } + @Override public boolean hasNext() { - return results != null && offset + counter < results.getNumFound(); + return pathCursor.hasNext(); } @Override public void remove() { - results.remove(counter); + pathCursor.remove(); } + @Override public IndexRow next() { - if (counter < results.size() || updateResults()) { - final SolrDocument doc = results.get(counter); - counter++; - return new IndexRow() { - @Override - public String getPath() { - return String.valueOf(doc.getFieldValue( - configuration.getPathField())); - } + final IndexRow pathRow = pathCursor.next(); + return new IndexRow() { - @Override - public PropertyValue getValue(String columnName) { - if (QueryImpl.JCR_SCORE.equals(columnName)) { - float score = 0f; - Object scoreObj = doc.get("score"); - if (scoreObj != null) { - score = (Float) scoreObj; - } - return PropertyValues.newDouble((double) score); - } - Object o = doc.getFieldValue(columnName); - return o == null ? null : PropertyValues.newString(o.toString()); + @Override + public String getPath() { + return pathRow.getPath(); + } + + @Override + public PropertyValue getValue(String columnName) { + // overlay the score + if (QueryImpl.JCR_SCORE.equals(columnName)) { + return PropertyValues.newDouble(currentRow.score); } + // TODO : make inclusion of doc configurable + return currentRow.doc != null ? PropertyValues.newString( + String.valueOf(currentRow.doc.getFieldValue(columnName))) : null; + } - }; - } else { - return null; - } + }; } - - private boolean updateResults() { - int newOffset = offset + results.size(); - query.setParam("start", String.valueOf(newOffset)); - try { - QueryResponse queryResponse = solrServer.query(query); - SolrDocumentList localResults = queryResponse.getResults(); - boolean hasMoreResults = localResults.size() > 0; - if (hasMoreResults) { - counter = 0; - offset = newOffset; - results = localResults; - } else { - query.setParam("start", String.valueOf(offset)); - } - return hasMoreResults; - } catch (SolrServerException e) { - throw new RuntimeException("error retrieving paged results", e); - } - } } - @Override @CheckForNull public NodeAggregator getNodeAggregator() { Index: oak-solr-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrIndexQueryTest.java =================================================================== --- oak-solr-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrIndexQueryTest.java (revision 1597712) +++ oak-solr-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrIndexQueryTest.java (working copy) @@ -285,4 +285,133 @@ root.commit(); assertQuery("//*[jcr:contains(., '美女')]", "xpath", ImmutableList.of(one.getPath())); } + + @Test + public void testCompositeRepExcerpt() throws Exception { + String sqlQuery = "select [jcr:path], [jcr:score], [rep:excerpt] from [nt:base] as a " + + "where (contains([jcr:content/*], 'square') or contains([jcr:content/jcr:title], 'square')" + + " or contains([jcr:content/jcr:description], 'square')) and isdescendantnode(a, '/test') " + + "order by [jcr:score] desc"; + Tree tree = root.getTree("/"); + Tree test = tree.addChild("test"); + Tree child = test.addChild("child"); + Tree a = child.addChild("a"); + a.setProperty("jcr:title", "Hello World, today square is nice"); + Tree b = child.addChild("b"); + b.setProperty("jcr:description", "Cheers World, today weather is squary nice"); + Tree c = child.addChild("c"); + c.setProperty("jcr:title", "Halo Welt, today sky is square"); + root.commit(); + + Iterator strings = executeQuery(sqlQuery, "JCR-SQL2").iterator(); + assertTrue(strings.hasNext()); + assertTrue(strings.next().startsWith("/test/child,")); + assertFalse(strings.hasNext()); + } + + @Test + public void contains() throws Exception { + String h = "Hello" + System.currentTimeMillis(); + String w = "World" + System.currentTimeMillis(); + + Tree test = root.getTree("/").addChild("test"); + test.addChild("a").setProperty("name", asList(h, w), STRINGS); + test.addChild("b").setProperty("name", h); + root.commit(); + + // query 'hello' + StringBuffer stmt = new StringBuffer(); + stmt.append("/jcr:root//*[jcr:contains(., '").append(h); + stmt.append("')]"); + assertQuery(stmt.toString(), "xpath", + ImmutableList.of("/test/a", "/test/b")); + + // query 'world' + stmt = new StringBuffer(); + stmt.append("/jcr:root//*[jcr:contains(., '").append(w); + stmt.append("')]"); + assertQuery(stmt.toString(), "xpath", ImmutableList.of("/test/a")); + + } + + @Test + @Ignore("depends on chosen text_general tokenizer") + public void containsDash() throws Exception { + Tree test = root.getTree("/").addChild("test"); + test.addChild("a").setProperty("name", "hello-wor"); + test.addChild("b").setProperty("name", "hello-world"); + test.addChild("c").setProperty("name", "hello"); + root.commit(); + + assertQuery("/jcr:root//*[jcr:contains(., 'hello-wor*')]", "xpath", + ImmutableList.of("/test/a", "/test/b")); + assertQuery("/jcr:root//*[jcr:contains(., '*hello-wor*')]", "xpath", + ImmutableList.of("/test/a", "/test/b")); + + } + + @Test + public void multiPhraseQuery() throws Exception { + Tree test = root.getTree("/").addChild("test"); + test.addChild("a").setProperty("dc:format", "type:application/pdf"); + root.commit(); + + assertQuery( + "/jcr:root//*[jcr:contains(@dc:format, 'type:appli*')]", + "xpath", ImmutableList.of("/test/a")); + + } + + @Test + public void containsPath() throws Exception { + + Tree test = root.getTree("/").addChild("test"); + test.addChild("a").setProperty("name", "/parent/child/node"); + root.commit(); + + StringBuffer stmt = new StringBuffer(); + stmt.append("//*[jcr:contains(., '/parent/child')]"); + assertQuery(stmt.toString(), "xpath", ImmutableList.of("/test/a")); + + } + + @Test + public void containsPathNum() throws Exception { + + Tree test = root.getTree("/").addChild("test"); + Tree a = test.addChild("a"); + a.setProperty("name", "/segment1/segment2/segment3"); + root.commit(); + + StringBuffer stmt = new StringBuffer(); + stmt.append("//*[jcr:contains(., '/segment1/segment2')]"); + assertQuery(stmt.toString(), "xpath", ImmutableList.of("/test/a")); + + } + + /** + * OAK-1208 property existence constraints break queries + */ + @Test + public void testOAK1208() throws Exception { + Tree t = root.getTree("/").addChild("containsWithMultipleOr"); + Tree one = t.addChild("one"); + one.setProperty("p", "dam/smartcollection"); + one.setProperty("t", "media"); + + Tree two = t.addChild("two"); + two.setProperty("p", "dam/collection"); + two.setProperty("t", "media"); + + Tree three = t.addChild("three"); + three.setProperty("p", "dam/hits"); + three.setProperty("t", "media"); + + root.commit(); + + StringBuffer stmt = new StringBuffer(); + stmt.append("//*[jcr:contains(., 'media') and (@p = 'dam/smartcollection' or @p = 'dam/collection') ]"); + assertQuery(stmt.toString(), "xpath", + ImmutableList.of(one.getPath(), two.getPath())); + } } Index: oak-solr-core/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryFulltextTest.java =================================================================== --- oak-solr-core/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryFulltextTest.java (revision 1597712) +++ oak-solr-core/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryFulltextTest.java (working copy) @@ -88,24 +88,23 @@ q = qm.createQuery(sql2, Query.JCR_SQL2); assertEquals("/testroot/node2, /testroot/node3", getResult(q.execute(), "path")); -// -// sql2 = "select [jcr:path] as [path] from [nt:base] " -// + "where contains([node1/text], 'hello') order by [jcr:path]"; -// q = qm.createQuery(sql2, Query.JCR_SQL2); -// assertEquals("/testroot", getResult(q.execute(), "path")); -// -// sql2 = "select [jcr:path] as [path] from [nt:base] " -// + "where contains([node2/text], 'hello OR hallo') order by [jcr:path]"; -// q = qm.createQuery(sql2, Query.JCR_SQL2); -// assertEquals("/testroot", getResult(q.execute(), "path")); - // TODO OAK-890 - // sql2 = "select [jcr:path] as [path] from [nt:base] " - // + "where contains([node1/text], 'hello') " - // + "and contains([node2/text], 'hallo') " - // + "order by [jcr:path]"; - // q = qm.createQuery(sql2, Query.JCR_SQL2); - // assertEquals("/testroot", getResult(q.execute(), "path")); + sql2 = "select [jcr:path] as [path] from [nt:base] " + + "where contains([node1/text], 'hello') order by [jcr:path]"; + q = qm.createQuery(sql2, Query.JCR_SQL2); + assertEquals("/testroot", getResult(q.execute(), "path")); + + sql2 = "select [jcr:path] as [path] from [nt:base] " + + "where contains([node2/text], 'hello OR hallo') order by [jcr:path]"; + q = qm.createQuery(sql2, Query.JCR_SQL2); + assertEquals("/testroot", getResult(q.execute(), "path")); + + sql2 = "select [jcr:path] as [path] from [nt:base] " + + "where contains([node1/text], 'hello') " + + "and contains([node2/text], 'hallo') " + + "order by [jcr:path]"; + q = qm.createQuery(sql2, Query.JCR_SQL2); + assertEquals("/testroot", getResult(q.execute(), "path")); } static String getResult(QueryResult result, String propertyName) throws RepositoryException {