diff --git a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java
index 6757f7b..c6d587b 100644
--- a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java
+++ b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java
@@ -213,6 +213,12 @@ public final class IndexDefinition implements Aggregate.AggregateMapper {
 
     private final boolean testMode;
 
+    /**
+     * Optional filter compiled from {@link LuceneIndexConstants#PROP_VALUE_REGEX};
+     * {@code null} when the index definition does not set that property.
+     */
+    private final Pattern propertyRegex;
+
     private final boolean evaluatePathRestrictions;
 
     private final IndexFormatVersion version;
@@ -362,6 +368,11 @@ public final class IndexDefinition implements Aggregate.AggregateMapper {
         this.fullTextEnabled = hasFulltextEnabledIndexRule(definedIndexRules);
         this.evaluatePathRestrictions = getOptionalValue(defn, EVALUATE_PATH_RESTRICTION, false);
 
+        // Compile the optional value filter once; a null pattern means that no filtering is configured.
+        this.propertyRegex = defn.hasProperty(PROP_VALUE_REGEX)
+                ? Pattern.compile(getOptionalValue(defn, PROP_VALUE_REGEX, ""))
+                : null;
+
         String functionName = getOptionalValue(defn, LuceneIndexConstants.FUNC_NAME, null);
         if (fullTextEnabled && functionName == null){
             functionName = "lucene";
@@ -857,6 +868,15 @@ public final class IndexDefinition implements Aggregate.AggregateMapper {
         return false;
     }
 
+    /**
+     * Returns the compiled value regex of this index definition.
+     *
+     * @return the pattern, or {@code null} if no value regex is configured
+     */
+    public Pattern getPropertyRegex() {
+        return propertyRegex;
+    }
+
     public boolean isSuggestEnabled() {
         return suggestEnabled;
     }
diff --git a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java
index 73b0332..344a8c8 100644
--- a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java
+++ b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java
@@ -25,6 +25,7 @@ import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.regex.Pattern;
 
 import javax.jcr.PropertyType;
 
@@ -169,6 +170,10 @@ public class LuceneDocumentMaker {
         if (indexingRule.isFulltextEnabled()) {
-            document.add(newFulltextField(name));
+            // The name only goes into the fulltext field when it passes the optional value regex.
+            Pattern propertyRegex = definition.getPropertyRegex();
+            if (propertyRegex == null || propertyRegex.matcher(name).find()) {
+                document.add(newFulltextField(name));
+            }
         }
 
         if (definition.evaluatePathRestrictions()){
             document.add(newAncestorsField(PathUtils.getParentPath(path)));
@@ -273,6 +278,12 @@ public class LuceneDocumentMaker {
 
             if (pd.fulltextEnabled() && includeTypeForFullText) {
+                // Look the optional value filter up once instead of twice per value.
+                Pattern valueRegex = definition.getPropertyRegex();
                 for (String value : property.getValue(Type.STRINGS)) {
+                    if (valueRegex != null && !valueRegex.matcher(value).find()) {
+                        continue;
+                    }
+
                     if (!includePropertyValue(value, pd)){
                         continue;
                     }
diff --git a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java
index e94c0ad..9cf1612 100644
--- a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java
+++ b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java
@@ -180,6 +180,14 @@ public interface LuceneIndexConstants {
     String EVALUATE_PATH_RESTRICTION = "evaluatePathRestrictions";
 
     /**
+     * Name of the optional index definition property holding a regular expression
+     * that filters what gets indexed for full text search. When the property is
+     * present, a value is added to the index only if the expression is found
+     * somewhere within it (see {@link java.util.regex.Matcher#find()}).
+     */
+    String PROP_VALUE_REGEX = "valueRegex";
+
+    /**
      * Experimental config to restrict which property type gets indexed at
      * property definition level. Mostly index rule level #INCLUDE_PROPERTY_TYPES
      * should be sufficient
diff --git a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/DefaultIndexWriter.java b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/DefaultIndexWriter.java
index 4c19cc5..c1477e1 100644
--- a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/DefaultIndexWriter.java
+++ b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/DefaultIndexWriter.java
@@ -21,6 +21,7 @@ package org.apache.jackrabbit.oak.plugins.index.lucene.writer;
 
 import java.io.IOException;
 import java.util.Calendar;
+import java.util.Iterator;
 import java.util.List;
 
 import com.google.common.io.Closer;
@@ -30,6 +31,7 @@ import org.apache.jackrabbit.oak.commons.PerfLogger;
 import org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition;
 import org.apache.jackrabbit.oak.plugins.index.lucene.directory.DirectoryFactory;
 import org.apache.jackrabbit.oak.plugins.index.lucene.util.SuggestHelper;
+import org.apache.jackrabbit.oak.plugins.index.lucene.FieldNames;
 import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
 import org.apache.jackrabbit.util.ISO8601;
 import org.apache.lucene.analysis.Analyzer;
@@ -79,11 +81,31 @@ class DefaultIndexWriter implements LuceneIndexWriter {
 
     @Override
     public void updateDocument(String path, Iterable<? extends IndexableField> doc) throws IOException {
+        // With a value regex configured, a document consisting only of the path field is not written
+        // to the index (presumably every other value was rejected by the regex).
+        if (definition.getPropertyRegex() != null && containsOnlyPath(doc)) {
+            if (!reindex) {
+                // Incremental update: delete the document stored earlier for the same path instead.
+                getWriter().deleteDocuments(newPathTerm(path));
+                indexUpdated = true;
+            }
+            return;
+        }
         if (reindex) {
             getWriter().addDocument(doc);
         } else {
             getWriter().updateDocument(newPathTerm(path), doc);
         }
         indexUpdated = true;
     }
+
+    /**
+     * Checks whether the given document consists of exactly one field, the path field.
+     */
+    private static boolean containsOnlyPath(Iterable<? extends IndexableField> doc) {
+        Iterator<? extends IndexableField> fields = doc.iterator();
+        return fields.hasNext()
+                && FieldNames.PATH.equals(fields.next().name())
+                && !fields.hasNext();
+    }
 
diff --git a/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java
index 322ba59..d711438 100644
--- a/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java
+++ b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java
@@ -43,7 +43,9 @@ import static org.apache.jackrabbit.JcrConstants.JCR_PRIMARYTYPE;
 import static org.apache.jackrabbit.JcrConstants.NT_UNSTRUCTURED;
 import static org.apache.jackrabbit.oak.api.Type.STRING;
 import static org.apache.jackrabbit.oak.api.Type.STRINGS;
+import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.REINDEX_PROPERTY_NAME;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.TestUtil.useV2;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PROP_VALUE_REGEX;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
@@ -53,10 +55,13 @@ import static org.junit.Assert.assertTrue;
  */
 public class LuceneIndexQueryTest extends AbstractQueryTest {
 
+    /** Index definition node, kept so that tests can change it and request a reindex. */
+    private Tree indexDefn;
+
     @Override
     protected void createTestIndexNode() throws Exception {
         Tree index = root.getTree("/");
-        Tree indexDefn = createTestIndexNode(index, LuceneIndexConstants.TYPE_LUCENE);
+        indexDefn = createTestIndexNode(index, LuceneIndexConstants.TYPE_LUCENE);
         useV2(indexDefn);
         indexDefn.setProperty(LuceneIndexConstants.TEST_MODE, true);
         indexDefn.setProperty(LuceneIndexConstants.EVALUATE_PATH_RESTRICTION, true);
@@ -104,6 +109,38 @@ public class LuceneIndexQueryTest extends AbstractQueryTest {
     }
 
     @Test
+    public void testValueRegex() throws Exception {
+        Tree test = root.getTree("/").addChild("test");
+        Tree a = test.addChild("a");
+        Tree b = test.addChild("b");
+        a.setProperty("name", "hello");
+        b.setProperty("name", "hello pattern");
+        root.commit();
+
+        final String query = "select [jcr:path] from [nt:base] where isdescendantnode('/test') and contains(*, 'hello')";
+
+        // Without a value regex both nodes are found. The query has no ORDER BY, so only
+        // membership is asserted, not the position of each path.
+        Iterator<String> result = executeQuery(query, "JCR-SQL2").iterator();
+        List<String> paths = new ArrayList<>();
+        result.forEachRemaining(paths::add);
+        assertEquals(2, paths.size());
+        assertTrue(paths.contains(a.getPath()));
+        assertTrue(paths.contains(b.getPath()));
+
+        // "pat*" is a regex ("pa" followed by any number of "t"), not a glob; only b's value contains a match.
+        indexDefn.setProperty(PROP_VALUE_REGEX, "pat*");
+        indexDefn.setProperty(REINDEX_PROPERTY_NAME, true);
+        root.commit();
+
+        result = executeQuery(query, "JCR-SQL2").iterator();
+        paths.clear();
+        result.forEachRemaining(paths::add);
+        assertEquals(1, paths.size());
+        assertEquals(b.getPath(), paths.get(0));
+    }
+
+    @Test
     public void descendantTest() throws Exception {
         Tree test = root.getTree("/").addChild("test");
         test.addChild("a");