Index: oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java =================================================================== --- oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java (revision 1876798) +++ oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java (working copy) @@ -213,6 +213,11 @@ private final boolean testMode; + /** + * See {@link LuceneIndexConstants#PROP_VALUE_REGEX} + */ + private final Pattern propertyRegex; + private final boolean evaluatePathRestrictions; private final IndexFormatVersion version; @@ -362,6 +367,12 @@ this.fullTextEnabled = hasFulltextEnabledIndexRule(definedIndexRules); this.evaluatePathRestrictions = getOptionalValue(defn, EVALUATE_PATH_RESTRICTION, false); + if (defn.hasProperty(PROP_VALUE_REGEX)) { + this.propertyRegex = Pattern.compile(getOptionalValue(defn, PROP_VALUE_REGEX, "")); + } else { + this.propertyRegex = null; + } + String functionName = getOptionalValue(defn, LuceneIndexConstants.FUNC_NAME, null); if (fullTextEnabled && functionName == null){ functionName = "lucene"; @@ -857,6 +868,10 @@ return false; } + public Pattern getPropertyRegex() { + return propertyRegex; + } + public boolean isSuggestEnabled() { return suggestEnabled; } Index: oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java =================================================================== --- oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java (revision 1876798) +++ oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java (working copy) @@ -25,6 +25,7 @@ import java.util.Collections; import java.util.List; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.regex.Pattern; import javax.jcr.PropertyType; @@ -167,7 +168,11 @@ if (indexingRule.isFulltextEnabled()) { - document.add(newFulltextField(name)); + Pattern propertyRegex = definition.getPropertyRegex(); + boolean shouldAdd = propertyRegex == null || propertyRegex.matcher(name).find(); + if (shouldAdd) { + document.add(newFulltextField(name)); + } } if (definition.evaluatePathRestrictions()){ @@ -273,6 +278,10 @@ if (pd.fulltextEnabled() && includeTypeForFullText) { for (String value : property.getValue(Type.STRINGS)) { + if (definition.getPropertyRegex() != null && !definition.getPropertyRegex().matcher(value).find()) { + continue; + } + if (!includePropertyValue(value, pd)){ continue; } Index: oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java =================================================================== --- oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java (revision 1876798) +++ oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java (working copy) @@ -180,6 +180,13 @@ String EVALUATE_PATH_RESTRICTION = "evaluatePathRestrictions"; /** + * The property name to specify a regular expression for property value in index definition. If this property is present + * in index definition, then only those properties would be added to index whose value matches the regex defined by + * this property. + */ + String PROP_VALUE_REGEX = "valueRegex"; + + /** * Experimental config to restrict which property type gets indexed at * property definition level. Mostly index rule level #INCLUDE_PROPERTY_TYPES * should be sufficient Index: oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/DefaultIndexWriter.java =================================================================== --- oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/DefaultIndexWriter.java (revision 1876798) +++ oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/DefaultIndexWriter.java (working copy) @@ -21,6 +21,7 @@ import java.io.IOException; import java.util.Calendar; +import java.util.Iterator; import java.util.List; import com.google.common.io.Closer; @@ -30,6 +31,7 @@ import org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition; import org.apache.jackrabbit.oak.plugins.index.lucene.directory.DirectoryFactory; import org.apache.jackrabbit.oak.plugins.index.lucene.util.SuggestHelper; +import org.apache.jackrabbit.oak.plugins.index.lucene.FieldNames; import org.apache.jackrabbit.oak.spi.state.NodeBuilder; import org.apache.jackrabbit.util.ISO8601; import org.apache.lucene.analysis.Analyzer; @@ -79,10 +81,23 @@ @Override public void updateDocument(String path, Iterable doc) throws IOException { + Iterator f = doc.iterator(); + String fieldName = f.hasNext() ? f.next().name() : null; + boolean containsOnlyPath = FieldNames.PATH.equals(fieldName) && !f.hasNext(); + boolean isPropertyRegexMatchingEnabled = definition.getPropertyRegex() != null; if (reindex) { + if (containsOnlyPath && isPropertyRegexMatchingEnabled) { + return; + } getWriter().addDocument(doc); } else { - getWriter().updateDocument(newPathTerm(path), doc); + // if the new document only contains path field, we don't add it to index. Instead we delete existing + // document of the same path. + if (containsOnlyPath && isPropertyRegexMatchingEnabled) { + getWriter().deleteDocuments(newPathTerm(path)); + } else { + getWriter().updateDocument(newPathTerm(path), doc); + } } indexUpdated = true; } Index: oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java =================================================================== --- oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java (revision 1876798) +++ oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java (working copy) @@ -43,7 +43,9 @@ import static org.apache.jackrabbit.JcrConstants.NT_UNSTRUCTURED; import static org.apache.jackrabbit.oak.api.Type.STRING; import static org.apache.jackrabbit.oak.api.Type.STRINGS; +import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.REINDEX_PROPERTY_NAME; import static org.apache.jackrabbit.oak.plugins.index.lucene.TestUtil.useV2; +import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PROP_VALUE_REGEX; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; @@ -53,10 +55,12 @@ */ public class LuceneIndexQueryTest extends AbstractQueryTest { + private Tree indexDefn; + @Override protected void createTestIndexNode() throws Exception { Tree index = root.getTree("/"); - Tree indexDefn = createTestIndexNode(index, LuceneIndexConstants.TYPE_LUCENE); + indexDefn = createTestIndexNode(index, LuceneIndexConstants.TYPE_LUCENE); useV2(indexDefn); indexDefn.setProperty(LuceneIndexConstants.TEST_MODE, true); indexDefn.setProperty(LuceneIndexConstants.EVALUATE_PATH_RESTRICTION, true); @@ -104,6 +108,36 @@ } @Test + public void testValueRegex() throws Exception { + Tree test = root.getTree("/").addChild("test"); + Tree a = test.addChild("a"); + Tree b = test.addChild("b"); + a.setProperty("name", "hello"); + b.setProperty("name", "hello pattern"); + root.commit(); + + final String query = "select [jcr:path] from [nt:base] where isdescendantnode('/test') and contains(*, 'hello')"; + + Iterator result = executeQuery(query,"JCR-SQL2").iterator(); + List paths = new ArrayList<>(); + result.forEachRemaining(paths::add); + assertEquals(2, paths.size()); + assertEquals(paths.get(0), a.getPath()); + assertEquals(paths.get(1), b.getPath()); + + indexDefn.setProperty(PROP_VALUE_REGEX, "pat*"); + indexDefn.setProperty(REINDEX_PROPERTY_NAME, true); + root.commit(); + + result = executeQuery(query,"JCR-SQL2").iterator(); + paths.clear(); + result.forEachRemaining(paths::add); + assertEquals(1, paths.size()); + assertEquals(paths.get(0), b.getPath()); + + } + + @Test public void descendantTest() throws Exception { Tree test = root.getTree("/").addChild("test"); test.addChild("a");