diff --git a/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/indexStructure/DumpNodeState.java b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/indexStructure/DumpNodeState.java
new file mode 100644
index 0000000..ead451f
--- /dev/null
+++ b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/indexStructure/DumpNodeState.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.lucene.indexStructure;
+
+import org.apache.jackrabbit.oak.api.PropertyState;
+import org.apache.jackrabbit.oak.plugins.document.util.Utils;
+import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry;
+import org.apache.jackrabbit.oak.spi.state.NodeState;
+
+/**
+ * Test helper that prints a {@link NodeState} subtree to standard output as an
+ * indented outline: node lines start with {@code +}, property lines with {@code -}.
+ * Items that {@code Utils.isHiddenPath} reports as hidden are left out.
+ */
+public class DumpNodeState {
+    /** Indentation added for each level of nesting. */
+    private static final String SINGLE_INDENT = " ";
+
+    /**
+     * Prints {@code node} and its visible descendants without leading indentation.
+     *
+     * @param node         root of the subtree to print
+     * @param rootNodeName label printed for {@code node} itself
+     */
+    static void logNode(NodeState node, String rootNodeName) {
+        logNode(node, rootNodeName, "");
+    }
+
+    /**
+     * Prints {@code node}, then its visible properties, then recursively its
+     * visible child nodes, each nested level indented by one more step.
+     *
+     * @param node         root of the subtree to print
+     * @param rootNodeName label printed for {@code node} itself
+     * @param indent       prefix printed before the line of {@code node}
+     */
+    static void logNode(NodeState node, String rootNodeName, String indent) {
+        log(String.format("%1$s+%2$s", indent, rootNodeName));
+        String nestedIndent = indent + SINGLE_INDENT;
+        for (PropertyState ps : node.getProperties()) {
+            if (!isHidden(ps.getName())) {
+                logProperty(ps, nestedIndent);
+            }
+        }
+        for (ChildNodeEntry cne : node.getChildNodeEntries()) {
+            if (!isHidden(cne.getName())) {
+                logNode(cne.getNodeState(), cne.getName(), nestedIndent);
+            }
+        }
+    }
+
+    /** Tells whether an item name is hidden; the check is given a path, hence the slash. */
+    private static boolean isHidden(String name) {
+        return Utils.isHiddenPath("/" + name);
+    }
+
+    /** Prints one property line using the property's own string form. */
+    private static void logProperty(PropertyState ps, String indent) {
+        log(indent + "-" + ps.toString());
+    }
+
+    /** Single output point; everything this class prints goes through here. */
+    private static void log(String str) {
+        System.out.println(str);
+    }
+}
diff --git a/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/indexStructure/IndexedFieldsTest.java b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/indexStructure/IndexedFieldsTest.java
new file mode 100644
index 0000000..78d3233
--- /dev/null
+++ b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/indexStructure/IndexedFieldsTest.java
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.lucene.indexStructure;
+
+import org.apache.jackrabbit.oak.Oak;
+import org.apache.jackrabbit.oak.api.CommitFailedException;
+import org.apache.jackrabbit.oak.api.ContentRepository;
+import org.apache.jackrabbit.oak.api.Tree;
+import org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexEditorProvider;
+import org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexProvider;
+import org.apache.jackrabbit.oak.plugins.index.lucene.util.IndexDefinitionBuilder;
+import org.apache.jackrabbit.oak.plugins.memory.MemoryNodeStore;
+import org.apache.jackrabbit.oak.plugins.nodetype.write.InitialContent;
+import org.apache.jackrabbit.oak.query.AbstractQueryTest;
+import org.apache.jackrabbit.oak.spi.commit.Observer;
+import org.apache.jackrabbit.oak.spi.query.QueryIndexProvider;
+import org.apache.jackrabbit.oak.spi.security.OpenSecurityProvider;
+import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.apache.jackrabbit.oak.spi.state.NodeStore;
+import org.junit.Test;
+
+import java.io.IOException;
+
+/**
+ * Exploratory test: builds property, analyzed and node-scoped Lucene indexes over
+ * the same content and prints content, definitions and indexed terms. No assertions.
+ */
+public class IndexedFieldsTest extends AbstractQueryTest {
+    private static final String PROP_INDEX = "propIdx";
+    private static final String ANALYZED_INDEX = "analyzedIdx";
+    private static final String NODE_SCOPED_INDEX = "nodeScopedIdx";
+
+    private LuceneIndexEditorProvider editorProvider;
+    private NodeStore nodeStore;
+    private LuceneIndexProvider provider;
+
+    /** Builds a repository on a {@link MemoryNodeStore} with the Lucene provider and editor. */
+    @Override
+    protected ContentRepository createRepository() {
+        nodeStore = new MemoryNodeStore();
+        provider = new LuceneIndexProvider();
+        editorProvider = new LuceneIndexEditorProvider();
+        return new Oak(nodeStore)
+                .with(new InitialContent())
+                .with(new OpenSecurityProvider())
+                .with((QueryIndexProvider) provider)
+                .with((Observer) provider)
+                .with(editorProvider)
+                .createContentRepository();
+    }
+
+    /** Creates the three indexes and the test content, then prints content and indexes. */
+    @Test
+    public void fulltextSearchWithCustomAnalyzer() throws Exception {
+        Tree oakIndex = root.getTree("/oak:index");
+        createPropIndex(oakIndex, PROP_INDEX);
+        createAnalyzedIndex(oakIndex, ANALYZED_INDEX);
+        createNodeScopedIndex(oakIndex, NODE_SCOPED_INDEX);
+        Tree test = root.getTree("/").addChild("test");
+        test.setProperty("foo", "fox jumping");
+        test.addChild("testChild").setProperty("bar", "dog jumping");
+        test.addChild("test1").addChild("testChild").setProperty("bar", "dog jumping");
+        test.addChild("test2").addChild("testChild").setProperty("barX", "dog jumping");
+        root.commit();
+
+        NodeState rootState = nodeStore.getRoot();
+        System.out.println("----------------CONTENT-------------------");
+        DumpNodeState.logNode(rootState.getChildNode("test"), "/test");
+        for (String indexName : new String[] {PROP_INDEX, ANALYZED_INDEX, NODE_SCOPED_INDEX}) {
+            dumpIndex(rootState, indexName);
+        }
+    }
+
+    /** Prints the definition node of one index followed by its parsed Lucene structure. */
+    private void dumpIndex(NodeState root, String indexName) throws IOException {
+        System.out.println("\n----------------" + indexName + "--------------");
+        System.out.println("Definition");
+        System.out.println("----------");
+        NodeState definition = root.getChildNode("oak:index").getChildNode(indexName);
+        DumpNodeState.logNode(definition, "/oak:index/" + indexName);
+        System.out.println("Index");
+        System.out.println("-----");
+        LuceneIndexParser.getIndexStructure(root, indexName).dump();
+    }
+
+    /** Builds and commits an index under /test whose three property rules use propertyIndex(). */
+    private String createPropIndex(Tree oakIndex, String indexName) throws CommitFailedException {
+        IndexDefinitionBuilder definition = new IndexDefinitionBuilder();
+        definition.includedPaths("/test")
+                .indexRule("nt:base")
+                .property("foo").propertyIndex().enclosingRule()
+                .property("bar", "testChild/bar").propertyIndex().enclosingRule()
+                .property("allBar", "testChild/ba.*", true).propertyIndex().enclosingRule();
+        definition.noAsync().build(oakIndex.addChild(indexName));
+        root.commit();
+        return indexName;
+    }
+
+    /** Builds and commits an index under /test whose three property rules use analyzed(). */
+    private String createAnalyzedIndex(Tree oakIndex, String indexName) throws CommitFailedException {
+        IndexDefinitionBuilder definition = new IndexDefinitionBuilder();
+        definition.includedPaths("/test")
+                .indexRule("nt:base")
+                .property("foo").analyzed().enclosingRule()
+                .property("bar", "testChild/bar").analyzed().enclosingRule()
+                .property("allBar", "testChild/ba.*", true).analyzed().enclosingRule();
+        definition.noAsync().build(oakIndex.addChild(indexName));
+        root.commit();
+        return indexName;
+    }
+
+    /** Builds and commits an index under /test whose three property rules use nodeScopeIndex(). */
+    private String createNodeScopedIndex(Tree oakIndex, String indexName) throws CommitFailedException {
+        IndexDefinitionBuilder definition = new IndexDefinitionBuilder();
+        definition.includedPaths("/test")
+                .indexRule("nt:base")
+                .property("foo").nodeScopeIndex().enclosingRule()
+                .property("bar", "testChild/bar").nodeScopeIndex().enclosingRule()
+                .property("allBar", "testChild/ba.*", true).nodeScopeIndex().enclosingRule();
+        definition.noAsync().build(oakIndex.addChild(indexName));
+        root.commit();
+        return indexName;
+    }
+}
diff --git a/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/indexStructure/LuceneIndexParser.java b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/indexStructure/LuceneIndexParser.java
new file mode 100644
index 0000000..59a32b9
--- /dev/null
+++ b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/indexStructure/LuceneIndexParser.java
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.lucene.indexStructure;
+
+import com.google.common.collect.Lists;
+import org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition;
+import org.apache.jackrabbit.oak.plugins.index.lucene.OakDirectory;
+import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+/** Test helper that parses a Lucene index stored in Oak into a field/term/path outline. */
+public class LuceneIndexParser {
+    /** Same as {@link #getIndexStructure(NodeState, String, String)} with an empty indent. */
+    public static IndexStructure getIndexStructure(NodeState root, String indexName) throws IOException {
+        return getIndexStructure(root, indexName, "");
+    }
+    /**
+     * Reads the index at {@code /oak:index/<indexName>} and records, for every field
+     * except {@code :path}, each term with the {@code :path} of the documents it occurs in.
+     * @param root      node state that has the {@code oak:index} child
+     * @param indexName name of the index definition node below {@code oak:index}
+     * @param indent    prefix that {@link IndexStructure#dump()} prints before field names
+     * @return the collected structure; it has no fields when nothing was indexed
+     * @throws IOException if the index cannot be opened or read
+     */
+    public static IndexStructure getIndexStructure(NodeState root, String indexName, String indent) throws IOException {
+        NodeState idx = root.getChildNode("oak:index").getChildNode(indexName);
+        IndexDefinition definition = new IndexDefinition(root, idx, "/oak:index/" + indexName);
+        IndexStructure index = new IndexStructure();
+        index.indent = indent;
+        // Directory and reader hold resources: close both, even when reading fails.
+        try (OakDirectory dir = new OakDirectory(idx.builder(), definition, true);
+             DirectoryReader reader = DirectoryReader.open(dir)) {
+            for (AtomicReaderContext arc : reader.leaves()) {
+                AtomicReader ar = arc.reader();
+                Fields flds = ar.fields();
+                if (flds == null) {
+                    continue; // a segment without postings has no fields
+                }
+                Bits matchAll = new Bits.MatchAllBits(ar.maxDoc()); // accept every doc id of the segment
+                for (String fld : flds) {
+                    Terms terms = ":path".equals(fld) ? null : flds.terms(fld);
+                    if (terms == null) {
+                        continue;
+                    }
+                    // Read until next() returns null; Terms.size() may be -1 (unknown).
+                    TermsEnum termsEnum = terms.iterator(TermsEnum.EMPTY);
+                    for (BytesRef termBR = termsEnum.next(); termBR != null; termBR = termsEnum.next()) {
+                        String term = termBR.utf8ToString();
+                        DocsEnum docsEnum = termsEnum.docs(matchAll, null);
+                        while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+                            Document doc = ar.document(docsEnum.docID());
+                            index.add(fld, term, doc.get(":path"));
+                        }
+                    }
+                }
+            }
+        }
+        return index;
+    }
+
+    /** Indexed fields in first-seen order, each mapped to its terms. */
+    static class IndexStructure {
+        String indent = "";
+        Map<String, FieldStructure> fields = new LinkedHashMap<>();
+        void add(String field, String term, String path) {
+            FieldStructure fld = fields.get(field);
+            if (fld == null) {
+                fld = new FieldStructure();
+                fld.indent = indent + " ";
+                fields.put(field, fld);
+            }
+            fld.add(term, path);
+        }
+        void dump() {
+            for (Map.Entry<String, FieldStructure> fieldEntry : fields.entrySet()) {
+                System.out.println(indent + fieldEntry.getKey());
+                fieldEntry.getValue().dump();
+            }
+        }
+    }
+
+    /** Terms of one field in first-seen order, each mapped to its document paths. */
+    static class FieldStructure {
+        String indent = "";
+        Map<String, TermStructure> terms = new LinkedHashMap<>();
+        void add(String term, String path) {
+            TermStructure ter = terms.get(term);
+            if (ter == null) {
+                ter = new TermStructure();
+                terms.put(term, ter);
+            }
+            ter.add(path);
+        }
+        void dump() {
+            for (Map.Entry<String, TermStructure> termEntry : terms.entrySet()) {
+                System.out.println(indent + termEntry.getKey() + " => " + termEntry.getValue().paths);
+            }
+        }
+    }
+
+    /** Paths of the documents that contain one term of one field. */
+    static class TermStructure {
+        List<String> paths = new ArrayList<>();
+        void add(String path) {
+            paths.add(path);
+        }
+    }
+}