Index: src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingConfigurationImpl.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingConfigurationImpl.java (revision 572558)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingConfigurationImpl.java (working copy)
@@ -38,7 +38,10 @@
import org.apache.jackrabbit.core.query.QueryHandlerContext;
import org.apache.jackrabbit.core.value.InternalValue;
import org.apache.jackrabbit.util.ISO9075;
+import org.apache.lucene.analysis.Analyzer;
import org.apache.commons.collections.iterators.AbstractIteratorDecorator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import org.w3c.dom.CharacterData;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
@@ -61,8 +64,13 @@
* configuration.
*/
public class IndexingConfigurationImpl implements IndexingConfiguration {
-
+
/**
+ * The logger instance for this class
+ */
+ private static final Logger log = LoggerFactory.getLogger(IndexingConfigurationImpl.class);
+
+ /**
* A namespace resolver for parsing QNames in the configuration.
*/
private NamespaceResolver nsResolver;
@@ -93,9 +101,14 @@
private AggregateRule[] aggregateRules;
/**
+ * The analyzers configured for indexing properties, keyed by the fulltext field name (String) of the property.
+ */
+ private Map analyzers = new HashMap();
+
+ /**
* {@inheritDoc}
*/
- public void init(Element config, QueryHandlerContext context) throws Exception {
+ public void init(Element config, QueryHandlerContext context, NamespaceMappings nsMappings) throws Exception {
ntReg = context.getNodeTypeRegistry();
ism = context.getItemStateManager();
NameResolver nameResolver = new ParsingNameResolver(
@@ -126,7 +139,46 @@
} else if (configNode.getNodeName().equals("aggregate")) {
idxAggregates.add(new AggregateRuleImpl(
configNode, nsResolver, ism, hmgr));
+            } else if (configNode.getNodeName().equals("analyzers")) {
+                NodeList childNodes = configNode.getChildNodes();
+                for (int j = 0; j < childNodes.getLength(); j++) {
+                    Node analyzerNode = childNodes.item(j);
+                    if (analyzerNode.getNodeName().equals("analyzer")) {
+                        String analyzerClassName = analyzerNode.getAttributes().getNamedItem("class").getNodeValue();
+                        try {
+                            Class clazz = Class.forName(analyzerClassName);
+                            // TODO: log a warning if analyzerClassName is the Jackrabbit AnalyzerImpl, which is not allowed here
+                            if (Analyzer.class.isAssignableFrom(clazz)) {
+                                Analyzer analyzer = (Analyzer) clazz.newInstance();
+                                NodeList propertyChildNodes = analyzerNode.getChildNodes();
+                                for (int k = 0; k < propertyChildNodes.getLength(); k++) {
+                                    Node propertyNode = propertyChildNodes.item(k);
+                                    if (propertyNode.getNodeName().equals("property")) {
+                                        // resolve the configured property name and format it with the index namespace mappings
+                                        QName propName = NameFormat.parse(getTextContent(propertyNode), nsResolver);
+                                        String fieldName = NameFormat.format(propName, nsMappings);
+                                        // key the (shared) analyzer instance by the fulltext field name: FULLTEXT_PREFIX goes after the ':'
+                                        int idx = fieldName.indexOf(':');
+                                        fieldName = fieldName.substring(0, idx + 1)
+                                                + FieldNames.FULLTEXT_PREFIX + fieldName.substring(idx + 1);
+                                        Object prevAnalyzer = analyzers.put(fieldName, analyzer);
+                                        if (prevAnalyzer != null) {
+                                            log.warn("Property " + propName.getLocalName() + " has been configured for multiple analyzers. "
+                                                    + "Last configured analyzer is used");
+                                        }
+                                    }
+                                }
+                            } else {
+                                log.warn("org.apache.lucene.analysis.Analyzer is not a superclass of "
+                                        + analyzerClassName + ". Ignoring this configured analyzer");
+                            }
+                        } catch (ClassNotFoundException e) {
+                            log.warn("Analyzer class not found: " + analyzerClassName, e);
+                        }
+                    }
+                }
}
+
}
aggregateRules = (AggregateRule[]) idxAggregates.toArray(
new AggregateRule[idxAggregates.size()]);
@@ -211,6 +263,25 @@
return true;
}
+
+    /**
+     * Returns the analyzer configured for the property with this fieldName
+     * (the JCR-style string representation of the property's QName, with the
+     * local name prefixed with FieldNames.FULLTEXT_PREFIX), or
+     * <code>null</code> if none is configured or the configured analyzer
+     * could not be found. If <code>null</code> is returned, the default
+     * Analyzer is used.
+     *
+     * @param fieldName the JCR-style string representation of the property's
+     *                  QName, with the local name prefixed with
+     *                  FieldNames.FULLTEXT_PREFIX
+     * @return the analyzer to use for indexing this property, or
+     *         <code>null</code> if none is configured
+     */
+    public Analyzer getPropertyAnalyzer(String fieldName) {
+        // Map.get() already yields null for an unknown field name
+        return (Analyzer) analyzers.get(fieldName);
+    }
//---------------------------------< internal >-----------------------------
/**
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingConfiguration.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingConfiguration.java (revision 572558)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingConfiguration.java (working copy)
@@ -19,6 +19,7 @@
import org.apache.jackrabbit.core.state.NodeState;
import org.apache.jackrabbit.core.query.QueryHandlerContext;
import org.apache.jackrabbit.name.QName;
+import org.apache.lucene.analysis.Analyzer;
import org.w3c.dom.Element;
/**
@@ -39,9 +40,10 @@
*
* @param config the document element of the configuration DOM.
* @param context the context of the query handler.
+ * @param namespaceMappings the namespaceMappings.
* @throws Exception if initialization fails.
*/
- public void init(Element config, QueryHandlerContext context) throws Exception;
+ public void init(Element config, QueryHandlerContext context, NamespaceMappings namespaceMappings) throws Exception;
/**
* Returns the configured indexing aggregate rules or null if
@@ -92,4 +94,19 @@
* @return the boost for the node scope fulltext index field.
*/
float getNodeBoost(NodeState state);
+
+ /**
+ * Returns the analyzer configured for the property with this fieldName
+ * (the JCR-style string representation of the property's QName, with
+ * the local name prefixed with FieldNames.FULLTEXT_PREFIX), or
+ * <code>null</code> if none is configured or the configured analyzer
+ * cannot be found. If <code>null</code> is returned, the default
+ * Analyzer is used.
+ *
+ * @param fieldName the JCR-style string representation of the property's
+ * QName, with the local name prefixed with FieldNames.FULLTEXT_PREFIX
+ * @return the analyzer to use for indexing this property
+ */
+ Analyzer getPropertyAnalyzer(String fieldName);
+
}
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java (revision 572558)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java (working copy)
@@ -37,6 +37,7 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiReader;
@@ -60,6 +61,7 @@
import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import java.io.File;
+import java.io.Reader;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
@@ -328,7 +330,7 @@
* Default constructor.
*/
public SearchIndex() {
- this.analyzer = new StandardAnalyzer(new String[]{});
+ this.analyzer = new AnalyzerImpl();
}
/**
@@ -350,7 +352,6 @@
}
extractor = createTextExtractor();
- indexingConfig = createIndexingConfiguration();
synProvider = createSynonymProvider();
File indexDir = new File(path);
@@ -373,7 +374,9 @@
context.getNamespaceRegistry());
}
}
-
+
+ indexingConfig = createIndexingConfiguration(nsMappings);
+
index = new MultiIndex(indexDir, this, excludedIDs, nsMappings);
if (index.numDocs() == 0) {
index.createInitialIndex(
@@ -767,10 +770,11 @@
}
/**
+ * @param namespaceMappings The namespace mappings
* @return the fulltext indexing configuration or null if there
* is no configuration.
*/
- protected IndexingConfiguration createIndexingConfiguration() {
+ protected IndexingConfiguration createIndexingConfiguration(NamespaceMappings namespaceMappings) {
Element docElement = getIndexingConfigurationDOM();
if (docElement == null) {
return null;
@@ -778,7 +782,7 @@
try {
IndexingConfiguration idxCfg = (IndexingConfiguration)
indexingConfigurationClass.newInstance();
- idxCfg.init(docElement, getContext());
+ idxCfg.init(docElement, getContext(), namespaceMappings);
return idxCfg;
} catch (Exception e) {
log.warn("Exception initializing indexing configuration from: " +
@@ -1095,7 +1099,7 @@
public void setAnalyzer(String analyzerClassName) {
try {
Class analyzerClass = Class.forName(analyzerClassName);
- analyzer = (Analyzer) analyzerClass.newInstance();
+ ((AnalyzerImpl)analyzer).setDefaultAnalyzer((Analyzer) analyzerClass.newInstance());
} catch (Exception e) {
log.warn("Invalid Analyzer class: " + analyzerClassName, e);
}
@@ -1518,4 +1522,33 @@
throw new IOException("query handler closed and cannot be used anymore.");
}
}
+
+    /**
+     * This is the global jackrabbit lucene analyzer impl. By default, all
+     * properties are indexed with the StandardAnalyzer(new String[]{}),
+     * unless a global analyzer is defined in the configuration. A property
+     * configured with a specific analyzer in the indexing configuration uses
+     * that analyzer to index its text and to parse a search for it.
+     */
+    private class AnalyzerImpl extends Analyzer {
+        /** Analyzer used for every field that has no property specific analyzer. */
+        private Analyzer defaultAnalyzer = new StandardAnalyzer(new String[]{});
+
+        /** Replaces the analyzer used for fields without a property specific analyzer. */
+        private void setDefaultAnalyzer(Analyzer analyzer) {
+            defaultAnalyzer = analyzer;
+        }
+
+        /** Tokenizes with the analyzer configured for fieldName, else with the default analyzer. */
+        public TokenStream tokenStream(String fieldName, Reader reader) {
+            IndexingConfiguration indexingConfig = getIndexingConfig();
+            if (indexingConfig != null) {
+                Analyzer propertyAnalyzer = indexingConfig.getPropertyAnalyzer(fieldName);
+                if (propertyAnalyzer != null) {
+                    return propertyAnalyzer.tokenStream(fieldName, reader);
+                }
+            }
+            return defaultAnalyzer.tokenStream(fieldName, reader);
+        }
+    }
}
Index: src/main/resources/org/apache/jackrabbit/core/query/lucene/indexing-configuration-1.0.dtd
===================================================================
--- src/main/resources/org/apache/jackrabbit/core/query/lucene/indexing-configuration-1.0.dtd (revision 572558)
+++ src/main/resources/org/apache/jackrabbit/core/query/lucene/indexing-configuration-1.0.dtd (working copy)
@@ -63,4 +63,16 @@
-->
\ No newline at end of file
+ nodeScopeIndex CDATA "true">
+
+
+
+
+
+
\ No newline at end of file