Index: src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingConfigurationImpl.java =================================================================== --- src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingConfigurationImpl.java (revision 572558) +++ src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingConfigurationImpl.java (working copy) @@ -38,7 +38,10 @@ import org.apache.jackrabbit.core.query.QueryHandlerContext; import org.apache.jackrabbit.core.value.InternalValue; import org.apache.jackrabbit.util.ISO9075; +import org.apache.lucene.analysis.Analyzer; import org.apache.commons.collections.iterators.AbstractIteratorDecorator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.w3c.dom.CharacterData; import org.w3c.dom.Node; import org.w3c.dom.NodeList; @@ -63,6 +66,11 @@ public class IndexingConfigurationImpl implements IndexingConfiguration { /** + * The logger instance for this class + */ + private static final Logger log = LoggerFactory.getLogger(IndexingConfigurationImpl.class); + + /** * A namespace resolver for parsing QNames in the configuration. */ private NamespaceResolver nsResolver; @@ -93,9 +101,14 @@ private AggregateRule[] aggregateRules; /** + * The configured analyzers for indexing properties. 
+ */ + private Map analyzers = new HashMap(); + + /** * {@inheritDoc} */ - public void init(Element config, QueryHandlerContext context) throws Exception { + public void init(Element config, QueryHandlerContext context, NamespaceMappings nsMappings) throws Exception { ntReg = context.getNodeTypeRegistry(); ism = context.getItemStateManager(); NameResolver nameResolver = new ParsingNameResolver( @@ -126,7 +139,49 @@ } else if (configNode.getNodeName().equals("aggregate")) { idxAggregates.add(new AggregateRuleImpl( configNode, nsResolver, ism, hmgr)); + } else if (configNode.getNodeName().equals("analyzers")) { + NodeList childNodes = configNode.getChildNodes(); + for (int j = 0; j < childNodes.getLength(); j++) { + Node analyzerNode = childNodes.item(j); + if (analyzerNode.getNodeName().equals("analyzer")) { + String analyzerClassName = analyzerNode.getAttributes().getNamedItem("class").getNodeValue(); + try { + Class clazz = Class.forName(analyzerClassName); + if(clazz == JackrabbitAnalyzer.class) { + log.warn("Not allowed to configure " + JackrabbitAnalyzer.class.getName() + " for a property. 
" + + "Using default analyzer for that property."); + } + else if(Analyzer.class.isAssignableFrom(clazz)){ + Analyzer analyzer = (Analyzer)clazz.newInstance(); + NodeList propertyChildNodes = analyzerNode.getChildNodes(); + for (int k = 0; k < propertyChildNodes.getLength(); k++) { + Node propertyNode = propertyChildNodes.item(k); + if (propertyNode.getNodeName().equals("property")) { + // get property name + QName propName = NameFormat.parse(getTextContent(propertyNode), nsResolver); + String fieldName = NameFormat.format(propName, nsMappings); + // set analyzer for the fulltext property fieldname + int idx = fieldName.indexOf(':'); + fieldName = fieldName.substring(0, idx + 1) + + FieldNames.FULLTEXT_PREFIX + fieldName.substring(idx + 1);; + Object prevAnalyzer = analyzers.put(fieldName, analyzer); + if(prevAnalyzer!=null){ + log.warn("Property " + propName.getLocalName() + " has been configured for multiple analyzers. " + + " Last configured analyzer is used"); + } + } + } + } else { + log.warn("org.apache.lucene.analysis.Analyzer is not a superclass of " + + analyzerClassName +". Ignoring this configure analyzer" ); + } + } catch (ClassNotFoundException e) { + log.warn("Analyzer class not found: " + analyzerClassName, e); + } + } + } } + } aggregateRules = (AggregateRule[]) idxAggregates.toArray( new AggregateRule[idxAggregates.size()]); @@ -211,6 +266,25 @@ return true; } + + /** + * Returns the analyzer configured for the property with this fieldName + * (the string representation ,JCR-style name, of the given QName + * prefixed with FieldNames.FULLTEXT_PREFIX)), + * and null if none is configured, or the configured analyzer + * cannot be found. If null is returned, the default Analyzer + * is used. 
+ * + * @param fieldName the string representation ,JCR-style name, of the given QName + * prefixed with FieldNames.FULLTEXT_PREFIX)) + * @return the analyzer to use for indexing this property + */ + public Analyzer getPropertyAnalyzer(String fieldName) { + if(analyzers.containsKey(fieldName)){ + return (Analyzer)analyzers.get(fieldName); + } + return null; + } //---------------------------------< internal >----------------------------- /** Index: src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingConfiguration.java =================================================================== --- src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingConfiguration.java (revision 572558) +++ src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingConfiguration.java (working copy) @@ -19,6 +19,7 @@ import org.apache.jackrabbit.core.state.NodeState; import org.apache.jackrabbit.core.query.QueryHandlerContext; import org.apache.jackrabbit.name.QName; +import org.apache.lucene.analysis.Analyzer; import org.w3c.dom.Element; /** @@ -39,9 +40,10 @@ * * @param config the document element of the configuration DOM. * @param context the context of the query handler. + * @param namespaceMappings the namespaceMappings. * @throws Exception if initialization fails. */ - public void init(Element config, QueryHandlerContext context) throws Exception; + public void init(Element config, QueryHandlerContext context, NamespaceMappings namespaceMappings) throws Exception; /** * Returns the configured indexing aggregate rules or null if @@ -92,4 +94,19 @@ * @return the boost for the node scope fulltext index field. */ float getNodeBoost(NodeState state); + + /** + * Returns the analyzer configured for the property with this fieldName + * (the string representation ,JCR-style name, of the given QName + * prefixed with FieldNames.FULLTEXT_PREFIX), + * and null if none is configured, or the configured analyzer + * cannot be found. 
If null is returned, the default Analyzer + * is used. + * + * @param fieldName the string representation (JCR-style name) of the given QName, + * prefixed with FieldNames.FULLTEXT_PREFIX + * @return the analyzer to use for indexing this property + */ + Analyzer getPropertyAnalyzer(String fieldName); + } Index: src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java =================================================================== --- src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java (revision 572558) +++ src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java (working copy) @@ -37,7 +37,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.Term; @@ -150,7 +149,7 @@ /** * The analyzer we use for indexing. */ - private Analyzer analyzer; + private JackrabbitAnalyzer analyzer; /** * List of text extractor and text filter class names. The configured @@ -328,7 +327,7 @@ * Default constructor. */ public SearchIndex() { - this.analyzer = new StandardAnalyzer(new String[]{}); + this.analyzer = new JackrabbitAnalyzer(); } /** @@ -350,7 +349,6 @@ } extractor = createTextExtractor(); - indexingConfig = createIndexingConfiguration(); synProvider = createSynonymProvider(); File indexDir = new File(path); @@ -373,7 +371,10 @@ context.getNamespaceRegistry()); } } - + + indexingConfig = createIndexingConfiguration(nsMappings); + analyzer.setIndexingConfig(indexingConfig); + index = new MultiIndex(indexDir, this, excludedIDs, nsMappings); if (index.numDocs() == 0) { index.createInitialIndex( @@ -767,10 +768,11 @@ } /** + * @param namespaceMappings The namespace mappings * @return the fulltext indexing configuration or null if there * is no configuration. 
*/ - protected IndexingConfiguration createIndexingConfiguration() { + protected IndexingConfiguration createIndexingConfiguration(NamespaceMappings namespaceMappings) { Element docElement = getIndexingConfigurationDOM(); if (docElement == null) { return null; @@ -778,7 +780,7 @@ try { IndexingConfiguration idxCfg = (IndexingConfiguration) indexingConfigurationClass.newInstance(); - idxCfg.init(docElement, getContext()); + idxCfg.init(docElement, getContext(), namespaceMappings); return idxCfg; } catch (Exception e) { log.warn("Exception initializing indexing configuration from: " + @@ -1095,7 +1097,7 @@ public void setAnalyzer(String analyzerClassName) { try { Class analyzerClass = Class.forName(analyzerClassName); - analyzer = (Analyzer) analyzerClass.newInstance(); + analyzer.setDefaultAnalyzer((Analyzer) analyzerClass.newInstance()); } catch (Exception e) { log.warn("Invalid Analyzer class: " + analyzerClassName, e); } Index: src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitAnalyzer.java =================================================================== --- src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitAnalyzer.java (revision 0) +++ src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitAnalyzer.java (revision 0) @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.core.query.lucene; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.standard.StandardAnalyzer; + +import java.io.Reader; + +/** + * This is the global Jackrabbit Lucene analyzer. By default, all + * properties are indexed with the StandardAnalyzer(new String[]{}), + * unless a global analyzer is defined in the configuration. + * + * In the indexing configuration, properties can be configured to be + * indexed with a specific analyzer. If configured, this analyzer is used to + * index the text of the property and to parse search text for this property. + */ + +public class JackrabbitAnalyzer extends Analyzer { + + /** + * The default Jackrabbit analyzer, used if none is configured in + * the configuration. + */ + private Analyzer defaultAnalyzer = new StandardAnalyzer(new String[]{}); + + /** + * The indexing configuration. + */ + private IndexingConfiguration indexingConfig; + + /** + * @param indexingConfig the indexing configuration. + */ + protected void setIndexingConfig(IndexingConfiguration indexingConfig) { + this.indexingConfig = indexingConfig; + } + + /** + * @param analyzer the default jackrabbit analyzer + */ + protected void setDefaultAnalyzer(Analyzer analyzer){ + defaultAnalyzer = analyzer; + } + + /** + * Creates a TokenStream which tokenizes all the text in the provided + * Reader. 
If the fieldName (property) is configured to have a different + * analyzer than the default, this analyzer is used for tokenization + */ + public TokenStream tokenStream(String fieldName, Reader reader) { + if(indexingConfig!=null){ + Analyzer propertyAnalyzer = indexingConfig.getPropertyAnalyzer(fieldName); + if(propertyAnalyzer!=null){ + return propertyAnalyzer.tokenStream(fieldName, reader); + } + } + return defaultAnalyzer.tokenStream(fieldName, reader); + } + +} Property changes on: src\main\java\org\apache\jackrabbit\core\query\lucene\JackrabbitAnalyzer.java ___________________________________________________________________ Name: svn:keywords + Id Name: svn:eol-style + native Index: src/main/resources/org/apache/jackrabbit/core/query/lucene/indexing-configuration-1.0.dtd =================================================================== --- src/main/resources/org/apache/jackrabbit/core/query/lucene/indexing-configuration-1.0.dtd (revision 572558) +++ src/main/resources/org/apache/jackrabbit/core/query/lucene/indexing-configuration-1.0.dtd (working copy) @@ -63,4 +63,16 @@ --> \ No newline at end of file + nodeScopeIndex CDATA "true"> + + + + + + \ No newline at end of file