Index: src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingConfigurationImpl.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingConfigurationImpl.java	(revision 572558)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingConfigurationImpl.java	(working copy)
@@ -38,7 +38,10 @@
 import org.apache.jackrabbit.core.query.QueryHandlerContext;
 import org.apache.jackrabbit.core.value.InternalValue;
 import org.apache.jackrabbit.util.ISO9075;
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.commons.collections.iterators.AbstractIteratorDecorator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import org.w3c.dom.CharacterData;
 import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
@@ -61,8 +64,13 @@
  * configuration.
  */
 public class IndexingConfigurationImpl implements IndexingConfiguration {
-
+
     /**
+     * The logger instance for this class
+     */
+    private static final Logger log = LoggerFactory.getLogger(IndexingConfigurationImpl.class);
+
+    /**
      * A namespace resolver for parsing QNames in the configuration.
      */
     private NamespaceResolver nsResolver;
@@ -93,9 +101,14 @@
     private AggregateRule[] aggregateRules;
 
     /**
+     * The configured analyzers for indexing properties.
+     */
+    private Map analyzers = new HashMap();
+
+    /**
      * {@inheritDoc}
      */
-    public void init(Element config, QueryHandlerContext context) throws Exception {
+    public void init(Element config, QueryHandlerContext context, NamespaceMappings nsMappings) throws Exception {
         ntReg = context.getNodeTypeRegistry();
         ism = context.getItemStateManager();
         NameResolver nameResolver = new ParsingNameResolver(
@@ -126,7 +139,46 @@
             } else if (configNode.getNodeName().equals("aggregate")) {
                 idxAggregates.add(new AggregateRuleImpl(
                         configNode, nsResolver, ism, hmgr));
+            } else if (configNode.getNodeName().equals("analyzers")) {
+                NodeList childNodes = configNode.getChildNodes();
+                for (int j = 0; j < childNodes.getLength(); j++) {
+                    Node analyzerNode = childNodes.item(j);
+                    if (analyzerNode.getNodeName().equals("analyzer")) {
+                        String analyzerClassName = analyzerNode.getAttributes().getNamedItem("class").getNodeValue();
+                        try {
+                            Class clazz = Class.forName(analyzerClassName);
+                            // if analyzerClassName = JR.analyzerImpl : log warn, not allowed
+                            if(Analyzer.class.isAssignableFrom(clazz)){
+                                Analyzer analyzer = (Analyzer)clazz.newInstance();
+                                NodeList propertyChildNodes = analyzerNode.getChildNodes();
+                                for (int k = 0; k < propertyChildNodes.getLength(); k++) {
+                                    Node propertyNode = propertyChildNodes.item(k);
+                                    if (propertyNode.getNodeName().equals("property")) {
+                                        // get property name
+                                        QName propName = NameFormat.parse(getTextContent(propertyNode), nsResolver);
+                                        String fieldName = NameFormat.format(propName, nsMappings);
+                                        // set analyzer for the fulltext property fieldname
+                                        int idx = fieldName.indexOf(':');
+                                        fieldName = fieldName.substring(0, idx + 1)
+                                                + FieldNames.FULLTEXT_PREFIX + fieldName.substring(idx + 1);
+                                        Object prevAnalyzer = analyzers.put(fieldName, analyzer);
+                                        if(prevAnalyzer!=null){
+                                            log.warn("Property " + propName.getLocalName() + " has been configured for multiple analyzers. "
+                                                    + " Last configured analyzer is used");
+                                        }
+                                    }
+                                }
+                            } else {
+                                log.warn("org.apache.lucene.analysis.Analyzer is not a superclass of "
+                                        + analyzerClassName +". Ignoring this configure analyzer" );
+                            }
+                        } catch (ClassNotFoundException e) {
+                            log.warn("Analyzer class not found: " + analyzerClassName, e);
+                        }
+                    }
+                }
             }
+
         }
         aggregateRules = (AggregateRule[]) idxAggregates.toArray(
                 new AggregateRule[idxAggregates.size()]);
@@ -211,6 +263,23 @@
         return true;
     }
 
+    /**
+     * Returns the analyzer configured for the property with this fieldName
+     * (the string representation, JCR-style name, of the given QName
+     * prefixed with FieldNames.FULLTEXT_PREFIX),
+     * and null if none is configured, or the configured analyzer
+     * cannot be found. If null is returned, the default Analyzer
+     * is used.
+     *
+     * @param fieldName the string representation, JCR-style name, of the given QName
+     * prefixed with FieldNames.FULLTEXT_PREFIX
+     * @return the analyzer to use for indexing this property
+     */
+    public Analyzer getPropertyAnalyzer(String fieldName) {
+        // Map.get already yields null for an unmapped key, so one lookup suffices
+        return (Analyzer) analyzers.get(fieldName);
+    }
+
     //---------------------------------< internal >-----------------------------
 
     /**
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingConfiguration.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingConfiguration.java	(revision 572558)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingConfiguration.java	(working copy)
@@ -19,6 +19,7 @@
 import org.apache.jackrabbit.core.state.NodeState;
 import org.apache.jackrabbit.core.query.QueryHandlerContext;
 import org.apache.jackrabbit.name.QName;
+import org.apache.lucene.analysis.Analyzer;
 import org.w3c.dom.Element;
 
 /**
@@ -39,9 +40,10 @@
      *
      * @param config the document element of the configuration DOM.
      * @param context the context of the query handler.
+     * @param namespaceMappings the namespaceMappings.
      * @throws Exception if initialization fails.
      */
-    public void init(Element config, QueryHandlerContext context) throws Exception;
+    public void init(Element config, QueryHandlerContext context, NamespaceMappings namespaceMappings) throws Exception;
 
     /**
      * Returns the configured indexing aggregate rules or null if
@@ -92,4 +94,19 @@
      * @return the boost for the node scope fulltext index field.
      */
     float getNodeBoost(NodeState state);
+
+    /**
+     * Returns the analyzer configured for the property with this fieldName
+     * (the string representation, JCR-style name, of the given QName
+     * prefixed with FieldNames.FULLTEXT_PREFIX),
+     * and null if none is configured, or the configured analyzer
+     * cannot be found. If null is returned, the default Analyzer
+     * is used.
+     *
+     * @param fieldName the string representation, JCR-style name, of the given QName,
+     * prefixed with FieldNames.FULLTEXT_PREFIX
+     * @return the analyzer to use for indexing this property
+     */
+    Analyzer getPropertyAnalyzer(String fieldName);
+
 }
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java	(revision 572558)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java	(working copy)
@@ -37,6 +37,7 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.MultiReader;
@@ -60,6 +61,7 @@
 import javax.xml.parsers.ParserConfigurationException;
 import java.io.IOException;
 import java.io.File;
+import java.io.Reader;
 import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
@@ -328,7 +330,7 @@
      * Default constructor.
      */
     public SearchIndex() {
-        this.analyzer = new StandardAnalyzer(new String[]{});
+        this.analyzer = new AnalyzerImpl();
     }
 
     /**
@@ -350,7 +352,6 @@
         }
 
         extractor = createTextExtractor();
-        indexingConfig = createIndexingConfiguration();
         synProvider = createSynonymProvider();
 
         File indexDir = new File(path);
@@ -373,7 +374,9 @@
                         context.getNamespaceRegistry());
             }
         }
-
+
+        indexingConfig = createIndexingConfiguration(nsMappings);
+
         index = new MultiIndex(indexDir, this, excludedIDs, nsMappings);
         if (index.numDocs() == 0) {
             index.createInitialIndex(
@@ -767,10 +770,11 @@
     }
 
     /**
+     * @param namespaceMappings The namespace mappings
      * @return the fulltext indexing configuration or null if there
      *         is no configuration.
      */
-    protected IndexingConfiguration createIndexingConfiguration() {
+    protected IndexingConfiguration createIndexingConfiguration(NamespaceMappings namespaceMappings) {
         Element docElement = getIndexingConfigurationDOM();
         if (docElement == null) {
             return null;
@@ -778,7 +782,7 @@
         try {
             IndexingConfiguration idxCfg = (IndexingConfiguration)
                     indexingConfigurationClass.newInstance();
-            idxCfg.init(docElement, getContext());
+            idxCfg.init(docElement, getContext(), namespaceMappings);
             return idxCfg;
         } catch (Exception e) {
             log.warn("Exception initializing indexing configuration from: " +
@@ -1095,7 +1099,7 @@
     public void setAnalyzer(String analyzerClassName) {
         try {
             Class analyzerClass = Class.forName(analyzerClassName);
-            analyzer = (Analyzer) analyzerClass.newInstance();
+            ((AnalyzerImpl)analyzer).setDefaultAnalyzer((Analyzer) analyzerClass.newInstance());
         } catch (Exception e) {
             log.warn("Invalid Analyzer class: " + analyzerClassName, e);
         }
@@ -1518,4 +1522,40 @@
             throw new IOException("query handler closed and cannot be used anymore.");
         }
     }
+
+    /**
+     * This is the global jackrabbit lucene analyzer impl. By default, all
+     * properties are indexed with the StandardAnalyzer(new String[]{}),
+     * unless in the configuration a global analyzer is defined.
+     *
+     * In the indexing configuration, properties can be configured to be
+     * indexed with a specific analyzer. If configured, this analyzer is used to
+     * index the text of the property and to parse a search for this property.
+     */
+    private class AnalyzerImpl extends Analyzer {
+        private Analyzer defaultAnalyzer = new StandardAnalyzer(new String[]{});
+
+        /**
+         * Replaces the analyzer used for fields without a specific analyzer.
+         */
+        private void setDefaultAnalyzer(Analyzer analyzer){
+            defaultAnalyzer = analyzer;
+        }
+
+        /**
+         * Tokenizes with the analyzer the indexing configuration returns for
+         * fieldName, falling back to the default analyzer when there is no
+         * indexing configuration or no analyzer for that field.
+         */
+        public TokenStream tokenStream(String fieldName, Reader reader) {
+            IndexingConfiguration indexingConfig = getIndexingConfig();
+            if(indexingConfig!=null){
+                Analyzer propertyAnalyzer = indexingConfig.getPropertyAnalyzer(fieldName);
+                if(propertyAnalyzer!=null){
+                    return propertyAnalyzer.tokenStream(fieldName, reader);
+                }
+            }
+            return defaultAnalyzer.tokenStream(fieldName, reader);
+        }
+    }
 }
Index: src/main/resources/org/apache/jackrabbit/core/query/lucene/indexing-configuration-1.0.dtd
===================================================================
--- src/main/resources/org/apache/jackrabbit/core/query/lucene/indexing-configuration-1.0.dtd	(revision 572558)
+++ src/main/resources/org/apache/jackrabbit/core/query/lucene/indexing-configuration-1.0.dtd	(working copy)
@@ -63,4 +63,16 @@
 -->
\ No newline at end of file
+ nodeScopeIndex CDATA "true">
+
+
+
+
+
\ No newline at end of file