Index: src/main/java/org/apache/jackrabbit/core/query/TextFilter.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/TextFilter.java (revision 421461)
+++ src/main/java/org/apache/jackrabbit/core/query/TextFilter.java (working copy)
@@ -29,6 +29,9 @@
* mime type ({@link #canFilter(String)} and if one of them returns
* true the text representation is created with
* {@link #doFilter(PropertyState, String)}
+ *
+ * @deprecated use the {@link org.apache.jackrabbit.extractor.TextExtractor}
+ * interface
*/
public interface TextFilter {
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java (revision 421461)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java (working copy)
@@ -17,13 +17,14 @@
package org.apache.jackrabbit.core.query.lucene;
import org.apache.jackrabbit.core.PropertyId;
-import org.apache.jackrabbit.core.query.TextFilter;
import org.apache.jackrabbit.core.state.ItemStateException;
import org.apache.jackrabbit.core.state.ItemStateManager;
import org.apache.jackrabbit.core.state.NoSuchItemStateException;
import org.apache.jackrabbit.core.state.NodeState;
import org.apache.jackrabbit.core.state.PropertyState;
+import org.apache.jackrabbit.core.value.BLOBFileValue;
import org.apache.jackrabbit.core.value.InternalValue;
+import org.apache.jackrabbit.extractor.TextExtractor;
import org.apache.jackrabbit.name.NoPrefixDeclaredException;
import org.apache.jackrabbit.name.Path;
import org.apache.jackrabbit.name.QName;
@@ -37,12 +38,11 @@
import javax.jcr.NamespaceException;
import javax.jcr.PropertyType;
import javax.jcr.RepositoryException;
+
+import java.io.InputStream;
import java.io.Reader;
import java.util.Calendar;
-import java.util.Collections;
import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
import java.util.Set;
/**
@@ -72,9 +72,9 @@
protected final NamespaceMappings mappings;
/**
- * List of text filters in use.
+ * Content extractor.
*/
- protected final List textFilters;
+ protected final TextExtractor extractor;
/**
* Creates a new node indexer.
@@ -82,16 +82,16 @@
* @param node the node state to index.
* @param stateProvider the persistent item state manager to retrieve properties.
* @param mappings internal namespace mappings.
- * @param textFilters List of {@link org.apache.jackrabbit.core.query.TextFilter}s.
+ * @param extractor content extractor
*/
protected NodeIndexer(NodeState node,
ItemStateManager stateProvider,
NamespaceMappings mappings,
- List textFilters) {
+ TextExtractor extractor) {
this.node = node;
this.stateProvider = stateProvider;
this.mappings = mappings;
- this.textFilters = textFilters;
+ this.extractor = extractor;
}
/**
@@ -100,8 +100,7 @@
* @param node the node state to index.
* @param stateProvider the state provider to retrieve property values.
* @param mappings internal namespace mappings.
- * @param textFilters list of text filters to use for indexing binary
- * properties.
+ * @param extractor text extractor
* @return the lucene Document.
* @throws RepositoryException if an error occurs while reading property
* values from the ItemStateProvider.
@@ -109,9 +108,9 @@
public static Document createDocument(NodeState node,
ItemStateManager stateProvider,
NamespaceMappings mappings,
- List textFilters)
+ TextExtractor extractor)
throws RepositoryException {
- NodeIndexer indexer = new NodeIndexer(node, stateProvider, mappings, textFilters);
+ NodeIndexer indexer = new NodeIndexer(node, stateProvider, mappings, extractor);
return indexer.createDoc();
}
@@ -269,38 +268,23 @@
// don't know how to index
return;
}
- if (node.hasPropertyName(QName.JCR_MIMETYPE)) {
- PropertyState dataProp = (PropertyState) stateProvider.getItemState(
- new PropertyId(node.getNodeId(), QName.JCR_DATA));
- PropertyState mimeTypeProp =
- (PropertyState) stateProvider.getItemState(
- new PropertyId(node.getNodeId(), QName.JCR_MIMETYPE));
+ InternalValue typeValue = getValue(QName.JCR_MIMETYPE);
+ if (typeValue != null) {
+ String type = typeValue.internalValue().toString();
+
// jcr:encoding is not mandatory
String encoding = null;
- if (node.hasPropertyName(QName.JCR_ENCODING)) {
- PropertyState encodingProp =
- (PropertyState) stateProvider.getItemState(
- new PropertyId(node.getNodeId(), QName.JCR_ENCODING));
- encoding = encodingProp.getValues()[0].internalValue().toString();
+ InternalValue encodingValue = getValue(QName.JCR_ENCODING);
+ if (encodingValue != null) {
+ encoding = encodingValue.internalValue().toString();
}
- String mimeType = mimeTypeProp.getValues()[0].internalValue().toString();
- Map fields = Collections.EMPTY_MAP;
- for (Iterator it = textFilters.iterator(); it.hasNext();) {
- TextFilter filter = (TextFilter) it.next();
- // use the first filter that can handle the mimeType
- if (filter.canFilter(mimeType)) {
- fields = filter.doFilter(dataProp, encoding);
- break;
- }
- }
-
- for (Iterator it = fields.keySet().iterator(); it.hasNext();) {
- String field = (String) it.next();
- Reader r = (Reader) fields.get(field);
- doc.add(Field.Text(field, r));
- }
+ InputStream stream =
+ ((BLOBFileValue) internalValue).getStream();
+ Reader reader =
+ new TextExtractorReader(extractor, stream, type, encoding);
+ doc.add(Field.Text(FieldNames.FULLTEXT, reader));
}
} catch (ItemStateException e) {
log.warn("Exception while indexing binary property: " + e.toString());
@@ -312,6 +296,31 @@
}
/**
+ * Utility method that extracts the first value of the named property
+ * of the current node. Returns null if the property does
+ * not exist or contains no values.
+ *
+ * @param name property name
+ * @return value of the named property, or null
+ * @throws ItemStateException if the property can not be accessed
+ */
+ protected InternalValue getValue(QName name) throws ItemStateException {
+ try {
+ PropertyId id = new PropertyId(node.getNodeId(), name); // property of the node being indexed
+ PropertyState property =
+ (PropertyState) stateProvider.getItemState(id);
+ InternalValue[] values = property.getValues();
+ if (values.length > 0) {
+ return values[0]; // only the first value is returned; any further values are ignored
+ } else {
+ return null; // property exists but holds no values
+ }
+ } catch (NoSuchItemStateException e) {
+ return null; // a missing property is reported as null rather than as an error
+ }
+ }
+
+ /**
* Adds the string representation of the boolean value to the document as
* the named field.
*
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/TextPlainTextFilter.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/TextPlainTextFilter.java (revision 421461)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/TextPlainTextFilter.java (working copy)
@@ -16,65 +16,21 @@
*/
package org.apache.jackrabbit.core.query.lucene;
-import org.apache.jackrabbit.core.query.TextFilter;
-import org.apache.jackrabbit.core.state.PropertyState;
-import org.apache.jackrabbit.core.value.BLOBFileValue;
-import org.apache.jackrabbit.core.value.InternalValue;
+import org.apache.jackrabbit.extractor.PlainTextExtractor;
-import javax.jcr.RepositoryException;
-import java.io.InputStreamReader;
-import java.io.Reader;
-import java.io.UnsupportedEncodingException;
-import java.util.HashMap;
-import java.util.Map;
-
/**
- * Implements a {@link org.apache.jackrabbit.core.query.TextFilter} that handles binary properties of mime-type
- * text/plain.
+ * Text filter for text/plain content.
+ *
+ * @deprecated use {@link PlainTextExtractor}, this class is kept for
+ * backwards compatibility with existing configuration files
*/
-public class TextPlainTextFilter implements TextFilter {
+public class TextPlainTextFilter extends TextExtractorFilter {
/**
- * Returns true for text/plain; false
- * in all other cases.
- * @param mimeType the mime-type.
- * @return true for text/plain; false
- * in all other cases.
+ * Creates a text filter for text/plain content.
*/
- public boolean canFilter(String mimeType) {
- return "text/plain".equalsIgnoreCase(mimeType);
+ public TextPlainTextFilter() {
+ super(new PlainTextExtractor());
}
- /**
- * Returns a map with a single entry for field {@link FieldNames#FULLTEXT}.
- * @param data the data property.
- * @param encoding the encoding
- * @return a map with a single Reader value for field
- * {@link FieldNames#FULLTEXT}.
- * @throws RepositoryException if encoding is not supported or data is a
- * multi-value property.
- */
- public Map doFilter(PropertyState data, String encoding) throws RepositoryException {
- InternalValue[] values = data.getValues();
- if (values.length == 1) {
- BLOBFileValue blob = (BLOBFileValue) values[0].internalValue();
- try {
- Reader reader;
- if (encoding == null) {
- // use platform default
- reader = new InputStreamReader(blob.getStream());
- } else {
- reader = new InputStreamReader(blob.getStream(), encoding);
- }
- Map result = new HashMap();
- result.put(FieldNames.FULLTEXT, reader);
- return result;
- } catch (UnsupportedEncodingException e) {
- throw new RepositoryException(e);
- }
- } else {
- // multi value not supported
- throw new RepositoryException("Multi-valued binary properties not supported.");
- }
- }
}
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java (revision 421461)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java (working copy)
@@ -23,10 +23,11 @@
import org.apache.jackrabbit.core.query.AbstractQueryHandler;
import org.apache.jackrabbit.core.query.ExecutableQuery;
import org.apache.jackrabbit.core.query.QueryHandlerContext;
-import org.apache.jackrabbit.core.query.TextFilter;
import org.apache.jackrabbit.core.query.QueryHandler;
import org.apache.jackrabbit.core.state.NodeState;
import org.apache.jackrabbit.core.state.NodeStateIterator;
+import org.apache.jackrabbit.extractor.DefaultTextExtractor;
+import org.apache.jackrabbit.extractor.TextExtractor;
import org.apache.jackrabbit.name.NoPrefixDeclaredException;
import org.apache.jackrabbit.name.QName;
import org.apache.jackrabbit.name.NameFormat;
@@ -51,9 +52,7 @@
import java.io.File;
import java.util.Iterator;
import java.util.List;
-import java.util.StringTokenizer;
import java.util.ArrayList;
-import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
@@ -92,11 +91,6 @@
public static final int DEFAULT_MAX_FIELD_LENGTH = 10000;
/**
- * Default text filters.
- */
- public static final String DEFAULT_TEXT_FILTERS = TextPlainTextFilter.class.getName();
-
- /**
* The actual index
*/
private MultiIndex index;
@@ -107,11 +101,19 @@
private Analyzer analyzer;
/**
- * List of {@link org.apache.jackrabbit.core.query.TextFilter} instance.
+ * List of text extractor and text filter class names. The configured
+ * classes will be instantiated and used to extract text content from
+ * binary properties.
*/
- private List textFilters;
+ private String textFilterClasses =
+ DefaultTextExtractor.class.getName();
/**
+ * Text extractor for extracting text content of binary properties.
+ */
+ private TextExtractor extractor;
+
+ /**
* The location of the search index.
*
filterClasses must be a comma
- * separated String of fully qualified class names implementing
- * {@link org.apache.jackrabbit.core.query.TextFilter}. Each class must
- * provide a default constructor.
- *
- * Filter class names that cannot be resolved are skipped and a warn message
- * is logged.
+ * Sets the list of text extractors (and text filters) to use for
+ * extracting text content from binary properties. The list must be
+ * comma (or whitespace) separated, and contain fully qualified class
+ * names of the {@link TextExtractor} (and
+ * {@link org.apache.jackrabbit.core.query.TextFilter}) classes to be
+ * used. The configured classes must all have a public default constructor.
*
- * @param filterClasses comma separated list of filter class names
+ * @param filterClasses comma separated list of class names
*/
public void setTextFilterClasses(String filterClasses) {
- List filters = new ArrayList();
- StringTokenizer tokenizer = new StringTokenizer(filterClasses, ", \t\n\r\f");
- while (tokenizer.hasMoreTokens()) {
- String className = tokenizer.nextToken();
- try {
- Class filterClass = Class.forName(className);
- TextFilter filter = (TextFilter) filterClass.newInstance();
- filters.add(filter);
- } catch (Exception e) {
- log.warn("Invalid TextFilter class: " + className, e);
- } catch (LinkageError e) {
- log.warn("Missing dependency for text filter: " + className);
- log.warn(e.toString());
- }
- }
- textFilters = Collections.unmodifiableList(filters);
+ this.textFilterClasses = filterClasses;
}
- /**
- * Returns the fully qualified class names of the text filter instances
- * currently in use. The names are comma separated.
- *
- * @return class names of the text filters in use.
- */
- public String getTextFilterClasses() {
- StringBuffer names = new StringBuffer();
- String delim = "";
- for (Iterator it = textFilters.iterator(); it.hasNext();) {
- names.append(delim);
- names.append(it.next().getClass().getName());
- delim = ",";
- }
- return names.toString();
- }
-
//----------------------------< internal >----------------------------------
/**
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitTextExtractor.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitTextExtractor.java (revision 0)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitTextExtractor.java (revision 0)
@@ -0,0 +1,199 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.lucene;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
+import java.util.StringTokenizer;
+
+import org.apache.jackrabbit.core.query.TextFilter;
+import org.apache.jackrabbit.core.query.TextFilterExtractor;
+import org.apache.jackrabbit.extractor.CompositeTextExtractor;
+import org.apache.jackrabbit.extractor.DelegatingTextExtractor;
+import org.apache.jackrabbit.extractor.EmptyTextExtractor;
+import org.apache.jackrabbit.extractor.TextExtractor;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Backwards-compatible Jackrabbit text extractor component. This class
+ * implements the following functionality:
+ *
+ * If a matching extractor is not found, then the configured text filters
+ * are searched for an instance that claims to support the given content type.
+ * A text extractor adapter is created for that filter and saved in the
+ * extractor map for future use before delegating the request to the
+ * adapter.
+ *
+ * If not even a text filter is found for the given content type, a warning
+ * is logged and an empty text extractor is created for that content type
+ * and saved in the extractor map for future use before delegating the
+ * request to the empty extractor.
+ *
+ * @param stream binary stream
+ * @param type content type
+ * @param encoding character encoding, or null
+ * @return reader for the text content of the binary stream
+ * @throws IOException if the binary stream can not be read
+ */
+ public Reader extractText(InputStream stream, String type, String encoding)
+ throws IOException {
+ logger.debug("extractText(stream, {}, {})", type, encoding);
+ if (!types.contains(type)) { // type not seen before: look for a legacy filter; NOTE(review): types/extractor are mutated here without visible synchronization - confirm callers serialize access
+ Iterator iterator = filters.iterator();
+ while (iterator.hasNext()) {
+ TextFilter filter = (TextFilter) iterator.next();
+ if (filter.canFilter(type)) {
+ types.add(type); // remember the type so the filters are not searched again for it
+ extractor.addTextExtractor(
+ new TextFilterExtractor(type, filter));
+ break; // the first filter that accepts the type wins
+ }
+ }
+ }
+
+ if (!types.contains(type)) { // still unknown: no filter accepted the type
+ logger.warn("Full text indexing of {} is not supported", type);
+ types.add(type); // record the miss so this branch (and its warning) runs once per type
+ extractor.addTextExtractor(new EmptyTextExtractor(type));
+ }
+
+ return extractor.extractText(stream, type, encoding); // always delegate to the composite extractor
+ }
+
+}
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/TextExtractorReader.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/TextExtractorReader.java (revision 0)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/TextExtractorReader.java (revision 0)
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.lucene;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+
+import org.apache.jackrabbit.extractor.TextExtractor;
+
+/**
+ * Reader that extracts the text content of a binary stream for reading
+ * only when the first character is requested. This class is used by the
+ * {@link NodeIndexer} class to postpone text extraction to when the
+ * content is actually needed.
+ *
+ * @see <a href="http://issues.apache.org/jira/browse/JCR-264">JCR-264</a>
+ */
+public class TextExtractorReader extends Reader {
+
+ /**
+ * Text extractor to use in extracting text content from the binary stream.
+ */
+ private final TextExtractor extractor;
+
+ /**
+ * Binary stream from which to extract the content for reading.
+ */
+ private final InputStream stream;
+
+ /**
+ * Content type of the binary stream.
+ */
+ private final String type;
+
+ /**
+ * Character encoding of the binary stream, or null.
+ */
+ private final String encoding;
+
+ /**
+ * Reader for the extracted text content. Set to null until
+ * the first character request triggers the text extraction.
+ */
+ private Reader reader;
+
+ /**
+ * Creates a reader that extracts the text content from the given binary
+ * stream. No extraction is performed by the constructor itself.
+ *
+ * @param extractor text extractor
+ * @param stream binary stream
+ * @param type content type
+ * @param encoding character encoding, or null
+ */
+ public TextExtractorReader(
+ TextExtractor extractor, InputStream stream,
+ String type, String encoding) {
+ this.extractor = extractor;
+ this.stream = stream;
+ this.type = type;
+ this.encoding = encoding;
+ this.reader = null; // extraction is deferred until the first read()
+ }
+
+ //--------------------------------------------------------------< Reader >
+
+ /**
+ * Reads up to the given number of characters to the given buffer position
+ * from the extracted text content reader. Uses the text extractor to
+ * create the text content reader when first invoked.
+ *
+ * @param buffer buffer to place characters in
+ * @param offset buffer offset
+ * @param length maximum number of characters to read
+ * @return number of read characters, as reported by the extracted text reader
+ * @throws IOException if text extraction fails
+ */
+ public int read(char[] buffer, int offset, int length) throws IOException {
+ if (reader == null) { // first read request: run the extractor now
+ reader = extractor.extractText(stream, type, encoding);
+ }
+ return reader.read(buffer, offset, length);
+ }
+
+ /**
+ * Closes the reader of the extracted text, or the binary stream if the
+ * text content was never extracted.
+ *
+ * @throws IOException if the reader or stream can not be closed
+ */
+ public void close() throws IOException {
+ if (reader != null) {
+ reader.close(); // NOTE(review): the stream is not closed here; presumably the extracted reader closes it - confirm against the TextExtractor contract
+ } else {
+ stream.close(); // nothing was extracted, so the raw stream is closed directly
+ }
+ }
+
+}
Property changes on: src/main/java/org/apache/jackrabbit/core/query/lucene/TextExtractorReader.java
___________________________________________________________________
Name: svn:eol-style
+ native
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/TextExtractorFilter.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/TextExtractorFilter.java (revision 0)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/TextExtractorFilter.java (revision 0)
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.lucene;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.HashMap;
+import java.util.Map;
+
+import javax.jcr.RepositoryException;
+
+import org.apache.jackrabbit.core.query.TextFilter;
+import org.apache.jackrabbit.core.state.PropertyState;
+import org.apache.jackrabbit.core.value.BLOBFileValue;
+import org.apache.jackrabbit.core.value.InternalValue;
+import org.apache.jackrabbit.extractor.TextExtractor;
+
+/**
+ * Utility base class for migrating functionality from existing implementations
+ * of the deprecated {@link TextFilter} interface to the new
+ * {@link TextExtractor} interface. Once the functionality of an existing
+ * TextFilter has been copied to a new TextExtractor, the original class can
+ * be replaced with the following template to keep backwards compatibility
+ * while avoiding the burden of maintaining duplicate code:
+ *
+ * public class SomeTextFilter extends TextExtractorFilter {
+ * public SomeTextFilter() {
+ * super(new SomeTextExtractor());
+ * }
+ * }
+ *
+ */
+public class TextExtractorFilter implements TextFilter {
+
+    /**
+     * The adapted text extractor.
+     */
+    private final TextExtractor extractor;
+
+    /**
+     * Creates a text filter adapter for the given text extractor.
+     *
+     * @param extractor adapted text extractor
+     */
+    public TextExtractorFilter(TextExtractor extractor) {
+        this.extractor = extractor;
+    }
+
+    /**
+     * Returns true if the adapted text extractor supports the given
+     * content type. Content types are compared case-insensitively.
+     *
+     * @param mimeType content type, may be null
+     * @return true if the content type is supported,
+     *         false otherwise (including for a null content type)
+     */
+    public boolean canFilter(String mimeType) {
+        // equalsIgnoreCase is locale-independent and returns false for null
+        String[] types = extractor.getContentTypes();
+        for (int i = 0; i < types.length; i++) {
+            if (types[i].equalsIgnoreCase(mimeType)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Extracts text content of the given binary property using the adapted
+     * text extractor.
+     *
+     * @param data binary property
+     * @param encoding character encoding, or null
+     * @return map that contains a reader for the extracted text as
+     *         the {@link FieldNames#FULLTEXT} entry
+     * @throws RepositoryException if extraction fails or the property does not hold exactly one value
+     */
+    public Map doFilter(PropertyState data, String encoding)
+            throws RepositoryException {
+        InternalValue[] values = data.getValues();
+        if (values.length == 1) {
+            try {
+                String type = "application/octet-stream"; // fallback when no type is declared
+                String[] types = extractor.getContentTypes();
+                if (types.length > 0) {
+                    type = types[0]; // doFilter gets no mime type, so the first declared one is used
+                }
+
+                BLOBFileValue blob = (BLOBFileValue) values[0].internalValue();
+                Reader reader =
+                    extractor.extractText(blob.getStream(), type, encoding);
+
+                Map result = new HashMap();
+                result.put(FieldNames.FULLTEXT, reader);
+                return result;
+            } catch (IOException e) {
+                throw new RepositoryException("Text extraction error", e);
+            }
+        } else {
+            // multi value not supported
+            throw new RepositoryException(
+                    "Multi-valued binary properties not supported.");
+        }
+    }
+
+}
Index: src/main/java/org/apache/jackrabbit/core/query/TextFilterExtractor.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/TextFilterExtractor.java (revision 0)
+++ src/main/java/org/apache/jackrabbit/core/query/TextFilterExtractor.java (revision 0)
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query;
+
+import java.io.FilterReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Map;
+
+import javax.jcr.RepositoryException;
+
+import org.apache.jackrabbit.core.PropertyId;
+import org.apache.jackrabbit.core.query.lucene.FieldNames;
+import org.apache.jackrabbit.core.state.ItemState;
+import org.apache.jackrabbit.core.state.PropertyState;
+import org.apache.jackrabbit.core.value.BLOBFileValue;
+import org.apache.jackrabbit.core.value.InternalValue;
+import org.apache.jackrabbit.extractor.TextExtractor;
+
+/**
+ * Adapter class for achieving backwards compatibility with classes
+ * implementing the deprecated {@link TextFilter} interface. This class
+ * implements the {@link TextExtractor} interface through calls to an
+ * underlying {@link TextFilter} instance.
+ */
+public class TextFilterExtractor implements TextExtractor {
+
+ /**
+ * Supported content types.
+ */
+ private final String[] types;
+
+ /**
+ * The adapted text filter.
+ */
+ private final TextFilter filter;
+
+ /**
+ * Creates a text extractor adapter that supports the given content
+ * types using the given text filter.
+ *
+ * @param types supported content types
+ * @param filter text filter to be adapted
+ */
+ public TextFilterExtractor(String[] types, TextFilter filter) {
+ this.types = types; // stored by reference, no defensive copy is made
+ this.filter = filter;
+ }
+
+ /**
+ * Creates a text extractor adapter that supports the given content
+ * type using the given text filter.
+ *
+ * @param type supported content type
+ * @param filter text filter to be adapted
+ */
+ public TextFilterExtractor(String type, TextFilter filter) {
+ this(new String[] { type }, filter);
+ }
+
+ /**
+ * Returns the supported content types.
+ *
+ * @return supported content types
+ */
+ public String[] getContentTypes() {
+ return types; // the internal array itself is returned, not a copy
+ }
+
+ /**
+ * Extracts the text content of the given binary stream by calling the
+ * underlying {@link TextFilter} with a dummy {@link PropertyState}, as
+ * required by {@link TextFilter#doFilter(PropertyState, String)}. An empty reader
+ * is returned if the filter throws a RepositoryException or yields no FULLTEXT reader.
+ *
+ * @param stream binary stream
+ * @param type content type (not passed on to the adapted filter)
+ * @param encoding character encoding, or null
+ * @return reader for the extracted text content; closing it also discards the wrapped BLOB value
+ * @throws IOException not raised for filter failures; presumably raised when the stream can not be wrapped as an InternalValue (TODO confirm)
+ */
+ public Reader extractText(InputStream stream, String type, String encoding)
+ throws IOException {
+ final InternalValue value = InternalValue.create(stream); // final: captured by the anonymous reader below
+ try {
+ PropertyState state = new PropertyState(
+ (PropertyId) null, ItemState.STATUS_EXISTING, true); // dummy state without a property id
+ state.setValues(new InternalValue[] { value });
+ Map fields = filter.doFilter(state, encoding);
+ Object fulltext = fields.get(FieldNames.FULLTEXT); // only the FULLTEXT entry is used
+ if (fulltext instanceof Reader) {
+ return new FilterReader((Reader) fulltext) {
+ public void close() throws IOException {
+ super.close();
+ ((BLOBFileValue) value.internalValue()).discard(); // release the BLOB once the caller closes the reader
+ }
+ };
+ } else {
+ ((BLOBFileValue) value.internalValue()).discard(); // no full text reader produced by the filter
+ return new StringReader("");
+ }
+ } catch (RepositoryException e) { // NOTE(review): the exception is dropped without logging - confirm this is intended
+ ((BLOBFileValue) value.internalValue()).discard();
+ return new StringReader("");
+ }
+ }
+
+}