Index: project.xml
===================================================================
--- project.xml (revision 374440)
+++ project.xml (working copy)
@@ -185,7 +185,7 @@
poi
poi
- 2.0-final-20040126
+ 2.5.1-final-20040804
jar
Index: src/java/org/apache/jackrabbit/core/query/OOoContentHandler.java
===================================================================
--- src/java/org/apache/jackrabbit/core/query/OOoContentHandler.java (revision 0)
+++ src/java/org/apache/jackrabbit/core/query/OOoContentHandler.java (revision 0)
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2004-2005 The Apache Software Foundation or its licensors,
+ * as applicable.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * SAX handler that collects the character data found inside elements whose
+ * qualified name starts with <code>text:</code>, each chunk followed by a space.
+ * Nesting depth is tracked so text after an inline child element is not lost.
+ */
+public class OOoContentHandler extends DefaultHandler {
+    /** Qualified-name prefix of the elements whose text is collected. */
+    private static final String TEXT_PREFIX = "text:";
+    private final StringBuffer content = new StringBuffer();
+    /** Number of currently open <code>text:</code> elements. */
+    private int textDepth = 0;
+
+    /** Returns the text content extracted from parsed content.xml */
+    public String getContent() {
+        return content.toString();
+    }
+
+    /** Enters a <code>text:</code> element; other elements leave the state untouched. */
+    public void startElement(String namespaceURI, String localName,
+                             String rawName, Attributes atts) throws SAXException {
+        if (rawName.startsWith(TEXT_PREFIX)) {
+            textDepth++;
+        }
+    }
+
+    /** Appends the characters plus a separating space while inside a text element. */
+    public void characters(char[] ch, int start, int length) throws SAXException {
+        if (textDepth > 0) {
+            content.append(ch, start, length).append(" ");
+        }
+    }
+
+    /** Leaves a <code>text:</code> element; closing any other element changes nothing. */
+    public void endElement(String namespaceURI, String localName, String qName)
+            throws SAXException {
+        if (textDepth > 0 && qName.startsWith(TEXT_PREFIX)) {
+            textDepth--;
+        }
+    }
+}
Index: src/java/org/apache/jackrabbit/core/query/OpenOfficeTextFilter.java
===================================================================
--- src/java/org/apache/jackrabbit/core/query/OpenOfficeTextFilter.java (revision 0)
+++ src/java/org/apache/jackrabbit/core/query/OpenOfficeTextFilter.java (revision 0)
@@ -0,0 +1,123 @@
+/*
+ * Copyright 2004-2005 The Apache Software Foundation or its licensors,
+ * as applicable.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query;
+
+import javax.jcr.RepositoryException;
+import org.apache.jackrabbit.core.query.lucene.FieldNames;
+import org.apache.jackrabbit.core.state.PropertyState;
+import org.apache.jackrabbit.core.value.BLOBFileValue;
+import org.apache.jackrabbit.core.value.InternalValue;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.zip.ZipInputStream;
+import java.util.zip.ZipEntry;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+import org.xml.sax.InputSource;
+import org.xml.sax.XMLReader;
+
+/**
+ * Extracts texts from OpenOffice document data: the binary value is read as a
+ * zip archive and its <code>content.xml</code> entry is SAX-parsed.
+ * NOTE(review): one XMLReader is created lazily and reused, its content handler
+ * replaced per call, so concurrent doFilter calls on one instance would clash.
+ */
+public class OpenOfficeTextFilter implements TextFilter {
+    private XMLReader xmlReader; // created by initParser() on first use, then reused
+
+    /**
+     * @param mimeType mime type to test, compared case-insensitively
+     * @return true for the OpenDocument database, formula, graphics,
+     *         presentation, spreadsheet and text mime types
+     */
+    public boolean canFilter(String mimeType) {
+        return "application/vnd.oasis.opendocument.database".equalsIgnoreCase(mimeType)
+            || "application/vnd.oasis.opendocument.formula".equalsIgnoreCase(mimeType)
+            || "application/vnd.oasis.opendocument.graphics".equalsIgnoreCase(mimeType)
+            || "application/vnd.oasis.opendocument.presentation".equalsIgnoreCase(mimeType)
+            || "application/vnd.oasis.opendocument.spreadsheet".equalsIgnoreCase(mimeType)
+            || "application/vnd.oasis.opendocument.text".equalsIgnoreCase(mimeType);
+    }
+
+    /**
+     * Extracts the text of the first value of <code>data</code>.
+     * @param data     property whose first value is read as a BLOBFileValue
+     * @param encoding not used by this filter
+     * @return map holding a Reader over the text under FieldNames.FULLTEXT
+     * @throws RepositoryException if no value, no content.xml entry, or parsing fails
+     */
+    public Map doFilter(PropertyState data, String encoding) throws RepositoryException {
+        if (xmlReader == null) {
+            initParser();
+        }
+        InternalValue[] values = data.getValues();
+        if (values.length == 0) {
+            // NOTE(review): reached when the property has no value, despite the wording
+            throw new RepositoryException("Multi-valued binary properties not supported.");
+        }
+        BLOBFileValue blob = (BLOBFileValue) values[0].internalValue();
+        ZipInputStream zis = null;
+        try {
+            zis = new ZipInputStream(blob.getStream());
+            ZipEntry ze = zis.getNextEntry();
+            // getNextEntry() returns null once the archive is exhausted
+            while (ze != null && !"content.xml".equals(ze.getName())) {
+                ze = zis.getNextEntry();
+            }
+            if (ze == null) {
+                throw new RepositoryException("No content.xml entry found in document.");
+            }
+            OOoContentHandler contentHandler = new OOoContentHandler();
+            xmlReader.setContentHandler(contentHandler);
+            xmlReader.parse(new InputSource(zis));
+            Map result = new HashMap();
+            result.put(FieldNames.FULLTEXT, new StringReader(contentHandler.getContent()));
+            return result;
+        } catch (RepositoryException re) {
+            throw re; // keep the specific message instead of wrapping it again
+        } catch (Exception ex) {
+            throw new RepositoryException(ex);
+        } finally {
+            // the only close: runs on success and on every failure path
+            if (zis != null) {
+                try {
+                    zis.close();
+                } catch (IOException ioe) {
+                    ioe.printStackTrace();
+                }
+            }
+        }
+    }
+
+    /** Builds the non-validating reader; the field is set only once fully configured. */
+    private void initParser() throws RepositoryException {
+        try {
+            SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
+            saxParserFactory.setValidating(false);
+            SAXParser saxParser = saxParserFactory.newSAXParser();
+            XMLReader reader = saxParser.getXMLReader();
+            reader.setFeature("http://xml.org/sax/features/validation", false);
+            reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
+            xmlReader = reader;
+        } catch (Exception e) {
+            throw new RepositoryException(e);
+        }
+    }
+}
\ No newline at end of file
Index: src/test/org/apache/jackrabbit/core/query/test/OpenOfficeTest.java
===================================================================
--- src/test/org/apache/jackrabbit/core/query/test/OpenOfficeTest.java (revision 0)
+++ src/test/org/apache/jackrabbit/core/query/test/OpenOfficeTest.java (revision 0)
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2004-2005 The Apache Software Foundation or its licensors,
+ * as applicable.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.test;
+
+import java.io.File;
+
+import org.apache.jackrabbit.core.query.OpenOfficeTextFilter;
+
+
+/** Command-line driver: passes the file args[0] and an OpenOfficeTextFilter to showResult. */
+public class OpenOfficeTest extends AbstractTextFilterTest {
+    /** @param args args[0] is the path of the document to run through the filter */
+    public static void main(String[] args) throws Exception {
+        final OpenOfficeTest runner = new OpenOfficeTest();
+        runner.showResult(new File(args[0]), new OpenOfficeTextFilter());
+    }
+}