Index: project.xml =================================================================== --- project.xml (revision 374440) +++ project.xml (working copy) @@ -185,7 +185,7 @@ poi poi - 2.0-final-20040126 + 2.5.1-final-20040804 jar Index: src/java/org/apache/jackrabbit/core/query/OOoContentHandler.java =================================================================== --- src/java/org/apache/jackrabbit/core/query/OOoContentHandler.java (revision 0) +++ src/java/org/apache/jackrabbit/core/query/OOoContentHandler.java (revision 0) @@ -0,0 +1,62 @@ +/* + * Copyright 2004-2005 The Apache Software Foundation or its licensors, + * as applicable. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * SAX content handler that collects the character data found inside
 * <code>text:</code> elements of a parsed <code>content.xml</code> stream.
 * <p>
 * Character data is collected while at least one element whose qualified
 * name starts with <code>text:</code> is open. A single blank is appended
 * whenever an element ends inside such a region, so that the text of
 * adjacent elements does not run together.
 */
public class OOoContentHandler extends DefaultHandler
{
    /** Qualified-name prefix of the elements whose character data is collected. */
    private static final String TEXT_PREFIX = "text:";

    /** Accumulates the extracted text. */
    private final StringBuffer content;

    /** Number of currently open <code>text:</code> elements. */
    private int textDepth;

    public OOoContentHandler()
    {
        content = new StringBuffer();
        textDepth = 0;
    }

    /**
     * Returns the text content extracted from parsed content.xml
     *
     * @return the text collected so far; never <code>null</code>
     */
    public String getContent()
    {
        return content.toString();
    }

    /**
     * Starts collecting character data when a <code>text:</code> element
     * opens. Nested <code>text:</code> elements are counted so that closing
     * an inner element does not stop collection for the outer one.
     */
    public void startElement(String namespaceURI, String localName,
                             String rawName, Attributes atts)
        throws SAXException
    {
        if (isTextElement(rawName))
        {
            textDepth++;
        }
    }

    /**
     * Appends the reported characters when inside a <code>text:</code>
     * element. No separator is added here: a parser may report one run of
     * text in several calls, and a blank per call would split words.
     */
    public void characters(char[] ch, int start, int length) throws SAXException
    {
        if (textDepth > 0)
        {
            content.append(ch, start, length);
        }
    }

    /**
     * Appends a separating blank when an element ends inside a
     * <code>text:</code> region and leaves the region once its outermost
     * <code>text:</code> element is closed.
     */
    public void endElement(java.lang.String namespaceURI,
                           java.lang.String localName,
                           java.lang.String qName)
        throws SAXException
    {
        if (textDepth > 0)
        {
            content.append(' ');
            if (isTextElement(qName))
            {
                textDepth--;
            }
        }
    }

    /** Tells whether the given qualified name denotes a <code>text:</code> element. */
    private static boolean isTextElement(String qualifiedName)
    {
        return qualifiedName != null && qualifiedName.startsWith(TEXT_PREFIX);
    }
}
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.core.query; + +import javax.jcr.RepositoryException; +import org.apache.jackrabbit.core.query.lucene.FieldNames; +import org.apache.jackrabbit.core.state.PropertyState; +import org.apache.jackrabbit.core.value.BLOBFileValue; +import org.apache.jackrabbit.core.value.InternalValue; + +import java.io.IOException; +import java.io.StringReader; +import java.util.HashMap; +import java.util.Map; +import java.util.zip.ZipInputStream; +import java.util.zip.ZipEntry; +import javax.xml.parsers.SAXParser; +import javax.xml.parsers.SAXParserFactory; +import org.xml.sax.InputSource; +import org.xml.sax.XMLReader; + +/** + * Extracts texts from OpenOffice document data. + */ +public class OpenOfficeTextFilter implements TextFilter +{ + private XMLReader xmlReader; + + public boolean canFilter(String mimeType) + { + return "application/vnd.oasis.opendocument.database".equalsIgnoreCase(mimeType) || + "application/vnd.oasis.opendocument.formula".equalsIgnoreCase(mimeType) || + "application/vnd.oasis.opendocument.graphics".equalsIgnoreCase(mimeType) || + "application/vnd.oasis.opendocument.presentation".equalsIgnoreCase(mimeType) || + "application/vnd.oasis.opendocument.spreadsheet".equalsIgnoreCase(mimeType) || + "application/vnd.oasis.opendocument.text".equalsIgnoreCase(mimeType); + } + public Map doFilter(PropertyState data, String encoding) + throws RepositoryException + { + ZipInputStream zis=null; + if(xmlReader == null) + initParser(); + + InternalValue[] values = data.getValues(); + if (values.length > 0) + { + BLOBFileValue blob = (BLOBFileValue) values[0].internalValue(); + + try + { + zis = new ZipInputStream(blob.getStream()); + ZipEntry ze = zis.getNextEntry(); + while(!ze.getName().equals("content.xml")) + ze = zis.getNextEntry(); + OOoContentHandler contentHandler = new OOoContentHandler(); + 
xmlReader.setContentHandler(contentHandler); + xmlReader.parse(new InputSource(zis)); + zis.close(); + + Map result = new HashMap(); + result.put(FieldNames.FULLTEXT, new StringReader(contentHandler.getContent())); + return result; + } + catch (Exception ex) + { + throw new RepositoryException(ex); + } + finally + { + if (zis != null) + { + try + { + zis.close(); + } + catch (IOException ioe) + { + ioe.printStackTrace(); + } + } + } + } + else + { + // multi value not supported + throw new RepositoryException("Multi-valued binary properties not supported."); + } + + } + + private void initParser() throws RepositoryException + { + try + { + SAXParserFactory saxParserFactory = SAXParserFactory.newInstance(); + saxParserFactory.setValidating(false); + SAXParser saxParser = saxParserFactory.newSAXParser(); + xmlReader = saxParser.getXMLReader(); + xmlReader.setFeature("http://xml.org/sax/features/validation", false); + xmlReader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); + } + catch (Exception e) + { + throw new RepositoryException(e); + } + } + +} \ No newline at end of file Index: src/test/org/apache/jackrabbit/core/query/test/OpenOfficeTest.java =================================================================== --- src/test/org/apache/jackrabbit/core/query/test/OpenOfficeTest.java (revision 0) +++ src/test/org/apache/jackrabbit/core/query/test/OpenOfficeTest.java (revision 0) @@ -0,0 +1,31 @@ +/* + * Copyright 2004-2005 The Apache Software Foundation or its licensors, + * as applicable. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.core.query.test; + +import java.io.File; + +import org.apache.jackrabbit.core.query.OpenOfficeTextFilter; + + +public class OpenOfficeTest extends AbstractTextFilterTest { + + public static void main(String[] args) throws Exception { + OpenOfficeTest test = new OpenOfficeTest(); + File file = new File(args[0]); + test.showResult(file, new OpenOfficeTextFilter()); + } +}