Description
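An exception is thrown when parsing this MS PowerPoint file: http://jeanferrette.free.fr/MS8.ppt
(The localized French message below means "Invalid UTF-16 surrogate detected: db00 bfff ?".)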
java.io.IOException: Substitut UTF-16 non valide détecté : db00 bfff ?
at com.sun.org.apache.xml.internal.serializer.ToStream.endElement(ToStream.java:2060)
at com.sun.org.apache.xalan.internal.xsltc.trax.TransformerHandlerImpl.endElement(TransformerHandlerImpl.java:273)
at org.apache.tika.sax.TeeContentHandler.endElement(TeeContentHandler.java:94)
at org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:136)
at org.apache.tika.sax.SecureContentHandler.endElement(SecureContentHandler.java:215)
at org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:136)
at org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:136)
at org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:136)
at org.apache.tika.sax.XHTMLContentHandler.lazyEndHead(XHTMLContentHandler.java:169)
at org.apache.tika.sax.XHTMLContentHandler.startElement(XHTMLContentHandler.java:234)
at org.apache.tika.sax.XHTMLContentHandler.startElement(XHTMLContentHandler.java:271)
at org.apache.tika.sax.XHTMLContentHandler.element(XHTMLContentHandler.java:308)
at org.apache.tika.parser.microsoft.HSLFExtractor.parse(HSLFExtractor.java:41)
at org.apache.tika.parser.microsoft.OfficeParser.parse(OfficeParser.java:201)
at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:242)
at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:242)
at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:129)
[...]
Parsing this file works fine with Tika 0.4, so this appears to be a regression.