Details
-
Bug
-
Status: Closed
-
Major
-
Resolution: Invalid
-
2.7
-
None
Description
When the following testcase is run with Xerces (with a patch to use the new DOM Level 3 APIs under the covers), the output encoding is reported as UTF-16, but should probably be UTF-8.
Here is the testcase:
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.apache.xml.serializer.DOM3Serializer;
import org.apache.xml.serializer.OutputPropertiesFactory;
import org.apache.xml.serializer.Serializer;
import org.apache.xml.serializer.SerializerFactory;
import org.w3c.dom.DOMConfiguration;
import org.w3c.dom.Document;
import org.w3c.dom.bootstrap.DOMImplementationRegistry;
import org.w3c.dom.ls.DOMImplementationLS;
import org.w3c.dom.ls.LSSerializer;
/**
 * Reproduction test case: parses a small in-memory XML document and serializes
 * it back to a string through a DOM Level 3 Load and Save {@link LSSerializer}.
 */
public class TestDOM3 {

    /**
     * Runs the serialization scenario and prints the serialized document to
     * standard output.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if parsing, registry lookup or serialization fails
     */
    public static void main(String[] args) throws Exception {
        TestDOM3 test = new TestDOM3();
        System.out.println(test.testDOM3LS());
    }

    /**
     * Parses the fixed document {@code <parent><child/></parent>} with the
     * default JAXP {@link DocumentBuilderFactory}.
     *
     * @return the parsed DOM document
     * @throws Exception if the parser cannot be created or the input cannot be parsed
     */
    public Document getDocument() throws Exception {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = factory.newDocumentBuilder();
        // Encode explicitly so the bytes handed to the parser do not depend on
        // the platform default charset.
        byte[] bytes = "<parent><child/></parent>".getBytes("UTF-8");
        java.io.InputStream is = new java.io.ByteArrayInputStream(bytes);
        return builder.parse(is);
    }

    /**
     * Serializes the document from {@link #getDocument()} to a string using an
     * {@link LSSerializer} obtained through the {@link DOMImplementationRegistry},
     * with the {@code format-pretty-print} parameter enabled.
     *
     * @return the serialized document as returned by {@code writeToString}
     * @throws Exception if the document cannot be built, the DOM implementation
     *         source cannot be loaded, or serialization fails
     */
    public String testDOM3LS() throws Exception {
        Document doc = getDocument();

        // Point the registry at the Xerces implementation source before it is
        // instantiated; this class must be available on the classpath.
        System.setProperty(DOMImplementationRegistry.PROPERTY,
                "org.apache.xerces.dom.DOMImplementationSourceImpl");
        DOMImplementationRegistry registry = DOMImplementationRegistry.newInstance();
        DOMImplementationLS impl = (DOMImplementationLS) registry.getDOMImplementation("LS");

        LSSerializer writer = impl.createLSSerializer();
        DOMConfiguration config = writer.getDomConfig();
        config.setParameter("format-pretty-print", Boolean.TRUE);

        // NOTE(review): the result is a Java String, not a byte stream; the
        // encoding named in the emitted XML declaration is chosen by the
        // serializer implementation - confirm against the DOM L3 LS spec.
        return writer.writeToString(doc);
    }
}
Here is the output:
<?xml version="1.0" encoding="UTF-16"?><parent><child/></parent>