Details
-
Bug
-
Status: Resolved
-
Major
-
Resolution: Fixed
-
2.9.1
-
None
-
Linux
Description
If an element has multiple text node children (which, in the case given below, results from filtering out child elements during parsing), then the
corresponding element in the resulting DOM contains only the final text node.
Demonstration code:
//////////////////////////////////
package test;
import junit.framework.TestCase;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.ls.LSParserFilter;
import org.w3c.dom.traversal.NodeFilter;
import com.sun.org.apache.xerces.internal.parsers.DOMParserImpl;
public class TestBug extends TestCase {
private static final String EXAMPLE_NS = "http://www.example.com";
public void testFilteringSiblingTextNodes() throws Exception {
final DOMParserImpl parser = new DOMParserImpl("com.sun.org.apache.xerces.internal.parsers.XIncludeAwareParserConfiguration", "http://www.w3.org/2001/XMLSchema");
parser.setFilter(new LSParserFilter() {
public short acceptNode(final Node nodeArg)
{ return LSParserFilter.FILTER_ACCEPT; }public int getWhatToShow()
{ return NodeFilter.SHOW_ALL; } public short startElement(final Element elementArg) {
if (EXAMPLE_NS.equals(elementArg.getNamespaceURI()))
else
{ return LSParserFilter.FILTER_SKIP; } }
});
final Document document = parser.parseURI(getClass().getResource("input.xml").toString());
assertEquals("List:1)Item 1,2)Item 2.", document.getElementsByTagNameNS(EXAMPLE_NS, "foo").item(0).getTextContent());
}
}
/////////////////////////////////
Resource "input.xml" (located in the same package as the test class):
<html xmlns='http://www.w3.org/1999/xhtml' xmlns:ex='http://www.example.com'>
<ex:foo>List:<br />1)Item 1,<br />2)Item 2.</ex:foo>
</html>
/////////////////////////////////////////