NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Context whitespace and blank lines are
reconstructed; regenerate the diff against the working tree before applying.

--- OpenSearchServlet.java	(revision 413155)
+++ OpenSearchServlet.java	(working copy)
@@ -263,22 +263,83 @@
   private static void addNode(Document doc, Node parent,
                               String name, String text) {
     Element child = doc.createElement(name);
-    child.appendChild(doc.createTextNode(text));
+    child.appendChild(doc.createTextNode(getLegalXml(text)));
     parent.appendChild(child);
   }
 
   private static void addNode(Document doc, Node parent,
                               String ns, String name, String text) {
     Element child = doc.createElementNS((String)NS_MAP.get(ns), ns+":"+name);
-    child.appendChild(doc.createTextNode(text));
+    child.appendChild(doc.createTextNode(getLegalXml(text)));
     parent.appendChild(child);
   }
 
   private static void addAttribute(Document doc, Element node,
                                    String name, String value) {
     Attr attribute = doc.createAttribute(name);
-    attribute.setValue(value);
+    attribute.setValue(getLegalXml(value));
     node.getAttributes().setNamedItem(attribute);
   }
 
+  /**
+   * Ensures a string contains only characters that are legal in XML 1.0.
+   * The text is scanned once: if every character is legal the passed
+   * instance is returned as is, otherwise a new string is built with the
+   * illegal characters removed. A high surrogate immediately followed by
+   * a low surrogate encodes a character in #x10000-#x10FFFF, which is
+   * legal, so such pairs are kept; unpaired surrogates are removed.
+   * @param text String to verify; may be null.
+   * @return null if text is null, otherwise the passed text or a new
+   * string with illegal characters removed if any were found in text.
+   * @see <a href="http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char">
+   * XML 1.0 Char production</a>
+   */
+  private static String getLegalXml(final String text) {
+    if (text == null) {
+      return null;
+    }
+    final int length = text.length();
+    // Allocated on the first illegal character only, so text that is
+    // already legal is returned without being copied.
+    StringBuffer buffer = null;
+    for (int i = 0; i < length; i++) {
+      final char c = text.charAt(i);
+      if (isHighSurrogate(c) && i + 1 < length
+          && isLowSurrogate(text.charAt(i + 1))) {
+        if (buffer != null) {
+          buffer.append(c).append(text.charAt(i + 1));
+        }
+        i++; // skip the low surrogate handled together with c
+      } else if (isLegalXml(c)) {
+        if (buffer != null) {
+          buffer.append(c);
+        }
+      } else if (buffer == null) {
+        // First illegal character: seed the copy with the legal prefix.
+        buffer = new StringBuffer(length);
+        buffer.append(text.substring(0, i));
+      }
+    }
+    return buffer == null ? text : buffer.toString();
+  }
+
+  /**
+   * Tests one UTF-16 code unit against the XML 1.0 Char production.
+   * Surrogates (#xD800-#xDFFF) are rejected here because they are legal
+   * only as a high/low pair, which getLegalXml checks itself.
+   */
+  private static boolean isLegalXml(final char c) {
+    return c == 0x9 || c == 0xa || c == 0xd || (c >= 0x20 && c <= 0xd7ff)
+        || (c >= 0xe000 && c <= 0xfffd);
+  }
+
+  /** Returns true if c is in the high (leading) surrogate range. */
+  private static boolean isHighSurrogate(final char c) {
+    return c >= 0xd800 && c <= 0xdbff;
+  }
+
+  /** Returns true if c is in the low (trailing) surrogate range. */
+  private static boolean isLowSurrogate(final char c) {
+    return c >= 0xdc00 && c <= 0xdfff;
+  }
 }