diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/UDFXPathUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/UDFXPathUtil.java index fbdd340..5bf3318 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/UDFXPathUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/UDFXPathUtil.java @@ -23,6 +23,9 @@ import java.io.StringReader; import javax.xml.namespace.QName; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathExpression; @@ -38,9 +41,15 @@ * of this class. */ public class UDFXPathUtil { + public static final String SAX_FEATURE_PREFIX = "http://xml.org/sax/features/"; + public static final String EXTERNAL_GENERAL_ENTITIES_FEATURE = "external-general-entities"; + public static final String EXTERNAL_PARAMETER_ENTITIES_FEATURE = "external-parameter-entities"; + private DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + private DocumentBuilder builder = null; private XPath xpath = XPathFactory.newInstance().newXPath(); private ReusableStringReader reader = new ReusableStringReader(); private InputSource inputSource = new InputSource(reader); + private XPathExpression expression = null; private String oldPath = null; @@ -66,15 +75,31 @@ public Object eval(String xml, String path, QName qname) { return null; } + if (builder == null){ + try { + initializeDocumentBuilderFactory(); + builder = dbf.newDocumentBuilder(); + } catch (ParserConfigurationException e) { + throw new RuntimeException("Error instantiating DocumentBuilder, cannot build xml parser", e); + } + } + reader.set(xml); try { - return expression.evaluate(inputSource, qname); + return expression.evaluate(builder.parse(inputSource), qname); } catch (XPathExpressionException e) { throw new RuntimeException ("Invalid expression '" + oldPath + "'", e); + } catch (Exception e) { + throw new RuntimeException("Error loading expression '" + oldPath + "'", e); } } + private void initializeDocumentBuilderFactory() throws ParserConfigurationException { + dbf.setFeature(SAX_FEATURE_PREFIX + EXTERNAL_GENERAL_ENTITIES_FEATURE, false); + dbf.setFeature(SAX_FEATURE_PREFIX + EXTERNAL_PARAMETER_ENTITIES_FEATURE, false); + } + public Boolean evalBoolean(String xml, String path) { return (Boolean) eval(xml, path, XPathConstants.BOOLEAN); } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/xml/TestUDFXPathUtil.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/xml/TestUDFXPathUtil.java index 2edcb7d..060ce2e 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/udf/xml/TestUDFXPathUtil.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/xml/TestUDFXPathUtil.java @@ -20,12 +20,15 @@ import javax.xml.xpath.XPathConstants; +import org.apache.commons.io.FileUtils; import org.junit.Test; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import static org.junit.Assert.*; +import java.io.File; + public class TestUDFXPathUtil { @Test @@ -78,5 +81,23 @@ public void testEvalPositive() { assertTrue(result instanceof NodeList); assertEquals(5, ((NodeList)result).getLength()); } - + + @Test + public void testEmbedFailure() throws Exception { + + String secretValue = String.valueOf(Math.random()); + File tempFile = File.createTempFile("verifyembed", ".tmp"); + tempFile.deleteOnExit(); + String fname = tempFile.getAbsolutePath(); + + FileUtils.writeStringToFile(tempFile, secretValue); + + String xml = "\n" + + " \n" + + "]>\n" + + "&embed;"; + String evaled = new UDFXPathUtil().evalString(xml, "/foo"); + assertTrue(evaled.isEmpty()); + } }