diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/UDFXPathUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/UDFXPathUtil.java
index 7fc0ae5..a0942a2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/UDFXPathUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/UDFXPathUtil.java
@@ -22,7 +22,11 @@
 import java.io.Reader;
 import java.io.StringReader;
 
+import javax.xml.XMLConstants;
 import javax.xml.namespace.QName;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
 import javax.xml.xpath.XPath;
 import javax.xml.xpath.XPathConstants;
 import javax.xml.xpath.XPathExpression;
@@ -38,9 +42,15 @@
  * of this class.
  */
 public class UDFXPathUtil {
+  /** When true, XInclude processing is switched off on the document builder factory. */
+  static final boolean DISABLE_XINCLUDE = true;
+  private DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+  /** Created on the first call to eval, after the factory has been restricted. */
+  private DocumentBuilder builder = null;
   private XPath xpath = XPathFactory.newInstance().newXPath();
   private ReusableStringReader reader = new ReusableStringReader();
   private InputSource inputSource = new InputSource(reader);
+
   private XPathExpression expression = null;
   private String oldPath = null;
 
@@ -66,12 +76,39 @@ public Object eval(String xml, String path, QName qname) {
       return null;
     }
 
+    if (builder == null) {
+      // Restrict the factory before the builder is created from it.
+      initializeDocumentBuilderFactory();
+      try {
+        builder = dbf.newDocumentBuilder();
+      } catch (ParserConfigurationException e) {
+        throw new RuntimeException ("Error instantiating DocumentBuilder, cannot build xml parser",e);
+      }
+    }
+
     reader.set(xml);
 
     try {
-      return expression.evaluate(inputSource, qname);
+      // Parse through the restricted builder instead of handing the raw source to XPath.
+      return expression.evaluate(builder.parse(inputSource),qname);
     } catch (XPathExpressionException e) {
       throw new RuntimeException ("Invalid expression '" + oldPath + "'", e);
+    } catch (Exception e) {
+      throw new RuntimeException ("Error loading expression '"+ oldPath + "'",e);
+    }
+  }
+
+  /**
+   * Restricts the document builder factory: sets the allowed protocols for
+   * external DTD and external schema access to the empty string, and turns
+   * XInclude processing off when {@link #DISABLE_XINCLUDE} is set.
+   */
+  private void initializeDocumentBuilderFactory() {
+    dbf.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, "");
+    dbf.setAttribute(XMLConstants.ACCESS_EXTERNAL_SCHEMA, "");
+
+    if (DISABLE_XINCLUDE) {
+      dbf.setXIncludeAware(false);
     }
   }
 
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/xml/TestUDFXPathUtil.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/xml/TestUDFXPathUtil.java
index 53966fc..0d34860 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/udf/xml/TestUDFXPathUtil.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/xml/TestUDFXPathUtil.java
@@ -20,12 +20,15 @@
 
 import javax.xml.xpath.XPathConstants;
 
+import org.apache.commons.io.FileUtils;
 import org.junit.Test;
 import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
 
 import static org.junit.Assert.*;
 
+import java.io.File;
+
 public class TestUDFXPathUtil {
 
   @Test
@@ -78,5 +81,36 @@ public void testEvalPositive() {
     assertTrue(result instanceof NodeList);
     assertEquals(5, ((NodeList)result).getLength());
   }
-  
+
+  /**
+   * An external entity that points at a local file must be rejected by the
+   * parser instead of being expanded into the document.
+   */
+  @Test
+  public void testEmbedFailure() throws Exception {
+
+    String secretValue = String.valueOf(Math.random());
+    File tempFile = File.createTempFile("verifyembed", ".tmp");
+    tempFile.deleteOnExit();
+    String fname = tempFile.getAbsolutePath();
+
+    FileUtils.writeStringToFile(tempFile, secretValue);
+
+    String xml = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" +
+        "<!DOCTYPE foo [\n" +
+        "    <!ENTITY embed SYSTEM \"" + fname + "\">\n" +
+        "]>\n" +
+        "<foo>&embed;</foo>";
+
+    Exception caught = null;
+    try {
+      new UDFXPathUtil().evalString(xml, "/foo");
+    } catch (Exception e){
+      caught = e;
+    }
+    // Fail with a readable message rather than a NullPointerException when nothing was thrown.
+    assertNotNull("expected the external entity to be rejected", caught);
+    assertNotNull("expected the parser error to be attached as the cause", caught.getCause());
+    assertTrue(caught.getCause().getMessage().contains("\'file\' access is not allowed due to restriction set by the accessExternalDTD property"));
+  }
 }