diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java index 2c42fae..0c54754 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java @@ -59,20 +59,14 @@ + " [,] : Union operator\n" + " [start:end:step] : array slice operator\n") public class UDFJson extends UDF { - private final Pattern patternKey = Pattern.compile("^([a-zA-Z0-9_\\-\\:\\s]+).*"); - private final Pattern patternIndex = Pattern.compile("\\[([0-9]+|\\*)\\]"); - - private static final JsonFactory JSON_FACTORY = new JsonFactory(); - static { - // Allows for unescaped ASCII control characters in JSON values - JSON_FACTORY.enable(Feature.ALLOW_UNQUOTED_CONTROL_CHARS); - // Enabled to accept quoting of all character backslash qooting mechanism - JSON_FACTORY.enable(Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER); - } - private static final ObjectMapper MAPPER = new ObjectMapper(JSON_FACTORY); + private static final Pattern patternKey = Pattern.compile("^([a-zA-Z0-9_\\-\\:\\s]+).*"); + private static final Pattern patternIndex = Pattern.compile("\\[([0-9]+|\\*)\\]"); private static final JavaType MAP_TYPE = TypeFactory.fromClass(Map.class); private static final JavaType LIST_TYPE = TypeFactory.fromClass(List.class); + private final JsonFactory jsonFactory = new JsonFactory(); + private final ObjectMapper objectMapper = new ObjectMapper(jsonFactory); + // An LRU cache using a linked hash map static class HashCache extends LinkedHashMap { @@ -93,16 +87,18 @@ protected boolean removeEldestEntry(Map.Entry eldest) { } - static Map extractObjectCache = new HashCache(); - static Map pathExprCache = new HashCache(); - static Map> indexListCache = + Map extractObjectCache = new HashCache(); + Map pathExprCache = new HashCache(); + Map> indexListCache = new HashCache>(); - static Map mKeyGroup1Cache = new HashCache(); - static Map mKeyMatchesCache = new HashCache(); - - Text result = new Text(); + Map mKeyGroup1Cache = new HashCache(); + Map mKeyMatchesCache = new HashCache(); public UDFJson() { + // Allows for unescaped ASCII control characters in JSON values + jsonFactory.enable(Feature.ALLOW_UNQUOTED_CONTROL_CHARS); + // Enabled to accept quoting of all character backslash qooting mechanism + jsonFactory.enable(Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER); } /** @@ -125,13 +121,13 @@ public UDFJson() { * @return json string or null when an error happens. */ public Text evaluate(String jsonString, String pathString) { - if (jsonString == null || jsonString.isEmpty() || pathString == null || pathString.isEmpty() || pathString.charAt(0) != '$') { return null; } int pathExprStart = 1; + boolean unknownType = pathString.equals("$"); boolean isRootArray = false; if (pathString.length() > 1) { @@ -155,23 +151,41 @@ public Text evaluate(String jsonString, String pathString) { // Cache extractObject Object extractObject = extractObjectCache.get(jsonString); if (extractObject == null) { - JavaType javaType = isRootArray ? LIST_TYPE : MAP_TYPE; - try { - extractObject = MAPPER.readValue(jsonString, javaType); - } catch (Exception e) { - return null; + if (unknownType) { + try { + extractObject = objectMapper.readValue(jsonString, LIST_TYPE); + } catch (Exception e) { + // Ignore exception + } + if (extractObject == null) { + try { + extractObject = objectMapper.readValue(jsonString, MAP_TYPE); + } catch (Exception e) { + return null; + } + } + } else { + JavaType javaType = isRootArray ? LIST_TYPE : MAP_TYPE; + try { + extractObject = objectMapper.readValue(jsonString, javaType); + } catch (Exception e) { + return null; + } } extractObjectCache.put(jsonString, extractObject); } + for (int i = pathExprStart; i < pathExpr.length; i++) { if (extractObject == null) { return null; } extractObject = extract(extractObject, pathExpr[i], i == pathExprStart && isRootArray); } + + Text result = new Text(); if (extractObject instanceof Map || extractObject instanceof List) { try { - result.set(MAPPER.writeValueAsString(extractObject)); + result.set(objectMapper.writeValueAsString(extractObject)); } catch (Exception e) { return null; } @@ -235,6 +249,8 @@ private Object extract(Object json, String path, boolean skipMapProc) { private transient AddingList jsonList = new AddingList(); private static class AddingList extends ArrayList { + private static final long serialVersionUID = 1L; + @Override public Iterator iterator() { return Iterators.forArray(toArray()); @@ -247,7 +263,6 @@ public void removeRange(int fromIndex, int toIndex) { @SuppressWarnings("unchecked") private Object extract_json_withindex(Object json, ArrayList indexList) { - jsonList.clear(); jsonList.add(json); for (String index : indexList) {