From 476f6ecf6c830c078ada872b0051b1bbb12696c1 Mon Sep 17 00:00:00 2001 From: Mike Lewis Date: Thu, 21 Oct 2010 14:00:34 -0700 Subject: [PATCH] Updated UDFJson to allow arrays as a root object --- .../org/apache/hadoop/hive/ql/udf/UDFJson.java | 53 ++++++++++++------- 1 files changed, 33 insertions(+), 20 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java index 649a41f..8e98c59 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java @@ -27,7 +27,6 @@ import java.util.regex.Pattern; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; -import org.apache.hadoop.io.Text; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; @@ -78,7 +77,7 @@ public class UDFJson extends UDF { } - static Map extractObjectCache = new HashCache(); + static Map extractObjectCache = new HashCache(); static Map pathExprCache = new HashCache(); static Map> indexListCache = new HashCache>(); static Map mKeyGroup1Cache = new HashCache(); @@ -123,14 +122,23 @@ public class UDFJson extends UDF { pathExprCache.put(pathString, pathExpr); } - if (!pathExpr[0].equalsIgnoreCase("$")) { + if (!pathExpr[0].startsWith("$")) { return null; } // Cache extractObject - Object extractObject = extractObjectCache.get(jsonString); + + String[] objCachKey = new String[] {jsonString, pathString}; + + Object extractObject = extractObjectCache.get(objCachKey); if (extractObject == null) { - extractObject = new JSONObject(jsonString); - extractObjectCache.put(jsonString, extractObject); + char firstChar = jsonString.trim().charAt(0); + if (firstChar == '{') { + extractObject = new JSONObject(jsonString); + } else if (firstChar == '[') { + extractObject = new JSONArray(jsonString); + extractObject = extract_json_withindex_outer(extractObject, pathExpr[0]); + } + extractObjectCache.put(objCachKey, extractObject); } for (int i = 1; i < pathExpr.length; i++) { extractObject = extract(extractObject, pathExpr[i]); @@ -167,24 +175,29 @@ public class UDFJson extends UDF { } json = extract_json_withkey(json, mKeyGroup1); - // Cache indexList - ArrayList indexList = indexListCache.get(path); - if (indexList == null) { - Matcher mIndex = patternIndex.matcher(path); - indexList = new ArrayList(); - while (mIndex.find()) { - indexList.add(mIndex.group(1)); - } - indexListCache.put(path, indexList); - } - - if (indexList.size() > 0) { - json = extract_json_withindex(json, indexList); - } + json = extract_json_withindex_outer(json, path); return json; } + private Object extract_json_withindex_outer(Object json, String path) throws JSONException { + // Cache indexList + ArrayList indexList = indexListCache.get(path); + if (indexList == null) { + Matcher mIndex = patternIndex.matcher(path); + indexList = new ArrayList(); + while (mIndex.find()) { + indexList.add(mIndex.group(1)); + } + indexListCache.put(path, indexList); + } + + if (indexList.size() > 0) { + json = extract_json_withindex(json, indexList); + } + return json; + } + ArrayList jsonList = new ArrayList(); private Object extract_json_withindex(Object json, ArrayList indexList) -- 1.7.2.3