Index: ql/src/test/results/clientpositive/udf_get_json_object.q.out =================================================================== --- ql/src/test/results/clientpositive/udf_get_json_object.q.out (revision 1374098) +++ ql/src/test/results/clientpositive/udf_get_json_object.q.out (working copy) @@ -92,7 +92,7 @@ POSTHOOK: Input: default@src_json #### A masked pattern was here #### POSTHOOK: Lineage: dest1.c1 SIMPLE [] -amy {"fruit":[{"weight":8,"type":"apple"},{"weight":9,"type":"pear"}],"book":[{"author":"Nigel Rees","category":"reference","title":"Sayings of the Century","price":8.95},{"author":"Herman Melville","category":"fiction","title":"Moby Dick","price":8.99,"isbn":"0-553-21311-3"},{"author":"J. R. R. Tolkien","category":"fiction","title":"The Lord of the Rings","price":22.99,"reader":[{"name":"bob","age":25},{"name":"jack","age":26}],"isbn":"0-395-19395-8"}],"basket":[[1,2,{"b":"y","a":"x"}],[3,4],[5,6]],"bicycle":{"price":19.95,"color":"red"}} +amy {"fruit":[{"weight":8,"type":"apple"},{"weight":9,"type":"pear"}],"basket":[[1,2,{"b":"y","a":"x"}],[3,4],[5,6]],"book":[{"author":"Nigel Rees","title":"Sayings of the Century","category":"reference","price":8.95},{"author":"Herman Melville","title":"Moby Dick","category":"fiction","price":8.99,"isbn":"0-553-21311-3"},{"author":"J. R. R. Tolkien","title":"The Lord of the Rings","category":"fiction","reader":[{"age":25,"name":"bob"},{"age":26,"name":"jack"}],"price":22.99,"isbn":"0-395-19395-8"}],"bicycle":{"price":19.95,"color":"red"}} PREHOOK: query: SELECT get_json_object(src_json.json, '$.store.bicycle'), get_json_object(src_json.json, '$.store.book') FROM src_json PREHOOK: type: QUERY PREHOOK: Input: default@src_json @@ -102,7 +102,7 @@ POSTHOOK: Input: default@src_json #### A masked pattern was here #### POSTHOOK: Lineage: dest1.c1 SIMPLE [] -{"price":19.95,"color":"red"} [{"author":"Nigel Rees","category":"reference","title":"Sayings of the Century","price":8.95},{"author":"Herman Melville","category":"fiction","title":"Moby Dick","price":8.99,"isbn":"0-553-21311-3"},{"author":"J. R. R. Tolkien","category":"fiction","title":"The Lord of the Rings","price":22.99,"reader":[{"name":"bob","age":25},{"name":"jack","age":26}],"isbn":"0-395-19395-8"}] +{"price":19.95,"color":"red"} [{"author":"Nigel Rees","title":"Sayings of the Century","category":"reference","price":8.95},{"author":"Herman Melville","title":"Moby Dick","category":"fiction","price":8.99,"isbn":"0-553-21311-3"},{"author":"J. R. R. Tolkien","title":"The Lord of the Rings","category":"fiction","reader":[{"age":25,"name":"bob"},{"age":26,"name":"jack"}],"price":22.99,"isbn":"0-395-19395-8"}] PREHOOK: query: SELECT get_json_object(src_json.json, '$.store.book[0]'), get_json_object(src_json.json, '$.store.book[*]') FROM src_json PREHOOK: type: QUERY PREHOOK: Input: default@src_json @@ -112,7 +112,7 @@ POSTHOOK: Input: default@src_json #### A masked pattern was here #### POSTHOOK: Lineage: dest1.c1 SIMPLE [] -{"author":"Nigel Rees","category":"reference","title":"Sayings of the Century","price":8.95} [{"author":"Nigel Rees","category":"reference","title":"Sayings of the Century","price":8.95},{"author":"Herman Melville","category":"fiction","title":"Moby Dick","price":8.99,"isbn":"0-553-21311-3"},{"author":"J. R. R. Tolkien","category":"fiction","title":"The Lord of the Rings","price":22.99,"reader":[{"name":"bob","age":25},{"name":"jack","age":26}],"isbn":"0-395-19395-8"}] +{"author":"Nigel Rees","title":"Sayings of the Century","category":"reference","price":8.95} [{"author":"Nigel Rees","title":"Sayings of the Century","category":"reference","price":8.95},{"author":"Herman Melville","title":"Moby Dick","category":"fiction","price":8.99,"isbn":"0-553-21311-3"},{"author":"J. R. R. Tolkien","title":"The Lord of the Rings","category":"fiction","reader":[{"age":25,"name":"bob"},{"age":26,"name":"jack"}],"price":22.99,"isbn":"0-395-19395-8"}] PREHOOK: query: SELECT get_json_object(src_json.json, '$.store.book[0].category'), get_json_object(src_json.json, '$.store.book[*].category'), get_json_object(src_json.json, '$.store.book[*].isbn'), get_json_object(src_json.json, '$.store.book[*].reader') FROM src_json PREHOOK: type: QUERY PREHOOK: Input: default@src_json @@ -122,7 +122,7 @@ POSTHOOK: Input: default@src_json #### A masked pattern was here #### POSTHOOK: Lineage: dest1.c1 SIMPLE [] -reference ["reference","fiction","fiction"] ["0-553-21311-3","0-395-19395-8"] [{"name":"bob","age":25},{"name":"jack","age":26}] +reference ["reference","fiction","fiction"] ["0-553-21311-3","0-395-19395-8"] [{"age":25,"name":"bob"},{"age":26,"name":"jack"}] PREHOOK: query: SELECT get_json_object(src_json.json, '$.store.book[*].reader[0].age'), get_json_object(src_json.json, '$.store.book[*].reader[*].age') FROM src_json PREHOOK: type: QUERY PREHOOK: Input: default@src_json Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java (revision 1374098) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java (working copy) @@ -20,6 +20,7 @@ import java.util.ArrayList; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; import org.apache.commons.logging.Log; @@ -34,8 +35,10 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; import org.apache.hadoop.io.Text; -import org.json.JSONException; -import org.json.JSONObject; +import org.codehaus.jackson.map.ObjectMapper; +import org.codehaus.jackson.map.type.TypeFactory; +import org.codehaus.jackson.type.JavaType; + /** * GenericUDTFJSONTuple: this * @@ -48,6 +51,9 @@ private static Log LOG = LogFactory.getLog(GenericUDTFJSONTuple.class.getName()); + private static final ObjectMapper MAPPER = new ObjectMapper(); + private static final JavaType MAP_TYPE = TypeFactory.fromClass(Map.class); + int numCols; // number of output columns String[] paths; // array of path expressions, each of which corresponds to a column Text[] retCols; // array of returned column values @@ -77,7 +83,7 @@ } - static Map jsonObjectCache = new HashCache(); + static Map jsonObjectCache = new HashCache(); @Override public void close() throws HiveException { @@ -127,6 +133,7 @@ return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs); } + @SuppressWarnings("unchecked") @Override public void process(Object[] o) throws HiveException { @@ -148,32 +155,39 @@ return; } try { - JSONObject jsonObj = jsonObjectCache.get(jsonStr); + Object jsonObj = jsonObjectCache.get(jsonStr); if (jsonObj == null) { - jsonObj = new JSONObject(jsonStr); + try { + jsonObj = MAPPER.readValue(jsonStr, MAP_TYPE); + } catch (Exception e) { + reportInvalidJson(jsonStr); + forward(nullCols); + return; + } jsonObjectCache.put(jsonStr, jsonObj); } + if (!(jsonObj instanceof Map)) { + reportInvalidJson(jsonStr); + forward(nullCols); + return; + } + for (int i = 0; i < numCols; ++i) { - if (jsonObj.isNull(paths[i])) { + if (retCols[i] == null) { + retCols[i] = cols[i]; // use the object pool rather than creating a new object + } + Object extractObject = ((Map)jsonObj).get(paths[i]); + if (extractObject instanceof Map || extractObject instanceof List) { + retCols[i].set(MAPPER.writeValueAsString(extractObject)); + } else if (extractObject != null) { + retCols[i].set(extractObject.toString()); + } else { retCols[i] = null; - } else { - if (retCols[i] == null) { - retCols[i] = cols[i]; // use the object pool rather than creating a new object - } - retCols[i].set(jsonObj.getString(paths[i])); } } forward(retCols); return; - } catch (JSONException e) { - // parsing error, invalid JSON string - if (!seenErrors) { - LOG.error("The input is not a valid JSON string: " + jsonStr + ". Skipping such error messages in the future."); - seenErrors = true; - } - forward(nullCols); - return; } catch (Throwable e) { LOG.error("JSON parsing/evaluation exception" + e); forward(nullCols); @@ -184,4 +198,12 @@ public String toString() { return "json_tuple"; } + + private void reportInvalidJson(String jsonStr) { + if (!seenErrors) { + LOG.error("The input is not a valid JSON string: " + jsonStr + + ". Skipping such error messages in the future."); + seenErrors = true; + } + } } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java (revision 1374098) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java (working copy) @@ -21,6 +21,7 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -28,9 +29,9 @@ import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.io.Text; -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; +import org.codehaus.jackson.map.ObjectMapper; +import org.codehaus.jackson.map.type.TypeFactory; +import org.codehaus.jackson.type.JavaType; /** * UDFJson. @@ -58,6 +59,9 @@ private final Pattern patternKey = Pattern.compile("^([a-zA-Z0-9_\\-\\:\\s]+).*"); private final Pattern patternIndex = Pattern.compile("\\[([0-9]+|\\*)\\]"); + private static final ObjectMapper MAPPER = new ObjectMapper(); + private static final JavaType MAP_TYPE = TypeFactory.fromClass(Map.class); + // An LRU cache using a linked hash map static class HashCache extends LinkedHashMap { @@ -80,7 +84,8 @@ static Map extractObjectCache = new HashCache(); static Map pathExprCache = new HashCache(); - static Map> indexListCache = new HashCache>(); + static Map> indexListCache = + new HashCache>(); static Map mKeyGroup1Cache = new HashCache(); static Map mKeyMatchesCache = new HashCache(); @@ -115,34 +120,47 @@ return null; } - try { - // Cache pathExpr - String[] pathExpr = pathExprCache.get(pathString); - if (pathExpr == null) { - pathExpr = pathString.split("\\.", -1); - pathExprCache.put(pathString, pathExpr); - } + // Cache pathExpr + String[] pathExpr = pathExprCache.get(pathString); + if (pathExpr == null) { + pathExpr = pathString.split("\\.", -1); + pathExprCache.put(pathString, pathExpr); + } - if (!pathExpr[0].equalsIgnoreCase("$")) { + if (!pathExpr[0].equalsIgnoreCase("$")) { + return null; + } + // Cache extractObject + Object extractObject = extractObjectCache.get(jsonString); + if (extractObject == null) { + try { + extractObject = MAPPER.readValue(jsonString, MAP_TYPE); + } catch (Exception e) { return null; } - // Cache extractObject - Object extractObject = extractObjectCache.get(jsonString); + extractObjectCache.put(jsonString, extractObject); + } + for (int i = 1; i < pathExpr.length; i++) { if (extractObject == null) { - extractObject = new JSONObject(jsonString); - extractObjectCache.put(jsonString, extractObject); + return null; } - for (int i = 1; i < pathExpr.length; i++) { - extractObject = extract(extractObject, pathExpr[i]); + extractObject = extract(extractObject, pathExpr[i]); + } + if (extractObject instanceof Map || extractObject instanceof List) { + try { + result.set(MAPPER.writeValueAsString(extractObject)); + } catch (Exception e) { + return null; } + } else if (extractObject != null) { result.set(extractObject.toString()); - return result; - } catch (Exception e) { + } else { return null; } + return result; } - private Object extract(Object json, String path) throws JSONException { + private Object extract(Object json, String path) { // Cache patternkey.matcher(path).matches() Matcher mKey = null; @@ -185,68 +203,73 @@ return json; } - ArrayList jsonList = new ArrayList(); + List jsonList = new ArrayList(); - private Object extract_json_withindex(Object json, ArrayList indexList) - throws JSONException { + @SuppressWarnings("unchecked") + private Object extract_json_withindex(Object json, ArrayList indexList) { jsonList.clear(); jsonList.add(json); Iterator itr = indexList.iterator(); while (itr.hasNext()) { String index = itr.next(); - ArrayList tmp_jsonList = new ArrayList(); + List tmp_jsonList = new ArrayList(); if (index.equalsIgnoreCase("*")) { - for (int i = 0; i < (jsonList).size(); i++) { - try { - JSONArray array = (JSONArray) (jsonList).get(i); - for (int j = 0; j < array.length(); j++) { - tmp_jsonList.add(array.get(j)); + for (int i = 0; i < jsonList.size(); i++) { + Object array = jsonList.get(i); + if (array instanceof List) { + for (int j = 0; j < ((List)array).size(); j++) { + tmp_jsonList.add(((List)array).get(j)); } - } catch (Exception e) { - continue; } } jsonList = tmp_jsonList; } else { for (int i = 0; i < (jsonList).size(); i++) { - try { - tmp_jsonList.add(((JSONArray) (jsonList).get(i)).get(Integer - .parseInt(index))); - } catch (ClassCastException e) { + Object array = jsonList.get(i); + int indexValue = Integer.parseInt(index); + if (!(array instanceof List)) { continue; - } catch (JSONException e) { + } + if (indexValue >= ((List)array).size()) { return null; } + tmp_jsonList.add(((List)array).get(indexValue)); jsonList = tmp_jsonList; } } } - return (jsonList.size() > 1) ? new JSONArray(jsonList) : jsonList.get(0); + if (jsonList.isEmpty()) { + return null; + } + return (jsonList.size() > 1) ? new ArrayList(jsonList) : jsonList.get(0); } - private Object extract_json_withkey(Object json, String path) - throws JSONException { - if (json.getClass() == org.json.JSONArray.class) { - JSONArray jsonArray = new JSONArray(); - for (int i = 0; i < ((JSONArray) json).length(); i++) { - Object josn_elem = ((JSONArray) json).get(i); - try { - Object json_obj = ((JSONObject) josn_elem).get(path); - if (json_obj.getClass() == org.json.JSONArray.class) { - for (int j = 0; j < ((JSONArray) json_obj).length(); j++) { - jsonArray.put(((JSONArray) json_obj).get(j)); - } - } else { - jsonArray.put(json_obj); - } - } catch (Exception e) { + @SuppressWarnings("unchecked") + private Object extract_json_withkey(Object json, String path) { + if (json instanceof List) { + List jsonArray = new ArrayList(); + for (int i = 0; i < ((List) json).size(); i++) { + Object json_elem = ((List) json).get(i); + Object json_obj = null; + if (json_elem instanceof Map) { + json_obj = ((Map) json_elem).get(path); + } else { continue; } + if (json_obj instanceof List) { + for (int j = 0; j < ((List) json_obj).size(); j++) { + jsonArray.add(((List) json_obj).get(j)); + } + } else if (json_obj != null) { + jsonArray.add(json_obj); + } } - return (jsonArray.length() == 0) ? null : jsonArray; + return (jsonArray.size() == 0) ? null : jsonArray; + } else if (json instanceof Map) { + return ((Map) json).get(path); } else { - return ((JSONObject) json).get(path); + return null; } } }