diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java index cf771d4..66778e0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java @@ -26,6 +26,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +import com.google.common.collect.Iterators; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.io.Text; @@ -215,41 +216,51 @@ private Object extract(Object json, String path) { return json; } - List jsonList = new ArrayList(); + private transient AddingList jsonList = new AddingList(); + + private static class AddingList extends ArrayList { + @Override + public Iterator iterator() { + return Iterators.forArray(toArray()); + } + @Override + public void removeRange(int fromIndex, int toIndex) { + super.removeRange(fromIndex, toIndex); + } + }; @SuppressWarnings("unchecked") private Object extract_json_withindex(Object json, ArrayList indexList) { jsonList.clear(); jsonList.add(json); - Iterator itr = indexList.iterator(); - while (itr.hasNext()) { - String index = itr.next(); - List tmp_jsonList = new ArrayList(); + for (String index : indexList) { + int targets = jsonList.size(); if (index.equalsIgnoreCase("*")) { - for (int i = 0; i < jsonList.size(); i++) { - Object array = jsonList.get(i); + for (Object array : jsonList) { if (array instanceof List) { for (int j = 0; j < ((List)array).size(); j++) { - tmp_jsonList.add(((List)array).get(j)); + jsonList.add(((List)array).get(j)); } } } - jsonList = tmp_jsonList; } else { - for (int i = 0; i < (jsonList).size(); i++) { - Object array = jsonList.get(i); + for (Object array : jsonList) { int indexValue = Integer.parseInt(index); if (!(array instanceof List)) { continue; } - if (indexValue >= ((List)array).size()) { - return null; + List list = (List) array; + if (indexValue >= list.size()) { + continue; } - tmp_jsonList.add(((List)array).get(indexValue)); - jsonList = tmp_jsonList; + jsonList.add(list.get(indexValue)); } } + if (jsonList.size() == targets) { + return null; + } + jsonList.removeRange(0, targets); } if (jsonList.isEmpty()) { return null; diff --git ql/src/test/results/clientpositive/udf_get_json_object.q.out ql/src/test/results/clientpositive/udf_get_json_object.q.out index 675a630..3be3b33 100644 --- ql/src/test/results/clientpositive/udf_get_json_object.q.out +++ ql/src/test/results/clientpositive/udf_get_json_object.q.out @@ -130,7 +130,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src_json #### A masked pattern was here #### POSTHOOK: Lineage: dest1.c1 SIMPLE [] -2 [[1,2,{"b":"y","a":"x"}],[3,4],[5,6]] 1 [1,2,{"b":"y","a":"x"}] [1,2,{"b":"y","a":"x"},3,4,5,6] y ["y"] +2 [[1,2,{"b":"y","a":"x"}],[3,4],[5,6]] [1,3,5] [1,2,{"b":"y","a":"x"}] [1,2,{"b":"y","a":"x"},3,4,5,6] y ["y"] PREHOOK: query: SELECT get_json_object(src_json.json, '$.non_exist_key'), get_json_object(src_json.json, '$..no_recursive'), get_json_object(src_json.json, '$.store.book[10]'), get_json_object(src_json.json, '$.store.book[0].non_exist_key'), get_json_object(src_json.json, '$.store.basket[*].non_exist_key'), get_json_object(src_json.json, '$.store.basket[0][*].non_exist_key') FROM src_json PREHOOK: type: QUERY PREHOOK: Input: default@src_json