diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/udf/json_read/JsonReadBench.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/udf/json_read/JsonReadBench.java new file mode 100644 index 0000000000..fb2fba7772 --- /dev/null +++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/udf/json_read/JsonReadBench.java @@ -0,0 +1,84 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hive.benchmark.udf.json_read; + +import java.io.IOException; +import java.nio.charset.Charset; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFJsonRead; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.io.Text; +import org.junit.Test; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; + +public class JsonReadBench { + + @State(Scope.Thread) + public static class MyState { + + public final String json; + public String type; + + public MyState() { + try { + json = getResource("val1.json"); + type = getResource("val1.type").toLowerCase().trim(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private String getResource(String fname) throws IOException { + return IOUtils.toString(JsonReadBench.class.getResourceAsStream(fname), Charset.defaultCharset()); + } + } + + @Test + public void checkBenchMarkMethod() throws Exception { + benchmarkMethod(new MyState()); + } + + @Benchmark + public void benchmarkMethod(MyState state) throws Exception { + GenericUDFJsonRead udf = new GenericUDFJsonRead(); + ObjectInspector[] arguments = buildArguments(state.type); + udf.initialize(arguments); + + udf.evaluate(evalArgs(state.json)); + } + + private DeferredObject[] evalArgs(String string) { + return new DeferredObject[] { new GenericUDF.DeferredJavaObject(new Text(string)), null }; + } + + private ObjectInspector[] buildArguments(String typeStr) { + ObjectInspector valueOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + ObjectInspector[] arguments = { 
valueOI, PrimitiveObjectInspectorFactory + .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, new Text(typeStr)) }; + return arguments; + } + +} diff --git itests/hive-jmh/src/main/resources/org/apache/hive/benchmark/udf/json_read/val1.json itests/hive-jmh/src/main/resources/org/apache/hive/benchmark/udf/json_read/val1.json new file mode 100644 index 0000000000..4466539fef --- /dev/null +++ itests/hive-jmh/src/main/resources/org/apache/hive/benchmark/udf/json_read/val1.json @@ -0,0 +1,86 @@ +[ +{ + "t0":"2017-08-1414:45:23.522000", + "business_id": "vcNAWiLM4dR7D2nwwJ7nCA", + "hours": { + "Tuesday": { + "close": "17:00", + "open": "08:00" + }, + "Friday": { + "close": "17:00", + "open": "08:00" + } + }, + "open": true, + "categories": [ + "Doctors", + "Health & Medical" + ], + "review_count": 9, + "name": "Eric Goldberg, MD", + "neighborhoods": [], + "attributes": { + "By Appointment Only": true, + "Accepts Credit Cards": true, + "Good For Groups": 1 + }, + "type": "business" +} +, +{ + "business_id": "vcNAWiLM4dR7D2nwwJ7nCA", + "hours": { + "Tuesday": { + "close": "17:00", + "open": "08:00" + }, + "Friday": { + "close": "17:00", + "open": "08:00" + } + }, + "open": true, + "categories": [ + "Doctors", + "Health & Medical" + ], + "review_count": 9, + "name": "Eric Goldberg, MD", + "neighborhoods": [], + "attributes": { + "By Appointment Only": true, + "Accepts Credit Cards": true, + "Good For Groups": 1 + }, + "type": "business" +} +, +{ + "business_id": "vcNAWiLM4dR7D2nwwJ7nCA", + "hours": { + "Tuesday": { + "close": "17:00", + "open": "08:00" + }, + "Friday": { + "close": "17:00", + "open": "08:00" + } + }, + "open": true, + "categories": [ + "Doctors", + "Health & Medical" + ], + "review_count": 9, + "name": "Eric Goldberg, MD", + "neighborhoods": [], + "attributes": { + "By Appointment Only": true, + "Accepts Credit Cards": true, + "Good For Groups": 1 + }, + "type": "business" +} +] diff --git 
itests/hive-jmh/src/main/resources/org/apache/hive/benchmark/udf/json_read/val1.type itests/hive-jmh/src/main/resources/org/apache/hive/benchmark/udf/json_read/val1.type
new file mode 100644
index 0000000000..35432230d6
--- /dev/null
+++ itests/hive-jmh/src/main/resources/org/apache/hive/benchmark/udf/json_read/val1.type
@@ -0,0 +1 @@
+array<struct<attributes:struct<accepts credit cards:boolean,by appointment only:boolean,good for groups:int>,business_id:string,categories:array<string>,hours:map<string,struct<close:string,open:string>>,name:string,neighborhoods:array<string>,open:boolean,review_count:int,type:string>>
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 76e85636d1..749ff4d80e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -213,6 +213,7 @@ system.registerGenericUDF("ceiling", GenericUDFCeil.class);
     system.registerUDF("rand", UDFRand.class, false);
     system.registerGenericUDF("abs", GenericUDFAbs.class);
+    system.registerGenericUDF("json_read", GenericUDFJsonRead.class);
     system.registerGenericUDF("sq_count_check", GenericUDFSQCountCheck.class);
     system.registerGenericUDF("pmod", GenericUDFPosMod.class);
@@ -1660,7 +1661,9 @@ public static FunctionInfo registerPermanentFunction(String functionName,
   public static boolean isPermanentFunction(ExprNodeGenericFuncDesc fnExpr) {
     GenericUDF udf = fnExpr.getGenericUDF();
-    if (udf == null) return false;
+    if (udf == null) {
+      return false;
+    }
     Class<?> clazz = udf.getClass();
     if (udf instanceof GenericUDFBridge) {
@@ -1786,7 +1789,9 @@ public static boolean isRankingFunction(String name) throws SemanticException {
  */
   public static boolean isBuiltInFuncExpr(ExprNodeGenericFuncDesc fnExpr) {
     GenericUDF udf = fnExpr.getGenericUDF();
-    if (udf == null) return false;
+    if (udf == null) {
+      return false;
+    }
     Class<?> clazz = udf.getClass();
     if (udf instanceof GenericUDFBridge) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFJsonRead.java
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFJsonRead.java new file mode 100644 index 0000000000..3fa817f825 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFJsonRead.java @@ -0,0 +1,285 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import com.fasterxml.jackson.core.JsonFactory;
+import com.fasterxml.jackson.core.JsonParseException;
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.JsonToken;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.TextConverter;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+
+@Description(name = "json_read", value = "_FUNC_(json,type) - parses the given json according to the given complex type specification", extended = ""
+    + "Parsed as null: if the json is null, it is the empty string or if it contains only whitespaces\n"
+    + "Example:\n" + "select _FUNC_('[]','array<struct<a:string>>' ")
+public class GenericUDFJsonRead extends
GenericUDF { + + private ObjectInspector outputOI; + private transient JsonFactory factory; + private TextConverter inputConverter; + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + + checkArgsSize(arguments, 2, 2); + checkArgPrimitive(arguments, 0); + checkArgPrimitive(arguments, 1); + if (!ObjectInspectorUtils.isConstantObjectInspector(arguments[1])) { + throw new UDFArgumentTypeException(1, getFuncName() + " argument 2 may only be a constant"); + } + + inputConverter = new TextConverter((PrimitiveObjectInspector) arguments[0]); + String typeStr = getConstantStringValue(arguments, 1); + + try { + TypeInfo t = TypeInfoUtils.getTypeInfoFromTypeString(typeStr); + outputOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(t); + } catch (Exception e) { + throw new UDFArgumentException(getFuncName() + ": Error parsing typestring: " + e.getMessage()); + } + + factory = new JsonFactory(); + return outputOI; + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + Object valObject = arguments[0].get(); + if (valObject == null) { + return null; + } + + try { + String text = inputConverter.convert(valObject).toString(); + if (text.trim().length() == 0) { + return null; + } + JsonParser parser = factory.createParser(text); + + try { + parser.nextToken(); + Object res = parseDispatcher(parser, outputOI); + return res; + } catch (Exception e) { + String locationStr = parser.getCurrentLocation().getLineNr() + "," + parser.getCurrentLocation().getColumnNr(); + throw new HiveException("at[" + locationStr + "]: " + e.getMessage(), e); + } + } catch (Exception e) { + throw new HiveException("Error parsing json: " + e.getMessage(), e); + } + } + + @Override + public String getDisplayString(String[] children) { + return getStandardDisplayString("json_read", children); + } + + private static Object parseDispatcher(JsonParser parser, ObjectInspector oi) + throws 
JsonParseException, IOException, HiveException { + + switch (oi.getCategory()) { + case PRIMITIVE: + return parsePrimitive(parser, (PrimitiveObjectInspector) oi); + case LIST: + return parseList(parser, (ListObjectInspector) oi); + case STRUCT: + return parseStruct(parser, (StructObjectInspector) oi); + case MAP: + return parseMap(parser, (MapObjectInspector) oi); + default: + throw new HiveException("parsing of: " + oi.getCategory() + " is not handled"); + } + } + + private static Object parseMap(JsonParser parser, MapObjectInspector oi) throws IOException, HiveException { + + if (parser.getCurrentToken() == JsonToken.VALUE_NULL) { + parser.nextToken(); + return null; + } + + Map ret = new LinkedHashMap<>(); + + if (parser.getCurrentToken() != JsonToken.START_OBJECT) { + throw new HiveException("struct expected"); + } + + if(!(oi.getMapKeyObjectInspector() instanceof PrimitiveObjectInspector ) ) { + throw new HiveException("map key must be a primitive"); + } + PrimitiveObjectInspector keyOI = (PrimitiveObjectInspector) oi.getMapKeyObjectInspector(); + ObjectInspector valOI = oi.getMapValueObjectInspector(); + + JsonToken currentToken = parser.nextToken(); + while (currentToken != null && currentToken != JsonToken.END_OBJECT) { + + if (currentToken != JsonToken.FIELD_NAME) { + throw new HiveException("unexpected token: " + currentToken); + } + + Object key = parseMapKey(parser, keyOI); + Object val = parseDispatcher(parser, valOI); + ret.put(key, val); + + currentToken = parser.getCurrentToken(); + } + if (currentToken != null) { + parser.nextToken(); + } + return ret; + + } + + private static Object parseStruct(JsonParser parser, StructObjectInspector oi) + throws JsonParseException, IOException, HiveException { + + Object ret[] = new Object[oi.getAllStructFieldRefs().size()]; + + if (parser.getCurrentToken() == JsonToken.VALUE_NULL) { + parser.nextToken(); + return null; + } + if (parser.getCurrentToken() != JsonToken.START_OBJECT) { + throw new 
HiveException("struct expected"); + } + JsonToken currentToken = parser.nextToken(); + while (currentToken != null && currentToken != JsonToken.END_OBJECT) { + + switch (currentToken) { + case FIELD_NAME: + String name = parser.getCurrentName(); + try { + StructField field = oi.getStructFieldRef(name); + if (field == null) { + throw new HiveException("undeclared field"); + } + parser.nextToken(); + ret[field.getFieldID()] = parseDispatcher(parser, field.getFieldObjectInspector()); + } catch (Exception e) { + throw new HiveException("struct field " + name + ": " + e.getMessage(), e); + } + break; + default: + throw new HiveException("unexpected token: " + currentToken); + } + currentToken = parser.getCurrentToken(); + } + if (currentToken != null) { + parser.nextToken(); + } + return ret; + } + + private static Object parseList(JsonParser parser, ListObjectInspector oi) + throws JsonParseException, IOException, HiveException { + List ret = new ArrayList<>(); + + if (parser.getCurrentToken() == JsonToken.VALUE_NULL) { + parser.nextToken(); + return null; + } + + if (parser.getCurrentToken() != JsonToken.START_ARRAY) { + throw new HiveException("array expected"); + } + JsonToken currentToken = parser.nextToken(); + try { + while (currentToken != null && currentToken != JsonToken.END_ARRAY) { + ObjectInspector eOI = oi.getListElementObjectInspector(); + ret.add(parseDispatcher(parser, eOI)); + currentToken = parser.getCurrentToken(); + } + } catch (Exception e) { + throw new HiveException("array: " + e.getMessage(), e); + } + currentToken = parser.nextToken(); + + return ret; + } + + private static Object parsePrimitive(JsonParser parser, PrimitiveObjectInspector oi) + throws HiveException, IOException { + JsonToken currentToken = parser.getCurrentToken(); + if (currentToken == null) { + return null; + } + try { + switch (parser.getCurrentToken()) { + case VALUE_FALSE: + case VALUE_TRUE: + case VALUE_NUMBER_INT: + case VALUE_NUMBER_FLOAT: + case VALUE_STRING: + 
Converter c = + ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi); + return c.convert(parser.getValueAsString()); + case VALUE_NULL: + return null; + default: + throw new HiveException("unexpected token type: " + currentToken); + } + } finally { + parser.nextToken(); + + } + } + + private static Object parseMapKey(JsonParser parser, PrimitiveObjectInspector oi) throws HiveException, IOException { + JsonToken currentToken = parser.getCurrentToken(); + if (currentToken == null) { + return null; + } + try { + switch (parser.getCurrentToken()) { + case FIELD_NAME: + Converter c = + ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi); + return c.convert(parser.getValueAsString()); + case VALUE_NULL: + return null; + default: + throw new HiveException("unexpected token type: " + currentToken); + } + } finally { + parser.nextToken(); + + } + } + +} diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFJsonRead.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFJsonRead.java new file mode 100644 index 0000000000..92bb5419de --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFJsonRead.java @@ -0,0 +1,204 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.io.Text; +import org.junit.Test; + +public class TestGenericUDFJsonRead { + + @Test(expected = UDFArgumentException.class) + public void testArgCnt1() throws Exception { + try (GenericUDFJsonRead udf = new GenericUDFJsonRead()) { + ObjectInspector valueOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + ObjectInspector[] arguments = { valueOI }; + udf.initialize(arguments); + } + } + + @Test(expected = UDFArgumentException.class) + public void testArgCnt3() throws Exception { + try (GenericUDFJsonRead udf = new GenericUDFJsonRead()) { + ObjectInspector valueOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + ObjectInspector[] arguments = { valueOI, valueOI }; + udf.initialize(arguments); + } + } + + @Test(expected = UDFArgumentException.class) + public void testArgInvalidType() throws Exception { + try (GenericUDFJsonRead udf = new GenericUDFJsonRead()) { + ObjectInspector[] arguments = buildArguments("__invalid__type__"); + udf.initialize(arguments); + } + } + + @Test + public void testList() throws Exception { + try (GenericUDFJsonRead udf = new GenericUDFJsonRead()) { + 
ObjectInspector[] arguments = buildArguments("array<string>");
+      udf.initialize(arguments);
+
+      Object res = udf.evaluate(evalArgs("[\"a\",\"b\",null]"));
+      assertTrue(res instanceof List);
+      List l = (List) res;
+      assertEquals(3, l.size());
+      assertEquals(new Text("a"), l.get(0));
+      assertEquals(new Text("b"), l.get(1));
+      assertEquals(null, l.get(2));
+    }
+  }
+
+  @Test
+  public void testListNull() throws Exception {
+    try (GenericUDFJsonRead udf = new GenericUDFJsonRead()) {
+      ObjectInspector[] arguments = buildArguments("array<string>");
+      udf.initialize(arguments);
+
+      Object res = udf.evaluate(evalArgs("null"));
+      assertNull(res);
+    }
+  }
+
+  @Test
+  public void testSimpleStruct() throws Exception {
+    try (GenericUDFJsonRead udf = new GenericUDFJsonRead()) {
+      ObjectInspector[] arguments = buildArguments("struct<a:string>");
+      udf.initialize(arguments);
+
+      Object res = udf.evaluate(evalArgs("{\"a\":\"b\"}"));
+      assertTrue(res instanceof Object[]);
+      Object o[] = (Object[]) res;
+      assertEquals(new Text("b"), o[0]);
+    }
+  }
+
+  @Test
+  public void testStructNullField() throws Exception {
+    try (GenericUDFJsonRead udf = new GenericUDFJsonRead()) {
+      ObjectInspector[] arguments = buildArguments("struct<a:string>");
+      udf.initialize(arguments);
+
+      Object res = udf.evaluate(evalArgs("{\"a\":null}"));
+      assertTrue(res instanceof Object[]);
+      Object o[] = (Object[]) res;
+      assertEquals(null, o[0]);
+    }
+  }
+
+  @Test
+  public void testStructEmptyString() throws Exception {
+    try (GenericUDFJsonRead udf = new GenericUDFJsonRead()) {
+      ObjectInspector[] arguments = buildArguments("struct<a:string>");
+      udf.initialize(arguments);
+
+      Object res = udf.evaluate(evalArgs(""));
+      assertNull(res);
+    }
+  }
+
+  @Test
+  public void testStructNull() throws Exception {
+    try (GenericUDFJsonRead udf = new GenericUDFJsonRead()) {
+      ObjectInspector[] arguments = buildArguments("struct<a:string>");
+      udf.initialize(arguments);
+
+      Object res = udf.evaluate(new DeferredObject[] { new DeferredJavaObject(null), null });
+      assertNull(res);
+
}
+  }
+
+  @Test
+  public void testStructNullComplexField() throws Exception {
+    try (GenericUDFJsonRead udf = new GenericUDFJsonRead()) {
+      ObjectInspector[] arguments = buildArguments("struct<a:map<string,string>>");
+      udf.initialize(arguments);
+
+      Object res = udf.evaluate(evalArgs("{\"a\":null}"));
+      assertTrue(res instanceof Object[]);
+      Object o[] = (Object[]) res;
+      assertEquals(null, o[0]);
+    }
+  }
+
+  @Test(expected = HiveException.class)
+  public void testUndeclaredStructField() throws Exception {
+    try (GenericUDFJsonRead udf = new GenericUDFJsonRead()) {
+      ObjectInspector[] arguments = buildArguments("struct<a:string>");
+      udf.initialize(arguments);
+
+      Object res = udf.evaluate(evalArgs("{\"b\":null}"));
+      assertTrue(res instanceof Object[]);
+      Object o[] = (Object[]) res;
+      assertEquals(null, o[0]);
+    }
+  }
+
+  @Test(expected = HiveException.class)
+  public void testUnexpectedStruct() throws Exception {
+    try (GenericUDFJsonRead udf = new GenericUDFJsonRead()) {
+      ObjectInspector[] arguments = buildArguments("array<int>");
+      udf.initialize(arguments);
+
+      Object res = udf.evaluate(evalArgs("[1,22,2,{\"b\":null}]"));
+      assertTrue(res instanceof Object[]);
+      Object o[] = (Object[]) res;
+      assertEquals(null, o[0]);
+    }
+  }
+
+  @Test
+  public void testMap() throws Exception {
+    try (GenericUDFJsonRead udf = new GenericUDFJsonRead()) {
+      ObjectInspector[] arguments = buildArguments("map<string,string>");
+      udf.initialize(arguments);
+
+      Object res = udf.evaluate(evalArgs("{\"a\":\"v\"}"));
+      assertTrue(res instanceof Map);
+      Map o = (Map) res;
+      assertEquals(1, o.size());
+      assertEquals(new Text("v"), o.get(new Text("a")));
+    }
+  }
+
+  private DeferredObject[] evalArgs(String string) {
+    return new DeferredObject[] { new DeferredJavaObject(new Text(string)), null };
+  }
+
+  private ObjectInspector[] buildArguments(String typeStr) {
+    ObjectInspector valueOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+    ObjectInspector[] arguments = { valueOI, PrimitiveObjectInspectorFactory
.getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, new Text(typeStr)) }; + return arguments; + } + +} diff --git ql/src/test/queries/clientpositive/udf_json_read.q ql/src/test/queries/clientpositive/udf_json_read.q new file mode 100644 index 0000000000..30c297b4df --- /dev/null +++ ql/src/test/queries/clientpositive/udf_json_read.q @@ -0,0 +1,44 @@ +DESCRIBE FUNCTION java_read; +DESCRIBE FUNCTION EXTENDED java_read; + + +select json_read('[{"name":"john","alias":"j","address":{"city":"LA"}},{"name":"kinga","alias":"binga","age":2}]', + 'array>>'); + +create table t (info array>>); + +insert into t + select json_read('[{"name":"john","alias":"j","address":{"city":"LA"}},{"name":"kinga","alias":"binga","age":2}]', + 'array>>'); + + + +select json_read('[ +{ + "business_id": "vcNAWiLM4dR7D2nwwJ7nCA", + "hours": { + "Tuesday": { + "close": "17:00", + "open": "08:00" + }, + "Friday": { + "close": "17:00", + "open": "08:00" + } + }, + "open": true, + "categories": [ + "Doctors", + "Health & Medical" + ], + "review_count": 9, + "name": "Eric Goldberg, MD", + "neighborhoods": [], + "attributes": { + "By Appointment Only": true, + "Accepts Credit Cards": true, + "Good For Groups": 1 + }, + "type": "business" +} +]','array,business_id:string,categories:array,hours:map>,name:string,neighborhoods:array,open:boolean,review_count:int,type:string>>'); diff --git ql/src/test/results/clientpositive/show_functions.q.out ql/src/test/results/clientpositive/show_functions.q.out index 43e4a5de39..89dfe0cb26 100644 --- ql/src/test/results/clientpositive/show_functions.q.out +++ ql/src/test/results/clientpositive/show_functions.q.out @@ -128,6 +128,7 @@ isnottrue isnull istrue java_method +json_read json_tuple lag last_day diff --git ql/src/test/results/clientpositive/udf_json_read.q.out ql/src/test/results/clientpositive/udf_json_read.q.out new file mode 100644 index 0000000000..05b1eb8178 --- /dev/null +++ ql/src/test/results/clientpositive/udf_json_read.q.out 
@@ -0,0 +1,107 @@ +PREHOOK: query: DESCRIBE FUNCTION java_read +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION java_read +POSTHOOK: type: DESCFUNCTION +Function 'java_read' does not exist. +PREHOOK: query: DESCRIBE FUNCTION EXTENDED java_read +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED java_read +POSTHOOK: type: DESCFUNCTION +Function 'java_read' does not exist. +PREHOOK: query: select json_read('[{"name":"john","alias":"j","address":{"city":"LA"}},{"name":"kinga","alias":"binga","age":2}]', + 'array>>') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select json_read('[{"name":"john","alias":"j","address":{"city":"LA"}},{"name":"kinga","alias":"binga","age":2}]', + 'array>>') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +[{"name":"john","age":null,"alias":"j","address":{"city":"LA","street":null}},{"name":"kinga","age":2,"alias":"binga","address":null}] +PREHOOK: query: create table t (info array>>) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t +POSTHOOK: query: create table t (info array>>) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t +PREHOOK: query: insert into t + select json_read('[{"name":"john","alias":"j","address":{"city":"LA"}},{"name":"kinga","alias":"binga","age":2}]', + 'array>>') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t + select json_read('[{"name":"john","alias":"j","address":{"city":"LA"}},{"name":"kinga","alias":"binga","age":2}]', + 'array>>') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.info EXPRESSION [] +PREHOOK: query: select json_read('[ +{ + "business_id": "vcNAWiLM4dR7D2nwwJ7nCA", + "hours": { + "Tuesday": 
{ + "close": "17:00", + "open": "08:00" + }, + "Friday": { + "close": "17:00", + "open": "08:00" + } + }, + "open": true, + "categories": [ + "Doctors", + "Health & Medical" + ], + "review_count": 9, + "name": "Eric Goldberg, MD", + "neighborhoods": [], + "attributes": { + "By Appointment Only": true, + "Accepts Credit Cards": true, + "Good For Groups": 1 + }, + "type": "business" +} +]','array,business_id:string,categories:array,hours:map>,name:string,neighborhoods:array,open:boolean,review_count:int,type:string>>') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select json_read('[ +{ + "business_id": "vcNAWiLM4dR7D2nwwJ7nCA", + "hours": { + "Tuesday": { + "close": "17:00", + "open": "08:00" + }, + "Friday": { + "close": "17:00", + "open": "08:00" + } + }, + "open": true, + "categories": [ + "Doctors", + "Health & Medical" + ], + "review_count": 9, + "name": "Eric Goldberg, MD", + "neighborhoods": [], + "attributes": { + "By Appointment Only": true, + "Accepts Credit Cards": true, + "Good For Groups": 1 + }, + "type": "business" +} +]','array,business_id:string,categories:array,hours:map>,name:string,neighborhoods:array,open:boolean,review_count:int,type:string>>') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +[{"attributes":{"accepts credit cards":true,"by appointment only":true,"good for groups":1},"business_id":"vcNAWiLM4dR7D2nwwJ7nCA","categories":["Doctors","Health & Medical"],"hours":{"Tuesday":{"close":"17:00","open":"08:00"},"Friday":{"close":"17:00","open":"08:00"}},"name":"Eric Goldberg, MD","neighborhoods":[],"open":true,"review_count":9,"type":"business"}]