diff --git hplsql/pom.xml hplsql/pom.xml
index 11eb14fc8b..1c27ef4b9a 100644
--- hplsql/pom.xml
+++ hplsql/pom.xml
@@ -61,7 +61,7 @@
       org.antlr
       antlr4-runtime
-      4.5
+      ${antlr4.version}
 
       org.apache.hadoop
diff --git pom.xml pom.xml
index 6a4b2501b3..716b21aa51 100644
--- pom.xml
+++ pom.xml
@@ -120,6 +120,7 @@
     5.5.0
     1.9.1
     3.5.2
+    4.5
     1.5.6
     0.1
diff --git ql/pom.xml ql/pom.xml
index 5790f512a9..eddd180d2f 100644
--- ql/pom.xml
+++ ql/pom.xml
@@ -143,6 +143,11 @@
       antlr-runtime
       ${antlr.version}
+
+      org.antlr
+      antlr4-runtime
+      ${antlr4.version}
+
       org.antlr
       ST4
@@ -843,6 +848,21 @@
+
+        org.antlr
+        antlr4-maven-plugin
+        4.5
+
+          true
+
+
+
+
+            antlr4
+
+
+
+
       org.apache.maven.plugins
       maven-antrun-plugin
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index d08b05fb68..ce72d66e81 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -330,6 +330,12 @@
     system.registerUDF("get_json_object", UDFJson.class, false);
+    // New SQL 2016 JSON methods
+    system.registerGenericUDF("isjson", GenericUDFIsJson.class);
+    system.registerGenericUDF("isnotjson", GenericUDFIsNotJson.class);
+    system.registerGenericUDF("json_value", GenericUDFJsonValue.class);
+    system.registerGenericUDF("json_query", GenericUDFJsonQuery.class);
+
     system.registerUDF("xpath_string", UDFXPathString.class, false);
     system.registerUDF("xpath_boolean", UDFXPathBoolean.class, false);
     system.registerUDF("xpath_number", UDFXPathDouble.class, false);
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index c623adfb4d..9b6403d273 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -505,6 +505,11 @@ public Vectorizer() {
     // For conditional expressions
     supportedGenericUDFs.add(GenericUDFIf.class);
+    // Json UDFs
+    supportedGenericUDFs.add(GenericUDFIsJson.class);
+    supportedGenericUDFs.add(GenericUDFJsonValue.class);
+    supportedGenericUDFs.add(GenericUDFJsonQuery.class);
+
     supportedAggregationUdfs.add("min");
     supportedAggregationUdfs.add("max");
     supportedAggregationUdfs.add("count");
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
index 608befcdf9..23168e518f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
@@ -383,6 +383,7 @@ KW_SYNC: 'SYNC';
 KW_AST: 'AST';
 KW_COST: 'COST';
 KW_JOINCOST: 'JOINCOST';
+KW_JSON: 'JSON';
 
 // Operators
 // NOTE: if you add a new function/operator, add it to sysFuncNames so that describe function _FUNC_ will work.
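The registrations above make the four new UDFs resolvable by name, and the Vectorizer change allows them in vectorized plans. A minimal usage sketch follows; the table t and its STRING column js are illustrative assumptions, not part of the patch:

  SELECT isjson(js),
         isnotjson(js),
         json_value(js, '$.name'),
         json_query(js, '$.address')
  FROM t;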
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index 46fa5635eb..168cbc4445 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -428,6 +428,7 @@
 TOK_REPLACE;
 TOK_LIKERP;
 TOK_UNMANAGED;
 TOK_INPUTFORMAT;
+TOK_JSON;
 }
@@ -456,6 +457,7 @@ import org.apache.hadoop.hive.conf.HiveConf;
     xlateMap.put("KW_TRUE", "TRUE");
     xlateMap.put("KW_FALSE", "FALSE");
     xlateMap.put("KW_UNKNOWN", "UNKNOWN");
+    xlateMap.put("KW_JSON", "JSON");
     xlateMap.put("KW_ALL", "ALL");
     xlateMap.put("KW_NONE", "NONE");
     xlateMap.put("KW_AND", "AND");
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index f22511ad67..14e3b7be64 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -451,10 +451,12 @@ isCondition
     | KW_TRUE -> Identifier["istrue"]
     | KW_FALSE -> Identifier["isfalse"]
     | KW_UNKNOWN -> Identifier["isnull"]
+    | KW_JSON -> Identifier["isjson"]
     | KW_NOT KW_NULL -> Identifier["isnotnull"]
     | KW_NOT KW_TRUE -> Identifier["isnottrue"]
     | KW_NOT KW_FALSE -> Identifier["isnotfalse"]
     | KW_NOT KW_UNKNOWN -> Identifier["isnotnull"]
+    | KW_NOT KW_JSON -> Identifier["isnotjson"]
     ;
 
 precedenceUnaryPrefixExpression
@@ -840,6 +842,7 @@ nonReserved
     | KW_RESOURCE | KW_PLAN | KW_PLANS | KW_QUERY_PARALLELISM | KW_ACTIVATE | KW_MOVE | KW_DO
     | KW_POOL | KW_ALLOC_FRACTION | KW_SCHEDULING_POLICY | KW_PATH | KW_MAPPING | KW_WORKLOAD | KW_MANAGEMENT | KW_ACTIVE | KW_UNMANAGED | KW_UNKNOWN
+    | KW_JSON
     ;
 
 //The following SQL2011 reserved keywords are used as function name only, but not as identifiers.
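The lexer and parser changes above add JSON as a non-reserved keyword and map the IS [NOT] JSON predicate onto the isjson/isnotjson UDFs. A sketch of the resulting syntax, again with hypothetical table and column names:

  SELECT id FROM events WHERE payload IS JSON;
  SELECT id FROM events WHERE payload IS NOT JSON;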
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIsJson.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIsJson.java
new file mode 100644
index 0000000000..4e73162f93
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIsJson.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.sqljsonpath.ErrorListener;
+import org.apache.hadoop.hive.ql.udf.generic.sqljsonpath.JsonPathException;
+import org.apache.hadoop.hive.ql.udf.generic.sqljsonpath.JsonValueParser;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.BooleanWritable;
+
+import java.io.IOException;
+
+/**
+ * Implementation of SQL 2016 IS JSON feature.
+ */
+@Description(name = "isjson",
+    value = "string IS JSON - Parses the given string to see if it is valid JSON",
+    extended = "Returns null if json is null, otherwise true or false")
+public class GenericUDFIsJson extends GenericUDF {
+
+  private PrimitiveObjectInspectorConverter.TextConverter inputConverter;
+  private JsonValueParser parser;
+  private BooleanWritable result;
+
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+    checkArgsSize(arguments, 1, 1);
+    checkArgPrimitive(arguments, 0);
+
+    inputConverter = new PrimitiveObjectInspectorConverter.TextConverter((PrimitiveObjectInspector) arguments[0]);
+    parser = new JsonValueParser(new ErrorListener());
+    result = new BooleanWritable();
+
+    return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
+  }
+
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+    Object arg = arguments[0].get();
+    if (arg == null) return null;
+    try {
+      String json = inputConverter.convert(arg).toString();
+      if (json.trim().length() == 0) {
+        result.set(false);
+      } else {
+        parser.parse(json);
+        result.set(true);
+      }
+    } catch (JsonPathException e) {
+      result.set(false);
+    } catch (IOException e) {
+      throw new HiveException("Error using JSON parser: " + e.getMessage(), e);
+    }
+    return result;
+  }
+
+  @Override
+  public String getDisplayString(String[] children) {
+    return getStandardDisplayString("isjson", children);
+  }
+}
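Based on the evaluate() logic above, a few illustrative calls and the results they should produce (a sketch for orientation, not test output shipped with the patch):

  SELECT isjson(NULL);         -- NULL: a null input yields a null result
  SELECT isjson('');           -- false: blank input is treated as invalid
  SELECT isjson('{"a": 1}');   -- true
  SELECT isjson('{"a": 1');    -- false: a parse failure is reported as false, not an error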
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIsNotJson.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIsNotJson.java
new file mode 100644
index 0000000000..d0f91b2ade
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIsNotJson.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.io.BooleanWritable;
+
+@Description(name = "isnotjson",
+    value = "string IS NOT JSON - Parses the given string to see if it is not valid JSON",
+    extended = "Returns null if json is null, otherwise true or false")
+public class GenericUDFIsNotJson extends GenericUDFIsJson {
+
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+    BooleanWritable result = (BooleanWritable) super.evaluate(arguments);
+    if (result == null) return null; // null input yields null output, matching GenericUDFIsJson
+    result.set(!result.get());
+    return result;
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFJsonQuery.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFJsonQuery.java
new file mode 100644
index 0000000000..67994dfd3a
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFJsonQuery.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.udf.generic.sqljsonpath.JsonQueryConverter;
+import org.apache.hadoop.hive.ql.udf.generic.sqljsonpath.JsonSequenceConverter;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+
+/**
+ * JsonQuery is almost identical to JsonValue except that it only supports string return types. Thus it can be used to
+ * easily fetch fragments of JSON. The same thing can be achieved by calling JsonValue with a return type of string,
+ * as it will handle all the casting for you. That's in fact what this class does. It extends GenericUDFJsonValue;
+ * the only functionality it adds is to check that the default value (if present) is a string.
+ */
+@Description(name = "json_query",
+    value = "_FUNC_(json_query, path_expression [, default_value [, on_empty [, on_error [, passing key, passing value...]]]])\n",
+    extended = "json_query, string\n"
+        + "path_expression, constant string.\n"
+        + "default_value optional, string: When on_empty or on_error are set to 'default' this value will"
+        + " be returned as the default value. Note that this does not have to be constant. If not provided"
+        + " or a null is passed, defaults to the empty string.\n"
+        + "on_empty optional, constant string: what to return when the result is empty. Valid values"
+        + " are 'null', 'default', and 'error'. 'null' will return a NULL, this is the default."
+ + " 'default' will return the instance provided in return_datatype. 'error' will throw an" + + " error. 'error' is not recommended as JSON Path expression can often return empty results.\n" + + "on_error optional, constant string: what to return when the path expression" + + " encounters an error. For example if the path expression '$.address.zip == 94100' encounters" + + " a zip code encoded as a String, it will throw an error. This can be set to the same values" + + " as on_empty with the same results. In general it is not recommended to set this to 'error'" + + " as JSON's loose type model makes such errors easy to generate. The default is 'null'." + + " Note that this does not refer to errors in parsing the path expression" + + " itself. If you passed a path expression of '$.name.nosuchfunc()' this would result in a" + + " compile time error since the path expression is invalid.\n" + + "passing key, passing value optional, passing key constant string, passing value one of " + + " string, int, long, double, boolean: passed in values to be used to fill in variables in" + + " the path expression. For example, if you had a path expression '$.$keyname' you could pass in " + + " ..., 'keyname', name_col) to dynamically fill out the name of the key from another" + + " column in the row. If this is not provided or NULL is passed no values will be plugged" + + " into the path expression. Note that using this slows down the UDF since it has to translate" + + " the passed in values on every invocation. Don't use this for stylistic reasons. Only use" + + " it if you really require dynamic clauses in your path expression." +) +public class GenericUDFJsonQuery extends GenericUDFJsonValue { + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + if (arguments.length > RETURNING) { + switch (arguments[RETURNING].getCategory()) { + case PRIMITIVE: + PrimitiveObjectInspector poi = (PrimitiveObjectInspector)arguments[RETURNING]; + switch (poi.getPrimitiveCategory()) { + case STRING: + case CHAR: + case VARCHAR: + break; + + default: + throw new UDFArgumentTypeException(RETURNING, getUdfName() + " only returns String, Char, or Varchar"); + } + break; + + default: + throw new UDFArgumentTypeException(RETURNING, getUdfName() + " only returns String, Char, or Varchar"); + } + } + return super.initialize(arguments); + } + + @Override + protected JsonSequenceConverter getConverter(ObjectInspector oi) { + return new JsonQueryConverter(oi); + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFJsonValue.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFJsonValue.java new file mode 100644 index 0000000000..c1ea9b6efd --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFJsonValue.java @@ -0,0 +1,270 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.sqljsonpath.CachingJsonValueParser; +import org.apache.hadoop.hive.ql.udf.generic.sqljsonpath.ErrorListener; +import org.apache.hadoop.hive.ql.udf.generic.sqljsonpath.JsonConversionException; +import org.apache.hadoop.hive.ql.udf.generic.sqljsonpath.JsonPathException; +import org.apache.hadoop.hive.ql.udf.generic.sqljsonpath.JsonSequence; +import org.apache.hadoop.hive.ql.udf.generic.sqljsonpath.JsonSequenceConverter; +import org.apache.hadoop.hive.ql.udf.generic.sqljsonpath.JsonValueConverter; +import org.apache.hadoop.hive.ql.udf.generic.sqljsonpath.JsonValueParser; +import org.apache.hadoop.hive.ql.udf.generic.sqljsonpath.PathExecutor; +import org.apache.hadoop.hive.ql.udf.generic.sqljsonpath.PathParseResult; +import org.apache.hadoop.hive.ql.udf.generic.sqljsonpath.PathParser; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +@Description(name = "json_value", + value = "_FUNC_(json_value, path_expression [, return_datatype [, on_empty [, on_error [, passing key, passing value...]]])\n", + extended = "json_value, string\n" + + "path_expression, constant string.\n" + + "return_datatype optional: returning data type. This should be an instance of the type to" + + " return. It can be an instance of string, bigint, int, double, boolean," + + " array , named_struct. If a type other than these is desired," + + " the output can be cast. When on_empty or on_error are set to 'default' this value will" + + " be returned as the default value. Note that this does not have to be constant. If not provided" + + " or a null is passed, defaults to string.\n" + + "on_empty optional, constant string: what to return when the result is empty. Valid values" + + " are 'null', 'default', and 'error'. 'null' will return a NULL, this is the default." + + " 'default' will return the instance provided in return_datatype. 'error' will throw an" + + " error. 'error' is not recommended as JSON Path expression can often return empty results.\n" + + "on_error optional, constant string: what to return when the path expression" + + " encounters an error. For example if the path expression '$.address.zip == 94100' encounters" + + " a zip code encoded as a String, it will throw an error. 
This can be set to the same values" + + " as on_empty with the same results. In general it is not recommended to set this to 'error'" + + " as JSON's loose type model makes such errors easy to generate. The default is 'null'." + + " Note that this does not refer to errors in parsing the path expression" + + " itself. If you passed a path expression of '$.name.nosuchfunc()' this would result in a" + + " compile time error since the path expression is invalid.\n" + + "passing key, passing value optional, passing key constant string, passing value one of " + + " string, int, long, double, boolean: passed in values to be used to fill in variables in" + + " the path expression. For example, if you had a path expression '$.$keyname' you could pass in " + + " ..., 'keyname', name_col) to dynamically fill out the name of the key from another" + + " column in the row. If this is not provided or NULL is passed no values will be plugged" + + " into the path expression. Note that using this slows down the UDF since it has to translate" + + " the passed in values on every invocation. Don't use this for stylistic reasons. Only use" + + " it if you really require dynamic clauses in your path expression." + ) +public class GenericUDFJsonValue extends GenericUDF { + + private static final Logger LOG = LoggerFactory.getLogger(GenericUDFJsonValue.class); + + protected static final int JSON_VALUE = 0; + protected static final int PATH_EXPR = 1; + protected static final int RETURNING = 2; + protected static final int ON_EMPTY = 3; + protected static final int ON_ERROR = 4; + protected static final int START_PASSING = 5; + + @VisibleForTesting + enum WhatToReturn { NULL, DEFAULT, ERROR } + + private PrimitiveObjectInspectorConverter.TextConverter textConverter; + private transient PathParseResult parseResult; // Antlr parse trees aren't serializable + private String pathExpr; + private PathExecutor pathExecutor; + private JsonValueParser jsonParser; + private WhatToReturn onEmpty; + private WhatToReturn onError; + private Map passingOIs; + // This OI servers as a template for the ObjectInspector we'll return. It also is used to decode the default value + // we've been passed. + private ObjectInspector returnOI; + private Object constantDefaultVal; + private JsonSequenceConverter jsonConverter; + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + if (arguments.length < 2) { + throw new UDFArgumentLengthException(getFuncName() + " requires at least json_value and path expression"); + } + + // Json Value, needs to be a string + checkArgPrimitive(arguments, JSON_VALUE); + textConverter = new PrimitiveObjectInspectorConverter.TextConverter((PrimitiveObjectInspector)arguments[JSON_VALUE]); + + // Path expression, should be a constant + checkArgPrimitive(arguments, PATH_EXPR); + pathExpr = getConstantStringValue(arguments, PATH_EXPR); + if (pathExpr == null) { + throw new UDFArgumentTypeException(PATH_EXPR, getFuncName() + " requires JSON path expression to be constant"); + } + // We can't keep the parse expression because it doesn't serialize, but we still parse it here up front to make + // sure it will work. + parse(); + + returnOI = arguments.length > RETURNING ? 
arguments[RETURNING] + : PrimitiveObjectInspectorFactory.writableStringObjectInspector; + + onEmpty = getOnEmptyOrOnError(arguments, ON_EMPTY); + onError = getOnEmptyOrOnError(arguments, ON_ERROR); + + // We only have to translate the default value if we might need to return it + if (ObjectInspectorUtils.isConstantObjectInspector(returnOI) && (onEmpty == WhatToReturn.DEFAULT || onError == WhatToReturn.DEFAULT)) { + ConstantObjectInspector coi = (ConstantObjectInspector) returnOI; + constantDefaultVal = coi.getWritableConstantValue(); + } + + if (arguments.length > START_PASSING) { + passingOIs = new HashMap<>(); + for (int i = START_PASSING; i < arguments.length; i += 2) { + if (arguments.length <= i + 1) { + throw new UDFArgumentLengthException("You must pass a matched set of passing variable names and values"); + } + + checkArgPrimitive(arguments, i); + String keyName = getConstantStringValue(arguments, i); + if (keyName == null) throw new UDFArgumentTypeException(i, "Passing variable name must be a constant string"); + passingOIs.put(keyName, arguments[i + 1]); + } + } else { + passingOIs = Collections.emptyMap(); + } + + return returnOI; + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + Object jsonObj = arguments[JSON_VALUE].get(); + if (jsonObj == null) return null; // Per spec top of page 312, null input = null output + JsonSequence jsonValue; + String input = textConverter.convert(jsonObj).toString(); + if (LOG.isDebugEnabled()) LOG.debug("Evaluating with " + input); + + // The first time through we have to reparse, since we couldn't serialize the parse tree. The second branch + // of the 'or' will never be activated in production, but without it unit tests fail since the UDF isn't + // serialized between initialize and evaluate in unit tests. + if (parseResult == null || pathExecutor == null) { + parse(); + pathExecutor = new PathExecutor(); + jsonParser = new CachingJsonValueParser(new ErrorListener()); + jsonConverter = getConverter(returnOI); + } + + Map passing = arguments.length > START_PASSING ? + translatePassingObjects(arguments) : Collections.emptyMap(); + + try { + LOG.debug("Going to call parser with " + input); + jsonValue = jsonParser.parse(input); + } catch (JsonPathException|IOException e) { + LOG.warn("Failed to parse input " + input + " as JSON", e); + return getOnError(arguments, input, "Failed to parse input as JSON: " + e.getMessage()); + } + + try { + JsonSequence result = pathExecutor.execute(parseResult, jsonValue, passing); + if (LOG.isDebugEnabled()) LOG.debug("Received back: " + result.prettyPrint()); + return result.isEmpty() ? 
getOnEmpty(arguments, input) : jsonConverter.convert(result); + } catch (JsonPathException e) { + LOG.warn("Failed to execute path expression for input " + input, e); + return getOnError(arguments, input, e.getMessage()); + } catch (JsonConversionException e) { + LOG.info("Conversion failure", e); + return getOnError(arguments, input, e.getMessage()); + } + } + + @Override + public String getDisplayString(String[] children) { + return getStandardDisplayString("json_value", children); + } + + protected JsonSequenceConverter getConverter(ObjectInspector oi) { + return new JsonValueConverter(oi); + } + + private Object getOnError(DeferredObject[] arguments, String jsonValue, String error) throws HiveException { + return onErrorOrEmpty(arguments, jsonValue, error, onError); + } + + private Object getOnEmpty(DeferredObject[] arguments, String jsonValue) throws HiveException { + return onErrorOrEmpty(arguments, jsonValue, "Result of path expression is empty", onEmpty); + } + + private Object onErrorOrEmpty(DeferredObject[] arguments, String jsonValue, String error, WhatToReturn errorOrEmpty) throws HiveException { + switch (errorOrEmpty) { + case ERROR: throw new HiveException("Error for input: " + jsonValue + ": " + error); + case NULL: return null; + case DEFAULT: return getDefaultValue(arguments); + default: throw new RuntimeException("programming error"); + } + } + + private Object getDefaultValue(DeferredObject[] arguments) throws HiveException { + if (constantDefaultVal != null) return constantDefaultVal; + else return arguments[RETURNING].get(); + } + + // TODO could optimize this for constants, not sure it's worth it + private Map translatePassingObjects(DeferredObject[] args) throws HiveException { + Map passingObjs = new HashMap<>(); + for (int i = START_PASSING; i < args.length; i += 2) { + assert i + 1 < args.length; + String argName = PrimitiveObjectInspectorUtils.getString(args[i].get(), PrimitiveObjectInspectorFactory.writableStringObjectInspector); + passingObjs.put(argName, JsonSequence.fromObjectInspector(passingOIs.get(argName), args[i + 1].get())); + } + return passingObjs; + } + + private void parse() throws UDFArgumentException { + try { + PathParser parser = new PathParser(); + LOG.debug("Parsing " + pathExpr); + parseResult = parser.parse(pathExpr); + } catch (IOException | JsonPathException e) { + LOG.info("Failed to parse JSON path exception: " + e.getMessage(), e); + throw new UDFArgumentException("Failed to parse JSON path exception: " + e.getMessage()); + } + } + + private WhatToReturn getOnEmptyOrOnError(ObjectInspector[] arguments, int index) throws UDFArgumentTypeException { + if (arguments.length > index) { + checkArgPrimitive(arguments, index); + String str = getConstantStringValue(arguments, index); + try { + return WhatToReturn.valueOf(str.toUpperCase()); + } catch (IllegalArgumentException e) { + throw new UDFArgumentTypeException(index, "Unknown onEmpty or onError specification " + str); + } + } else { + return WhatToReturn.NULL; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/CachingJsonValueParser.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/CachingJsonValueParser.java new file mode 100644 index 0000000000..57619e89b9 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/CachingJsonValueParser.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic.sqljsonpath; + +import java.io.IOException; +import java.util.concurrent.ExecutionException; + +/** + * A JsonValueParser that will cache the resulting JsonSequence's using {@link JsonSequenceCache}. + */ +public class CachingJsonValueParser extends JsonValueParser { + public CachingJsonValueParser(ErrorListener errorListener) { + super(errorListener); + } + + @Override + public JsonSequence parse(String jsonStr) throws IOException, JsonPathException { + return JsonSequenceCache.get().parse(jsonStr, this); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/EmptyOrErrorBehavior.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/EmptyOrErrorBehavior.java new file mode 100644 index 0000000000..2dc1c6390c --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/EmptyOrErrorBehavior.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic.sqljsonpath; + +public enum EmptyOrErrorBehavior { + ERROR, + NULL, + DEFAULT, + EMPTY_ARRAY, + EMPTY_OBJECT +} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/ErrorListener.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/ErrorListener.java new file mode 100644 index 0000000000..e12f71bfa6 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/ErrorListener.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic.sqljsonpath; + +import org.antlr.v4.runtime.ANTLRErrorListener; +import org.antlr.v4.runtime.Parser; +import org.antlr.v4.runtime.ParserRuleContext; +import org.antlr.v4.runtime.RecognitionException; +import org.antlr.v4.runtime.Recognizer; +import org.antlr.v4.runtime.atn.ATNConfigSet; +import org.antlr.v4.runtime.dfa.DFA; + +import java.util.ArrayList; +import java.util.BitSet; +import java.util.List; + +public class ErrorListener implements ANTLRErrorListener { + private List errors; + + public ErrorListener() { + errors = new ArrayList<>(); + } + + /** + * Reset this listener so there are no errors. + */ + void clear() { + errors.clear(); + } + + /** + * Check to see if an error occurred. + * @param expr expression that was parsed. Not checked here, just used in the error message. + * @throws JsonPathException if any errors were found. + */ + void checkForErrors(String expr) throws JsonPathException { + if (errors.size() > 0) throw new JsonPathException(expr, errors); + } + + /** + * Report a semantic error. + * @param s error string. + * @param ctx context at the point the error occurred. This is passed rather than a string to prevent all the work + * of constructing the error string unless there is an error. + */ + void semanticError(String s, ParserRuleContext ctx) { + errors.add("semantic error: " + s + " at \"" + getErrorText(ctx) + "\""); + } + + /** + * Report a runtime error. + * @param s error string. + * @param ctx context at the point the error occurred. This is passed rather than a string to prevent all the work + * of constructing the error string unless there is an error. + */ + void runtimeError(String s, ParserRuleContext ctx) { + errors.add("runtime error: " + s + " at \"" + getErrorText(ctx) + "\""); + } + + @Override + public void syntaxError(Recognizer recognizer, Object o, int line, int charpos, String s, RecognitionException e) { + errors.add("syntax error: " + s + " on line " + line + " at position " + charpos); + } + + @Override + public void reportAmbiguity(Parser parser, DFA dfa, int i, int i1, boolean b, BitSet bitSet, ATNConfigSet atnConfigSet) { + System.out.println("in reportAmbiguity"); + + } + + @Override + public void reportAttemptingFullContext(Parser parser, DFA dfa, int i, int i1, BitSet bitSet, ATNConfigSet atnConfigSet) { + System.out.println("in reportAttemptingFullContext"); + + } + + @Override + public void reportContextSensitivity(Parser parser, DFA dfa, int i, int i1, int i2, ATNConfigSet atnConfigSet) { + System.out.println("in reportContextSensitivity"); + + } + + private String getErrorText(ParserRuleContext ctx) { + // Stolen straight from ParserRuleContext except I put spaces in between tokens for readability. 
+ if (ctx.getChildCount() == 0) { + return ""; + } else { + StringBuilder builder = new StringBuilder(); + + boolean first = true; + for(int i = 0; i < ctx.getChildCount(); ++i) { + if (first) first = false; + else builder.append(" "); + builder.append(ctx.getChild(i).getText()); + } + + return builder.toString(); + } + + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/JsonConversionException.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/JsonConversionException.java new file mode 100644 index 0000000000..ad8dfe5ed8 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/JsonConversionException.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic.sqljsonpath; + +/** + * For errors attempting to convert a JsonSequence into an Object to be returned by JsonValue. + */ +public class JsonConversionException extends Exception { + + public JsonConversionException(String message) { + super(message); + } + + public JsonConversionException(String message, Throwable cause) { + super(message, cause); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/JsonPathException.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/JsonPathException.java new file mode 100644 index 0000000000..ef6377cb09 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/JsonPathException.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.udf.generic.sqljsonpath; + +import org.apache.commons.lang3.StringUtils; + +import java.util.List; + +public class JsonPathException extends Exception { + final private String message; + + public JsonPathException(String expr, List errors) { + message = "'" + expr + "' produced a " + StringUtils.join(errors, "; "); + } + + @Override + public String getMessage() { + return message; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/JsonQueryConverter.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/JsonQueryConverter.java new file mode 100644 index 0000000000..64d3b7681b --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/JsonQueryConverter.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic.sqljsonpath; + +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; + +public class JsonQueryConverter implements JsonSequenceConverter { + + private ObjectInspectorConverters.Converter converter; + + /** + * + * @param outputObjectInspector ObjectInspector to use to determine what form the writable should take. + */ + public JsonQueryConverter(ObjectInspector outputObjectInspector) { + ObjectInspector inputObjectInspector = PrimitiveObjectInspectorFactory.javaStringObjectInspector; + converter = ObjectInspectorConverters.getConverter(inputObjectInspector, outputObjectInspector); + } + + @Override + public Object convert(JsonSequence json) throws JsonConversionException { + if (json.isNull() || json.isEmpty()) return null; + else return converter.convert(json.isString() ? json.asString() : json.toString()); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/JsonSequence.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/JsonSequence.java new file mode 100644 index 0000000000..8ed81cf8b0 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/JsonSequence.java @@ -0,0 +1,667 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic.sqljsonpath;
+
+import org.antlr.v4.runtime.ParserRuleContext;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.function.BiFunction;
+import java.util.function.DoubleBinaryOperator;
+import java.util.function.LongBinaryOperator;
+
+/**
+ * JsonSequence tracks the JSON value being returned from a section of the parse tree. Since the value being returned
+ * can change type as it moves through the tree, JsonSequence can change its type as it goes along.
+ *
+ * The class is final to help the compiler inline methods, as we want operations on this to be as fast as
+ * possible since they'll be in the inner loop.
+ *
+ * Ideally we'd like to determine the types as part of the parse and not do the type branching for things like
+ * arithmetic operations on every row. Since JSON does not guarantee static types (e.g., the key "salary" could be
+ * an int in one record and a double in the next) this is not completely possible. For constants and such the hope
+ * is that the branch prediction on the chip will kick in and save us. It's worth experimenting in the future to see
+ * if this could be sped up by at least generating typed methods for constants.
+ */
+public final class JsonSequence {
+
+  enum Type {
+    LONG,
+    DOUBLE,
+    BOOL,
+    STRING,
+    LIST,         // Represents a JSON array, but called LIST since it is a List in Java; having asArray() return a List would just be too confusing
+    OBJECT,       // Represents a JSON object, {}
+    NULL,         // Represents the JSON null literal
+    EMPTY_RESULT  // This is not a JSON type. It represents the result of a path query that did not match anything.
+                  // It is returned separately from null so that the caller can decide how to deal with errors.
+ } + private static final Logger LOG = LoggerFactory.getLogger(JsonSequence.class); + + private static final Map LIST_OBJECT_INSPECTOR_CACHE = new HashMap<>(); + + /** + * Represents the JSON null "key" : null + */ + public static final JsonSequence nullJsonSequence = new JsonSequence(Type.NULL); + + public static final JsonSequence emptyResult = new JsonSequence(Type.EMPTY_RESULT); + + /** + * Represents the JSON true "key" : true + */ + public static final JsonSequence trueJsonSequence = new JsonSequence(true); + + /** + * Represents the JSON false "key" : false + */ + public static final JsonSequence falseJsonSequence = new JsonSequence(false); + + private Type type; + private Object val; + + /** + * Private because we don't want users creating new nulls + */ + private JsonSequence(Type type) { + this.type = type; + val = null; + } + + /** + * Private because we don't want users creating new true and false values + * @param val true or false + */ + private JsonSequence(boolean val) { + this.val = val; + type = Type.BOOL; + } + + /** + * Create a new JsonSequence that represents an integer value. + * @param val integer value (as a long) + */ + public JsonSequence(long val) { + this.val = val; + type = Type.LONG; + } + + /** + * Creates a new JsonSequence that represents a decimal value. + * @param val decimal value + */ + public JsonSequence(double val) { + this.val = val; + type = Type.DOUBLE; + } + + /** + * Creates a new JsonSequence that represents a string value. + * @param val string value + */ + public JsonSequence(String val) { + this.val = val; + type = Type.STRING; + } + + /** + * Creates a new JsonSequence that represents an array + * @param val array value (as a list) + */ + JsonSequence(List val) { + this.val = val; + type = Type.LIST; + } + + /** + * Creates a new JsonSequence that represents a JSON object + * @param val object value (as a map) + */ + JsonSequence(Map val) { + this.val = val; + type = Type.OBJECT; + } + + /** + * Copy constructor. This is a shallow copy, the underlying val Object is not copied. + * @param template JsonSequence to use as a template. 
+ */ + JsonSequence(JsonSequence template) { + this.val = template.val; + this.type = template.type; + } + + public static JsonSequence fromObjectInspector(ObjectInspector oi, Object data) throws UDFArgumentException { + switch (oi.getCategory()) { + case LIST: + ListObjectInspector loi = (ListObjectInspector)oi; + List jsonSequences = new ArrayList<>(loi.getListLength(data)); + for (int i = 0; i < loi.getListLength(data); i++) { + jsonSequences.add(fromObjectInspector(loi.getListElementObjectInspector(), loi.getListElement(data, i))); + } + return new JsonSequence(jsonSequences); + + case STRUCT: + StructObjectInspector soi = (StructObjectInspector)oi; + Map fields = new HashMap<>(); + for (StructField sf : soi.getAllStructFieldRefs()) { + JsonSequence json = fromObjectInspector(sf.getFieldObjectInspector(), soi.getStructFieldData(data, sf)); + fields.put(sf.getFieldName(), json); + } + return new JsonSequence(fields); + + case PRIMITIVE: + PrimitiveObjectInspector poi = (PrimitiveObjectInspector)oi; + switch (poi.getPrimitiveCategory()) { + case STRING: return new JsonSequence((String)poi.getPrimitiveJavaObject(data)); + case LONG: return new JsonSequence((Long)poi.getPrimitiveJavaObject(data)); + case INT: return new JsonSequence((Integer)poi.getPrimitiveJavaObject(data)); + case DOUBLE: return new JsonSequence((Double)poi.getPrimitiveJavaObject(data)); + case BOOLEAN: return new JsonSequence((Boolean)poi.getPrimitiveJavaObject(data)); + default: throw new UDFArgumentException("Cannot cast a " + poi.getPrimitiveCategory().name() + " to JsonSequence"); + } + + default: + throw new UDFArgumentException("Cannot cast a " + oi.getCategory().name() + " to JsonSequence"); + } + } + + public boolean isLong() { + return type == Type.LONG; + } + + public boolean isDouble() { + return type == Type.DOUBLE; + } + + public boolean isBool() { + return type == Type.BOOL; + } + + public boolean isString() { + return type == Type.STRING; + } + + public boolean isList() { + return type == Type.LIST; + } + + public boolean isObject() { + return type == Type.OBJECT; + } + + public boolean isNull() { + return type == Type.NULL; + } + + public boolean isEmpty() { + return type == Type.EMPTY_RESULT; + } + + /** + * Get as a long. Will assert (or throw ClassCastException) if not really a long. + * @return as a long + */ + public long asLong() { + assert val instanceof Long; + return (Long)val; + } + + /** + * Get as a boolean. Will assert (or throw ClassCastException) if not really a boolean. + * @return as a boolean + */ + public boolean asBool() { + assert val instanceof Boolean; + return (Boolean)val; + } + + /** + * Get as a double. Will assert (or throw ClassCastException) if not really a double. + * @return as a double + */ + public double asDouble() { + assert val instanceof Double; + return (Double)val; + + } + + /** + * Get as a string. Will assert (or throw ClassCastException) if not really a string. + * @return as a string + */ + public String asString() { + assert val instanceof String; + return (String)val; + } + + /** + * Get as a list. Will assert (or throw ClassCastException) if not really a list. + * @return as a list + */ + public List asList() { + assert val instanceof List; + return (List)val; + } + + /** + * Get as an object. Will assert (or throw ClassCastException) if not really an object. + * @return as an object + */ + public Map asObject() { + assert val instanceof Map; + return (Map)val; + } + + /** + * Get the underlying object with no cast. 
You should only call this if you're sure you want it as an object + * and aren't worried about the type. + * @return the value + */ + Object getVal() { + return val; + } + + void add(JsonSequence other, ErrorListener errorListener, ParserRuleContext ctx) { + arithmetic(other, (left, right) -> left + right, (left, right) -> left + right, false, errorListener, ctx); + } + + void subtract(JsonSequence other, ErrorListener errorListener, ParserRuleContext ctx) { + arithmetic(other, (left, right) -> left - right, (left, right) -> left - right, false, errorListener, ctx); + } + + void multiply(JsonSequence other, ErrorListener errorListener, ParserRuleContext ctx) { + arithmetic(other, (left, right) -> left * right, (left, right) -> left * right, false, errorListener, ctx); + } + + void divide(JsonSequence other, ErrorListener errorListener, ParserRuleContext ctx) { + arithmetic(other, (left, right) -> left / right, (left, right) -> left / right, true, errorListener, ctx); + } + + void modulo(JsonSequence other, ErrorListener errorListener, ParserRuleContext ctx) { + switch (type) { + case LONG: + switch (other.type) { + case LONG: + if (other.asLong() == 0) { + errorListener.runtimeError("Division by zero at ", ctx); + setNull(); + } else { + val = asLong() % other.asLong(); + } + break; + + default: + errorListener.semanticError("You cannot do mod on a " + other.type.name().toLowerCase(), ctx); + setNull(); + break; + } + break; + + default: + errorListener.semanticError("You cannot do mod on a " + type.name().toLowerCase(), ctx); + setNull(); + break; + } + } + + void negate(ErrorListener errorListener, ParserRuleContext ctx) { + switch (type) { + case LONG: + val = asLong() * -1; + break; + + case DOUBLE: + val = asDouble() * -1.0; + break; + + default: + errorListener.semanticError("You cannot do arithmetic on a " + type.name().toLowerCase(), ctx); + setNull(); + break; + } + } + + /** + * this is more than equals(). It checks to assure the types are the same or converts where possible. If the + * types cannot be compared a semantic error is raised in errorListener. + * @param other other value + * @param errorListener error listener to log errors to + * @return either trueJsonSequence or falseJsonSequence + */ + JsonSequence equalsOp(JsonSequence other, ErrorListener errorListener, ParserRuleContext ctx) { + // Null requires special handling, because if two things are null they are immediately equal + if (type == Type.NULL || other.type == Type.NULL) { + return type == Type.NULL && other.type == Type.NULL ? JsonSequence.trueJsonSequence : JsonSequence.falseJsonSequence; + } + return equalityOperator(other, Object::equals, errorListener, ctx) ? JsonSequence.trueJsonSequence : JsonSequence.falseJsonSequence; + } + + /** + * This is more than !equals. It checks types to make sure this comparison is sensible. If it is not a semantic + * error is returned. + * @param other other JsonSequence. + * @param errorListener error listener to log errors to + * @return either trueJsonSequence or falseJsonSequence + */ + JsonSequence notEqualsOp(JsonSequence other, ErrorListener errorListener, ParserRuleContext ctx) { + if (type == Type.NULL || other.type == Type.NULL) { + return type == Type.NULL && other.type == Type.NULL ? JsonSequence.falseJsonSequence : JsonSequence.trueJsonSequence; + } + return equalityOperator(other, (obj1, obj2) -> !obj1.equals(obj2), errorListener, ctx) ? 
JsonSequence.trueJsonSequence : + JsonSequence.falseJsonSequence; + } + + JsonSequence greaterThanOp(JsonSequence other, ErrorListener errorListener, ParserRuleContext ctx) { + return compareTo(other, errorListener, ctx) > 0 ? JsonSequence.trueJsonSequence : JsonSequence.falseJsonSequence; + } + + JsonSequence greaterThanEqualOp(JsonSequence other, ErrorListener errorListener, ParserRuleContext ctx) { + return compareTo(other, errorListener, ctx) >= 0 ? JsonSequence.trueJsonSequence : JsonSequence.falseJsonSequence; + } + + JsonSequence lessThanOp(JsonSequence other, ErrorListener errorListener, ParserRuleContext ctx) { + return compareTo(other, errorListener, ctx) < 0 ? JsonSequence.trueJsonSequence : JsonSequence.falseJsonSequence; + } + + JsonSequence lessThanEqualOp(JsonSequence other, ErrorListener errorListener, ParserRuleContext ctx) { + return compareTo(other, errorListener, ctx) <= 0 ? JsonSequence.trueJsonSequence : JsonSequence.falseJsonSequence; + } + + Type getType() { + return type; + } + + private void arithmetic(JsonSequence other, LongBinaryOperator longOp, DoubleBinaryOperator doubleOp, + boolean zeroCheck, ErrorListener errorListener, ParserRuleContext ctx) { + switch (type) { + case LONG: + switch (other.type) { + case LONG: + if (zeroCheck && other.asLong() == 0) { + errorListener.runtimeError("Division by zero at ", ctx); + setNull(); + } else { + val = longOp.applyAsLong(asLong(), other.asLong()); + } + break; + + case DOUBLE: + if (zeroCheck && other.asDouble() == 0.0) { + errorListener.runtimeError("Division by zero at ", ctx); + setNull(); + } else { + type = Type.DOUBLE; + val = doubleOp.applyAsDouble((double)asLong(), other.asDouble()); + } + break; + + default: + errorListener.semanticError("You cannot do arithmetic on a " + other.type.name().toLowerCase(), ctx); + setNull(); + break; + } + break; + + case DOUBLE: + switch (other.type) { + case LONG: + if (zeroCheck && other.asLong() == 0) { + errorListener.runtimeError("Division by zero at ", ctx); + setNull(); + } else { + val = doubleOp.applyAsDouble(asDouble(), (double)other.asLong()); + } + break; + + case DOUBLE: + if (zeroCheck && other.asDouble() == 0.0) { + errorListener.runtimeError("Division by zero at ", ctx); + setNull(); + } else { + val = doubleOp.applyAsDouble(asDouble(), other.asDouble()); + } + break; + + default: + errorListener.semanticError("You cannot do arithmetic on a " + other.type.name().toLowerCase(), ctx); + setNull(); + break; + } + break; + + default: + errorListener.semanticError("You cannot do arithmetic on a " + type.name().toLowerCase(), ctx); + setNull(); + break; + } + } + + private boolean equalityOperator(JsonSequence other, BiFunction comparator, + ErrorListener errorListener, ParserRuleContext ctx) { + switch (type) { + case LONG: + switch (other.type) { + case LONG: + return comparator.apply(asLong(), other.asLong()); + + case DOUBLE: + return comparator.apply((double)asLong(), other.asDouble()); + + default: + errorListener.semanticError("Cannot compare a long to a non-numeric type", ctx); + return false; + } + + case DOUBLE: + switch (other.type) { + case DOUBLE: + return comparator.apply(asDouble(), other.asDouble()); + + case LONG: + return comparator.apply(asDouble(), (double)other.asLong()); + + default: + errorListener.semanticError("Cannot compare a double to a non-numeric type", ctx); + return false; + + } + + case NULL: + // Null requires special handling because we cannot call the .equals method on its val. 
+ throw new RuntimeException("Programming error"); + + case BOOL: + case STRING: + case LIST: + case OBJECT: + if (type != other.type) { + errorListener.semanticError("Cannot compare a " + type.name().toLowerCase() + " to a " + + other.type.name().toLowerCase(), ctx); + return false; + } + return comparator.apply(val, other.val); + + case EMPTY_RESULT: + return false; + + default: + throw new RuntimeException("Programming error"); + } + } + + // This comparison doesn't handle type checking or coercion. Look at lessThanOp etc. for that. + private int compareTo(JsonSequence other, ErrorListener errorListener, ParserRuleContext ctx) { + switch (type) { + case LONG: + switch (other.type) { + case LONG: + return ((Long)val).compareTo(other.asLong()); + + case DOUBLE: + Double d = (double)asLong(); + return d.compareTo(other.asDouble()); + + default: + errorListener.semanticError("Cannot compare a long to a " + other.type.name().toLowerCase(), ctx); + return 0; + } + + case DOUBLE: + switch (other.type) { + case DOUBLE: + return ((Double)val).compareTo(other.asDouble()); + + case LONG: + return ((Double)val).compareTo((double)other.asLong()); + + default: + errorListener.semanticError("Cannot compare a decimal to a " + other.type.name().toLowerCase(), ctx); + return 0; + } + + case STRING: + if (other.isString()) return ((String)val).compareTo(other.asString()); + errorListener.semanticError("Cannot compare a string to a " + other.type.name().toLowerCase(), ctx); + return 0; + + default: + errorListener.semanticError("Cannot apply an inequality operator to a " + type.name().toLowerCase(), ctx); + return 0; + } + } + + private void setNull() { + type = Type.NULL; + val = null; + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof JsonSequence)) return false; + JsonSequence other = (JsonSequence)obj; + if (isNull() && other.isNull()) return true; + else if (isEmpty() && other.isEmpty()) return true; + else return type == other.type && val.equals(other.val); + } + + /** + * This will return a valid JSON value with minimal white space. The result is a valid JSON fragment. + * @return valid JSON string + */ + @Override + public String toString() { + return prettyPrint(0, false); + } + + /** + * Same as {@link #toString()}, but with returns to make it more easily human readable. Also, if + * {@link #isEmpty()} is true for this, it will print "empty result", which means this is not guaranteed + * to produce valid JSON. + * @return human readable (usually) valid JSON string. + */ + public String prettyPrint() { + return prettyPrint(0, true); + } + + private String prettyPrint(int in, boolean whitespace) { + StringBuilder buf = new StringBuilder(); + switch (type) { + case LONG: + case DOUBLE: + case BOOL: + return val.toString(); + + case STRING: + return '"' + val.toString() + '"'; + + case NULL: + return "null"; + + case EMPTY_RESULT: + return whitespace ? 
"empty result" : ""; + + case LIST: + if (whitespace) indent(buf, in); + buf.append("["); + if (whitespace) buf.append("\n"); + boolean first = true; + for (JsonSequence element : asList()) { + if (first) { + first = false; + } else { + buf.append(","); + if (whitespace) buf.append("\n"); + } + if (whitespace) indent(buf, in); + buf.append(element.prettyPrint(in + 1, whitespace)); + } + if (whitespace) { + buf.append("\n"); + indent(buf, in); + } + buf.append("]"); + return buf.toString(); + + case OBJECT: + if (whitespace) indent(buf, in); + buf.append("{"); + if (whitespace) buf.append("\n"); + first = true; + for (Map.Entry entry : asObject().entrySet()) { + if (first) first = false; + else { + buf.append(","); + if (whitespace) buf.append("\n"); + } + if (whitespace) indent(buf, in); + buf.append("\"") + .append(entry.getKey()) + .append("\""); + if (whitespace) buf.append(" "); + buf.append(":"); + if (whitespace) buf.append(" "); + buf.append(entry.getValue().prettyPrint(in + 1, whitespace)); + } + if (whitespace) { + buf.append("\n"); + indent(buf, in); + } + buf.append("}"); + return buf.toString(); + + default: + throw new RuntimeException("Programming error"); + } + } + + private void indent(StringBuilder buf, int in) { + for (int i = 0; i < in; i++) buf.append(" "); + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/JsonSequenceCache.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/JsonSequenceCache.java new file mode 100644 index 0000000000..a3a7d7c56b --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/JsonSequenceCache.java @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic.sqljsonpath; + + +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.Arrays; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; + +/** + * Cache parsed bits of JSON to avoid re-parsing a JSON string repeatedly. + * + * This cache is intended to address the situation where a user has a table with one JSON column and then runs a: + * query like + * "select json_value(json, "$.name") as name, json_value(json, "$.address.street") as street, ..." + * where he or she is repeatedly pulling values out of the same JSON column. We don't want to parse that same JSON + * for every invocation. 
At the same time consider a case query like: + * "select json_value(jsoncol1, "$.name"), json_value(jsoncol2, "$.name"), ..." + * Since each json_value invocation is looking at a different column there is no value in caching the resulting + * JsonSequences. + * + * The cache needs to be large enough that it can cache all the parse values in a vector batch, since the first + * invocation of json_value will process all 1024 entries in the batch before the second invocation sees any of them. + * + * Ideally the cache would be smart about what values to cache and what not. But it's hard to know how to size the + * cache because each UDF can't see all the others and thus doesn't know who's sharing its input. Also, multiple + * queries maybe run at the same time, possibly accessing the same or other values. + * + * The compromise solution to this is to build an LRU cache with a very aggressive timeout. Given that the goal is + * to preserve the values for the duration of one vector batch even 1 second should be plenty of time in the cache. + * + * There is also a cleaner thread that wakes periodically and calls cleanup() to avoid having the cache for the last + * vector batch in a query sit there unused and hogging memory until some other query calls json_value possibly far in + * the future. + */ +class JsonSequenceCache { + private static final Logger LOG = LoggerFactory.getLogger(JsonSequenceCache.class); + + private static final int MAX_CACHE_SIZE = VectorizedRowBatch.DEFAULT_SIZE * 100; + private static final int INITIAL_CACHE_SIZE = VectorizedRowBatch.DEFAULT_SIZE; + private static final long MILLISECONDS_TO_LIVE = 1000; + private static final long CLEAN_INTERVAL = 1000 * 10; + + private static JsonSequenceCache singleton = null; + + static JsonSequenceCache get() { + if (singleton == null) { + synchronized (JsonSequenceCache.class) { + if (singleton == null) { + singleton = new JsonSequenceCache(); + } + } + } + return singleton; + } + + private final Cache cache; + private final MessageDigest md5; + + private JsonSequenceCache() { + LOG.debug("Cleaning"); + cache = CacheBuilder + .newBuilder() + .expireAfterAccess(MILLISECONDS_TO_LIVE, TimeUnit.MILLISECONDS) + .initialCapacity(INITIAL_CACHE_SIZE) + .maximumSize(MAX_CACHE_SIZE) + .build(); + + try { + md5 = MessageDigest.getInstance("MD5"); + } catch (NoSuchAlgorithmException e) { + throw new RuntimeException(e); + } + + Runnable cleaner = () -> { + try { + while (true) { + Thread.sleep(CLEAN_INTERVAL); + cache.cleanUp(); + } + } catch (InterruptedException e) { + LOG.info("JsonSequenceCache cleaner thread received interrupt, shutting down."); + } + }; + new Thread(cleaner).start(); + } + + JsonSequence parse(String jsonStr, final JsonValueParser parser) throws IOException, JsonPathException { + // I don't use cache.get() because it does the loading (which in this case is the parsing) under the lock, + // which is bad. It's better to allow a string to be parsed multiple times then to serialize access to the cache. + ByteArrayWrapper hash = hash(jsonStr); + JsonSequence seq = cache.getIfPresent(hash); + if (seq == null) { + seq = parser.doParse(jsonStr); + cache.put(hash, seq); + } + return seq; + } + + private synchronized ByteArrayWrapper hash(String jsonStr) { + md5.reset(); + return new ByteArrayWrapper(md5.digest(jsonStr.getBytes())); + + } + + // byte[] only matches as a key in the cache if it's the same byte array, so make it actually look at the contents + // of the array. 
+ private static class ByteArrayWrapper { + private final byte[] array; + + ByteArrayWrapper(byte[] array) { + this.array = array; + } + + @Override + public boolean equals(Object o) { + // Fast and dangerous + assert o instanceof ByteArrayWrapper; + return Arrays.equals(array, ((ByteArrayWrapper)o).array); + } + + @Override + public int hashCode() { + int hash = 0; + switch (array.length) { + // All fall throughs here intentional. + default: + case 4: hash = array[3] << 12; + case 3: hash += array[2] << 8; + case 2: hash += array[1] << 4; + case 1: hash += array[0]; + } + return hash; + } + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/JsonSequenceConverter.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/JsonSequenceConverter.java new file mode 100644 index 0000000000..77b140d4a4 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/JsonSequenceConverter.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic.sqljsonpath; + +public interface JsonSequenceConverter { + + /** + * Convert the JsonSequence to something that can pass on to the rest of Hive. The conversion will be based on + * the object inspector. + * @param json JsonSequence to be converted to an object that can be read by outputOI. + * @return an object, not necessary a writable (may be a list or a map), or null. + * @throws JsonConversionException if a bad conversion is attempted. + */ + Object convert(JsonSequence json) throws JsonConversionException; +} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/JsonValueConverter.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/JsonValueConverter.java new file mode 100644 index 0000000000..a9b85fc48b --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/JsonValueConverter.java @@ -0,0 +1,242 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.udf.generic.sqljsonpath; + +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Given a JsonSequence and an ObjectInspector, convert the JsonSequence to the expected output type. This is done + * as a separate class so that we can keep state across multiple JsonSequences. + */ +public class JsonValueConverter implements JsonSequenceConverter { + private static final Logger LOG = LoggerFactory.getLogger(JsonValueConverter.class); + + private final ObjectInspector outputObjectInspector; + private final Map objectInspectorCache; + private final Map converterCache; + + /** + * + * @param outputObjectInspector ObjectInspector to use to determine what form the writable should take. + */ + public JsonValueConverter(ObjectInspector outputObjectInspector) { + this.outputObjectInspector = outputObjectInspector; + objectInspectorCache = new HashMap<>(); + converterCache = new HashMap<>(); + } + + @Override + public Object convert(JsonSequence json) throws JsonConversionException { + return convert(outputObjectInspector, json, true); + } + + private Object convert(ObjectInspector outputOI, JsonSequence json, boolean useCache) throws JsonConversionException { + if (json.isNull() || json.isEmpty()) return null; + + String cacheKey = buildCacheKey(json, outputOI); + ObjectInspectorConverters.Converter converter = null; + // Don't use the cache when we're converting nested objects. Converters have a member object they always return, + // so for example if you use one converter to convert all the elements of a list you'll end up with everything + // in your list pointing to the same object, which will have the value of the last thing converted. We could + // still cache the converter and then copy the result, but this seems equivalent to not caching the converter. + if (useCache) converter = converterCache.get(cacheKey); + + // Wrap the whole thing in a try because some of the converters throw RuntimeExceptions if you try a conversion + // they don't support. 
We don't want to blow up the execution with a RuntimeException + try { + if (converter == null) { + ObjectInspector inputObjectInspector = getInputObjectInspector(json, outputOI); + if (LOG.isDebugEnabled()) { + LOG.debug("Using output ObjectInspector " + + ObjectInspectorUtils.getObjectInspectorName(outputOI)); + LOG.debug("Using input ObjectInspector " + ObjectInspectorUtils.getObjectInspectorName(inputObjectInspector)); + } + converter = ObjectInspectorConverters.getConverter(inputObjectInspector, outputOI); + converterCache.put(cacheKey, converter); + } + + switch (outputOI.getCategory()) { + case STRUCT: + if (json.isObject()) { + StructObjectInspector soi = (StructObjectInspector) outputOI; + List output = new ArrayList<>(); + for (StructField sf : soi.getAllStructFieldRefs()) { + JsonSequence seq = json.asObject().get(sf.getFieldName()); + output.add(seq == null ? null : convert(sf.getFieldObjectInspector(), seq, false)); + } + return converter.convert(output); + } + throw new JsonConversionException("Attempt to cast " + json.getType().name().toLowerCase() + " as object"); + + case LIST: + if (json.isList()) { + ListObjectInspector loi = (ListObjectInspector) outputOI; + List converted = new ArrayList<>(); + for (JsonSequence element : json.asList()) { + converted.add(convert(loi.getListElementObjectInspector(), element, false)); + } + return converter.convert(converted); + } + throw new JsonConversionException("Attempt to cast " + json.getType().name().toLowerCase() + " as list"); + + case PRIMITIVE: + return converter.convert(json.getVal()); + + default: + throw new RuntimeException("Programming error, unexpected category " + outputOI.getCategory()); + } + } catch (Exception e) { + throw new JsonConversionException("Failed conversion", e); + } + } + + private ObjectInspector getInputObjectInspector(JsonSequence json, ObjectInspector outputOI) { + String cacheKey = buildCacheKey(json, outputOI); + ObjectInspector cached = objectInspectorCache.get(cacheKey); + if (cached != null) return cached; + ObjectInspector inputOI; + switch (json.getType()) { + case OBJECT: + if (outputOI.getCategory() == ObjectInspector.Category.STRUCT) { + StructObjectInspector soi = (StructObjectInspector)outputOI; + List names = new ArrayList<>(); + List fieldOIs = new ArrayList<>(); + for (StructField sf : soi.getAllStructFieldRefs()) { + names.add(sf.getFieldName()); + ObjectInspector fieldInspector = translateOutputOI(sf.getFieldObjectInspector()); + fieldOIs.add(fieldInspector); + } + inputOI = ObjectInspectorFactory.getStandardStructObjectInspector(names, fieldOIs); + } else { + inputOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector; + } + break; + + + case LIST: + if (outputOI.getCategory() == ObjectInspector.Category.LIST) { + ListObjectInspector loi = (ListObjectInspector) outputOI; + inputOI = ObjectInspectorFactory.getStandardListObjectInspector(translateOutputOI(loi.getListElementObjectInspector())); + } else { + inputOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector; + } + break; + + case BOOL: + inputOI = PrimitiveObjectInspectorFactory.javaBooleanObjectInspector; + break; + + case LONG: + inputOI = PrimitiveObjectInspectorFactory.javaLongObjectInspector; + break; + + case DOUBLE: + inputOI = PrimitiveObjectInspectorFactory.javaDoubleObjectInspector; + break; + + case STRING: + case NULL: // here to handle case where there's a null in a list or a struct + inputOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector; + break; + + default: + throw new 
RuntimeException("Programming error, unexpected type " + json.getType().name()); + } + objectInspectorCache.put(cacheKey, inputOI); + return inputOI; + } + + // Translate the output object inspector to the right type. This is necessary to get rid of constant + // OIs. + private ObjectInspector translateOutputOI(ObjectInspector outputOI) { + switch (outputOI.getCategory()) { + case STRUCT: + List names = new ArrayList<>(); + List inspectors = new ArrayList<>(); + StructObjectInspector soi = (StructObjectInspector)outputOI; + for (StructField sf : soi.getAllStructFieldRefs()) { + names.add(sf.getFieldName()); + inspectors.add(translateOutputOI(sf.getFieldObjectInspector())); + } + return ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors); + + case LIST: + return ObjectInspectorFactory.getStandardListObjectInspector(translateOutputOI(((ListObjectInspector)outputOI).getListElementObjectInspector())); + + case PRIMITIVE: + PrimitiveObjectInspector poi = (PrimitiveObjectInspector)outputOI; + return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(poi.getPrimitiveCategory()); + + default: + throw new RuntimeException("Programming error, unexpected category " + outputOI.getCategory().name()); + } + } + + private String buildCacheKey(JsonSequence json, ObjectInspector outputOI) { + return buildJsonTypeName(json) + outputOI.getClass().getName(); + } + + // This does not build a human readable type name. The point here is to build a unique string that can be used + // as a key in a map + private String buildJsonTypeName(JsonSequence json) { + switch (json.getType()) { + case OBJECT: + StringBuilder buf = new StringBuilder("O{"); + for (JsonSequence seq : json.asObject().values()) { + buf.append(buildJsonTypeName(seq)); + } + buf.append("}"); + return buf.toString(); + + case LIST: + return "L"; + + case BOOL: + return "b"; + + case LONG: + return "l"; + + case DOUBLE: + return "d"; + + case NULL: + case EMPTY_RESULT: + case STRING: + return "s"; + + default: + throw new RuntimeException("Programming error, unexpected type " + json.getType()); + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/JsonValueParser.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/JsonValueParser.java new file mode 100644 index 0000000000..bf613ae8c4 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/JsonValueParser.java @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.udf.generic.sqljsonpath; + +import org.antlr.v4.runtime.ANTLRInputStream; +import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.tree.ParseTree; +import org.apache.hadoop.hive.ql.udf.generic.JsonBaseVisitor; +import org.apache.hadoop.hive.ql.udf.generic.JsonLexer; +import org.apache.hadoop.hive.ql.udf.generic.JsonParser; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Deque; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class JsonValueParser extends JsonBaseVisitor { + + private ErrorListener errorListener; + private Deque> arrayStack; + private Deque> objStack; + + public JsonValueParser(ErrorListener errorListener) { + this.errorListener = errorListener; + arrayStack = new ArrayDeque<>(); + objStack = new ArrayDeque<>(); + } + + /** + * Parse a string into a JsonSequence. + * @param jsonStr string to be parsed + * @return JsonSequence representing the string + * @throws IOException probably is never really thrown + * @throws JsonPathException if the string failed parse as valid JSON. + */ + public JsonSequence parse(String jsonStr) throws IOException, JsonPathException { + return doParse(jsonStr); + } + + /** + * Separated from {@link #parse(String)} so that subclasses can play games before actually parsing (like check + * the cache). + * @param jsonStr string to be parsed + * @return JsonSequence representing the string + * @throws IOException probably is never really thrown + * @throws JsonPathException if the string failed parse as valid JSON. + */ + JsonSequence doParse(String jsonStr) throws IOException, JsonPathException { + clear(); + JsonLexer scanner = new JsonLexer(new ANTLRInputStream(new ByteArrayInputStream(jsonStr.getBytes()))); + CommonTokenStream tokens = new CommonTokenStream(scanner); + JsonParser parser = new JsonParser(tokens); + parser.addErrorListener(errorListener); + ParseTree tree = parser.object(); + errorListener.checkForErrors(jsonStr); + JsonSequence jsonVal = visit(tree); + errorListener.checkForErrors(jsonStr); + return jsonVal; + } + + @Override + public JsonSequence visitObject(org.apache.hadoop.hive.ql.udf.generic.JsonParser.ObjectContext ctx) { + objStack.push(new HashMap<>()); + visitChildren(ctx); + Map obj = objStack.pop(); + return new JsonSequence(obj); + } + + @Override + public JsonSequence visitElement(org.apache.hadoop.hive.ql.udf.generic.JsonParser.ElementContext ctx) { + JsonSequence element = visit(ctx.getChild(2)); + String key = ctx.getChild(0).getText(); + assert objStack.size() > 0; + objStack.peek().put(key.substring(1, key.length() - 1), element); + return JsonSequence.emptyResult; + } + + @Override + public JsonSequence visitArray(org.apache.hadoop.hive.ql.udf.generic.JsonParser.ArrayContext ctx) { + arrayStack.push(new ArrayList<>()); + visitChildren(ctx); + List array = arrayStack.pop(); + return new JsonSequence(array); + } + + @Override + public JsonSequence visitArray_element(org.apache.hadoop.hive.ql.udf.generic.JsonParser.Array_elementContext ctx) { + JsonSequence element = visit(ctx.getChild(0)); + assert arrayStack.size() > 0; + arrayStack.peek().add(element); + return JsonSequence.emptyResult; + } + + @Override + public JsonSequence visitNull_literal(org.apache.hadoop.hive.ql.udf.generic.JsonParser.Null_literalContext ctx) { + return JsonSequence.nullJsonSequence; + } + + @Override + public JsonSequence 
visitBoolean_literal(org.apache.hadoop.hive.ql.udf.generic.JsonParser.Boolean_literalContext ctx) { + if (ctx.getText().equalsIgnoreCase("true")) return JsonSequence.trueJsonSequence; + else if (ctx.getText().equalsIgnoreCase("false")) return JsonSequence.falseJsonSequence; + else throw new RuntimeException("Programming error"); + } + + @Override + public JsonSequence visitInt_literal(org.apache.hadoop.hive.ql.udf.generic.JsonParser.Int_literalContext ctx) { + return new JsonSequence(Long.valueOf(ctx.getText())); + } + + @Override + public JsonSequence visitDecimal_literal(org.apache.hadoop.hive.ql.udf.generic.JsonParser.Decimal_literalContext ctx) { + return new JsonSequence(Double.valueOf(ctx.getText())); + } + + @Override + public JsonSequence visitString_literal(org.apache.hadoop.hive.ql.udf.generic.JsonParser.String_literalContext ctx) { + String val = ctx.getText(); + return new JsonSequence(val.substring(1, val.length() - 1)); + } + + private void clear() { + errorListener.clear(); + arrayStack.clear(); + objStack.clear(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/Mode.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/Mode.java new file mode 100644 index 0000000000..737120b6b0 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/Mode.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic.sqljsonpath; + +public enum Mode { + STRICT, + LAX +} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/PathExecutor.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/PathExecutor.java new file mode 100644 index 0000000000..aed1489040 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/PathExecutor.java @@ -0,0 +1,761 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.udf.generic.sqljsonpath; + +import com.google.common.annotations.VisibleForTesting; +import org.antlr.v4.runtime.ParserRuleContext; +import org.antlr.v4.runtime.tree.ParseTree; +import org.apache.hadoop.hive.ql.udf.generic.SqlJsonPathBaseVisitor; +import org.apache.hadoop.hive.ql.udf.generic.SqlJsonPathParser; + +import java.io.IOException; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.function.BinaryOperator; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; + +/** + * Evaluates SQL/JSON Path statements against a specific JSON value. This is done by visiting the parse tree. This + * class is not reentrant and is only intended to be used by a single thread at a time. However, it does not depend + * on any static state and as many instances as desired can be created. + *

+ *
+ * The class is designed to be used repeatedly and thus resets all its state on each call to
+ * {@link #execute(PathParseResult, JsonSequence, Map)}.
+ *
+ * The following digressions from the SQL Spec are noted:
+ *   1. Values passed in to execute via the passing clause are not parsed to see if they are JSON. A simple
+ *      replace is done in the path expression.
+ *   2.
+ *
+ */ +public class PathExecutor extends SqlJsonPathBaseVisitor { + + private static final String START_SUBSCRIPT = "__json_start_subscript"; + private static final String END_SUBSCRIPT = "__json_end_subscript"; + private static final JsonSequence lastJsonSequence = new JsonSequence("last"); + + private enum PathElement { START, MEMBER_ACCESSOR, MEMBER_WILDCARD, SINGLE_SUBCRIPT, MULTI_SUBSCRIPT, METHOD } + + private JsonSequence value; + private Map passing; + private ErrorListener errorListener; + // matching tracks the match we have made so far. It is modified by many methods in the visitor. During pre-filter + // operations it tracks the matches from the path expression. Once we are in the filter it tracks the match + // expressions of the filter fragment currently being considered. + private JsonSequence matching; + // matchAtFilter caches the match that was made before the filter was considered. + private JsonSequence matchAtFilter; + private Mode mode; + // Cache patterns that we compile as part of regular expression matching. This gets reset each time we execute + private Map regexPatterns; + private MessageDigest md5; + private PathElement previousElement; + + @VisibleForTesting + JsonSequence returnedByVisit; + + // TODO Check against spec on 709 2 near top + + public PathExecutor() { + regexPatterns = new HashMap<>(); + try { + md5 = MessageDigest.getInstance("MD5"); + } catch (NoSuchAlgorithmException e) { + throw new RuntimeException("Programming error", e); + } + } + + /** + * Execute a SQL/JSON Path statement against a particular bit of JSON + * @param parseResult info from parsing the expression + * @param value JSON value to execute the Path statement against. If you want to pass a SQL NULL in this place + * pass a Java null. + * @param passing map of arguments defined in the parse tree + * @return value of executing the Path statement against the value + * @throws JsonPathException if a semantic or runtime error occurs. + */ + public JsonSequence execute(PathParseResult parseResult, JsonSequence value, Map passing) throws JsonPathException { + // Per the spec, if a null value is passed in, the result is empty but successful (p 707, 11.b.i.1.A) + if (value == null) return JsonSequence.emptyResult; + this.value = value; + this.passing = passing == null ? Collections.emptyMap() : passing; + errorListener = parseResult.errorListener; + errorListener.clear(); + matching = null; + mode = Mode.STRICT; // Strict is default, if the path expression specifies lax it will be set in the visit + previousElement = PathElement.START; + regexPatterns.clear(); + returnedByVisit = visit(parseResult.parseTree); + errorListener.checkForErrors(parseResult.pathExpr); + return matching; + } + + @Override + public JsonSequence visitPath_mode(SqlJsonPathParser.Path_modeContext ctx) { + mode = Mode.valueOf(ctx.getText().toUpperCase()); + if (mode == Mode.LAX) { + errorListener.semanticError("lax mode not supported", ctx); + } + return null; + } + + // Visit methods return JSON sequences. However, these are not the current match. These are used for building + // up values via arithmetic, etc. The match we have seen so far is built up in the matching member. 
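  // A minimal usage sketch of this class together with PathParser and JsonValueParser, based on the signatures
  // in this patch (the path and JSON literals are invented for illustration, and checked exceptions are omitted):
  //
  //   PathParseResult parseResult = new PathParser().parse("$.address.city");
  //   JsonSequence doc = new JsonValueParser(new ErrorListener()).parse("{\"address\" : {\"city\" : \"Springfield\"}}");
  //   JsonSequence match = new PathExecutor().execute(parseResult, doc, null);   // a null passing map is treated as empty
  //
  // The same executor instance can be reused for subsequent values, since execute() resets its state on each call.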
+ + @Override + public JsonSequence visitAdditive_expression(SqlJsonPathParser.Additive_expressionContext ctx) { + if (ctx.getChildCount() == 1) return visit(ctx.getChild(0)); + assert ctx.getChildCount() == 3; + ParseTree operator = ctx.getChild(1); + JsonSequence val1 = visit(ctx.getChild(0)); + JsonSequence val2 = visit(ctx.getChild(2)); + switch (operator.getText()) { + case "+": val1.add(val2, errorListener, ctx); break; + case "-": val1.subtract(val2, errorListener, ctx); break; + default: throw new RuntimeException("Programming error"); + } + return val1; + } + + @Override + public JsonSequence visitMultiplicative_expression(SqlJsonPathParser.Multiplicative_expressionContext ctx) { + if (ctx.getChildCount() == 1) return visit(ctx.getChild(0)); + assert ctx.getChildCount() == 3; + ParseTree operator = ctx.getChild(1); + JsonSequence val1 = visit(ctx.getChild(0)); + JsonSequence val2 = visit(ctx.getChild(2)); + switch (operator.getText()) { + case "*": val1.multiply(val2, errorListener, ctx); break; + case "/": val1.divide(val2, errorListener, ctx); break; + case "%": val1.modulo(val2, errorListener, ctx); break; + default: throw new RuntimeException("Programming error"); + } + return val1; + } + + @Override + public JsonSequence visitUnary_expression(SqlJsonPathParser.Unary_expressionContext ctx) { + if (ctx.getChildCount() == 1) return visit(ctx.getChild(0)); + assert ctx.getChildCount() == 2; + ParseTree operator = ctx.getChild(0); + JsonSequence val = visit(ctx.getChild(1)); + switch (operator.getText()) { + case "+": break; + case "-": val.negate(errorListener, ctx); break; + default: throw new RuntimeException("Programming error"); + } + return val; + } + + @Override + public JsonSequence visitPath_null_literal(SqlJsonPathParser.Path_null_literalContext ctx) { + return JsonSequence.nullJsonSequence; + } + + @Override + public JsonSequence visitPath_boolean_literal(SqlJsonPathParser.Path_boolean_literalContext ctx) { + if (ctx.getText().equalsIgnoreCase("true")) return JsonSequence.trueJsonSequence; + else if (ctx.getText().equalsIgnoreCase("false")) return JsonSequence.falseJsonSequence; + else throw new RuntimeException("Programming error"); + } + + @Override + public JsonSequence visitPath_integer_literal(SqlJsonPathParser.Path_integer_literalContext ctx) { + return new JsonSequence(Long.valueOf(ctx.getText())); + } + + @Override + public JsonSequence visitPath_decimal_literal(SqlJsonPathParser.Path_decimal_literalContext ctx) { + return new JsonSequence(Double.valueOf(ctx.getText())); + } + + @Override + public JsonSequence visitPath_string_literal(SqlJsonPathParser.Path_string_literalContext ctx) { + String val = ctx.getText(); + return new JsonSequence(stripQuotes(val)); + } + + @Override + public JsonSequence visitPath_named_variable(SqlJsonPathParser.Path_named_variableContext ctx) { + String id = ctx.getChild(1).getText(); + JsonSequence val = passing.get(id); + if (val == null) { + errorListener.semanticError("Variable " + id + + " referenced in path expression but no matching id found in passing clause", ctx); + return JsonSequence.nullJsonSequence; + } else { + return val; + } + } + + @Override + public JsonSequence visitPath_context_variable(SqlJsonPathParser.Path_context_variableContext ctx) { + // Sets the matching as the root of the tree. 
+ matching = value; + return null; + } + + @Override + public JsonSequence visitPath_at_variable(SqlJsonPathParser.Path_at_variableContext ctx) { + previousElement = PathElement.START; + matching = matchAtFilter; + return null; + } + + @Override + public JsonSequence visitMember_accessor_id(SqlJsonPathParser.Member_accessor_idContext ctx) { + matching = accessMember(ctx.getChild(1).getText()); + previousElement = PathElement.MEMBER_ACCESSOR; + return null; + } + + @Override + public JsonSequence visitMember_accessor_string(SqlJsonPathParser.Member_accessor_stringContext ctx) { + matching = accessMember(stripQuotes(ctx.getChild(1).getText())); + previousElement = PathElement.MEMBER_ACCESSOR; + return null; + } + + @Override + public JsonSequence visitWildcard_member_accessor(SqlJsonPathParser.Wildcard_member_accessorContext ctx) { + if (matching != null && matching.isObject()) { + // I think I'm supposed to return the entire object here + previousElement = PathElement.MEMBER_WILDCARD; + return null; + } + matching = JsonSequence.emptyResult; + return null; + } + + @Override + public JsonSequence visitArray_accessor(SqlJsonPathParser.Array_accessorContext ctx) { + if (matching == null || matching.isNull()) return null; + + JsonSequence subscripts = visit(ctx.getChild(1)); + assert subscripts.isList(); + + // It is legitimate here for matching to be either a list ($.name[3]) or an object ($.*[3]). In the list + // case we want to return a list. In the object case, we want to return an object, with only the fields that + // are a list. + if (matching.isList()) { + matching = applySubscriptsToOneArray(matching, subscripts, ctx); + } else if (matching.isObject() && previousElement == PathElement.MEMBER_WILDCARD) { + JsonSequence newMatches = new JsonSequence(new HashMap<>()); + for (Map.Entry matchingEntry : matching.asObject().entrySet()) { + if (matchingEntry.getValue().isList()) { + JsonSequence res = applySubscriptsToOneArray(matchingEntry.getValue(), subscripts, ctx); + if (res != JsonSequence.emptyResult) { + newMatches.asObject().put(matchingEntry.getKey(), res); + } + } + } + matching = newMatches; + } else { + matching = JsonSequence.emptyResult; + } + return null; + } + + @Override + public JsonSequence visitWildcard_array_accessor(SqlJsonPathParser.Wildcard_array_accessorContext ctx) { + if (matching == null || matching.isNull()) return null; + + // It is legitimate here for matching to be either a list ($.name[*]) or an object ($.*[*]). In the list + // case we want to return a list. In the object case, we want to return an object, with only the fields that + // are a list. + if (matching.isList()) { + // NOP + } else if (matching.isObject() && previousElement == PathElement.MEMBER_WILDCARD) { + JsonSequence newMatches = new JsonSequence(new HashMap<>()); + for (Map.Entry matchingEntry : matching.asObject().entrySet()) { + if (matchingEntry.getValue().isList()) { + newMatches.asObject().put(matchingEntry.getKey(), matchingEntry.getValue()); + } + } + matching = newMatches; + } else { + matching = JsonSequence.emptyResult; + } + previousElement = PathElement.MULTI_SUBSCRIPT; + return null; + } + + /** + * + * @param ctx + * @return a list of subscripts. 
One or more of these could be objects with start and end value which + * represent the use 'to' + */ + @Override + public JsonSequence visitSubscript_list(SqlJsonPathParser.Subscript_listContext ctx) { + if (ctx.getChildCount() == 1) { + // This is the simple subscript case, but it might still be a 'x TO y' + JsonSequence subscript = visit(ctx.getChild(0)); + List list = new ArrayList<>(); + list.add(subscript); + return new JsonSequence(list); + } else if (ctx.getChildCount() == 3) { + JsonSequence subscriptList = visit(ctx.getChild(0)); + JsonSequence subscript = visit(ctx.getChild(2)); + subscriptList.asList().add(subscript); + return subscriptList; + } else { + throw new RuntimeException("Programming error"); + } + } + + /** + * + * @param ctx + * @return a Json object with start and end tokens + */ + @Override + public JsonSequence visitSubscript_to(SqlJsonPathParser.Subscript_toContext ctx) { + JsonSequence startSeq = visit(ctx.getChild(0)); + JsonSequence endSeq = visit(ctx.getChild(2)); + if (endSeq != lastJsonSequence && endSeq.asLong() < startSeq.asLong()) { + errorListener.runtimeError("The end subscript must be greater than or equal to the start subscript", ctx); + return JsonSequence.nullJsonSequence; + } + Map subscripts = new HashMap<>(); + subscripts.put(START_SUBSCRIPT, startSeq); + subscripts.put(END_SUBSCRIPT, endSeq); + return new JsonSequence(subscripts); + } + + @Override + public JsonSequence visitSubscript_last(SqlJsonPathParser.Subscript_lastContext ctx) { + return lastJsonSequence; + } + + @Override + public JsonSequence visitMethod_type(SqlJsonPathParser.Method_typeContext ctx) { + // Note, differences from the SQL spec. This does not try to guess if something is a datetime + if (matching != JsonSequence.emptyResult) { + switch (matching.getType()) { + case LONG: + case DOUBLE: + matching = new JsonSequence("number"); + break; + + case NULL: + matching = new JsonSequence("null"); // Note, this is a string with null, not the null value + break; + + case STRING: + matching = new JsonSequence("string"); + break; + + case BOOL: + matching = new JsonSequence("boolean"); + break; + + case LIST: + matching = new JsonSequence("array"); + break; + + case OBJECT: + matching = new JsonSequence("object"); + break; + + default: + throw new RuntimeException("Programming error"); + } + } + previousElement = PathElement.METHOD; + return null; + } + + @Override + public JsonSequence visitMethod_size(SqlJsonPathParser.Method_sizeContext ctx) { + if (matching != JsonSequence.emptyResult) { + if (matching.isList()) { + matching = new JsonSequence(matching.asList().size()); + } else if (matching.isObject()) { + // NOTE: this is not to spec. Per the spec everything but list should return 1, but asking the size + // of the object seems like a reasonable thing to do. 
+ matching = new JsonSequence(matching.asObject().size()); + } else { + matching = new JsonSequence(1); + } + } + return null; + } + + @Override + public JsonSequence visitMethod_double(SqlJsonPathParser.Method_doubleContext ctx) { + switch (matching.getType()) { + case DOUBLE: + case EMPTY_RESULT: + break; + + case LONG: + matching = new JsonSequence((double)matching.asLong()); + break; + + case STRING: + matching = new JsonSequence(Double.valueOf(matching.asString())); + break; + + default: + errorListener.runtimeError("Double method requires numeric or string argument, passed a " + matching.getType().name().toLowerCase(), ctx); + break; + } + return null; + } + + @Override + public JsonSequence visitMethod_int(SqlJsonPathParser.Method_intContext ctx) { + switch (matching.getType()) { + case LONG: + case EMPTY_RESULT: + break; + + case DOUBLE: + matching = new JsonSequence((long)matching.asDouble()); + break; + + case STRING: + matching = new JsonSequence(Long.valueOf(matching.asString())); + break; + + default: + errorListener.runtimeError("Integer method requires numeric or string argument, passed a " + matching.getType().name().toLowerCase(), ctx); + break; + } + return null; + } + + @Override + public JsonSequence visitMethod_ceiling(SqlJsonPathParser.Method_ceilingContext ctx) { + switch (matching.getType()) { + case LONG: + case EMPTY_RESULT: + break; + + case DOUBLE: + // Note, I am going against the standard here by doing a type conversion from double -> long + matching = new JsonSequence((long)Math.ceil(matching.asDouble())); + break; + + default: + errorListener.runtimeError("Ceiling method requires numeric argument, passed a " + matching.getType().name().toLowerCase(), ctx); + break; + } + return null; + } + + @Override + public JsonSequence visitMethod_floor(SqlJsonPathParser.Method_floorContext ctx) { + switch (matching.getType()) { + case LONG: + case EMPTY_RESULT: + break; + + case DOUBLE: + // Note, I am going against the standard here by doing a type conversion from double -> long + matching = new JsonSequence((long)Math.floor(matching.asDouble())); + break; + + default: + errorListener.runtimeError("Floor method requires numeric argument, passed a " + matching.getType().name().toLowerCase(), ctx); + break; + } + return null; + } + + @Override + public JsonSequence visitMethod_abs(SqlJsonPathParser.Method_absContext ctx) { + switch (matching.getType()) { + case EMPTY_RESULT: + break; + + case LONG: + matching = new JsonSequence(Math.abs(matching.asLong())); + break; + + case DOUBLE: + matching = new JsonSequence(Math.abs(matching.asDouble())); + break; + + default: + errorListener.runtimeError("Abs method requires numeric argument, passed a " + matching.getType().name().toLowerCase(), ctx); + break; + } + return null; + } + + @Override + public JsonSequence visitFilter_expression(SqlJsonPathParser.Filter_expressionContext ctx) { + if (matching != JsonSequence.emptyResult) { + if (matching.isList()) { + // If this is a list, we need to apply the filter to each element in turn and built up a matching list + JsonSequence matchingList = new JsonSequence(new ArrayList<>()); + for (JsonSequence matchingElement : matching.asList()) { + matchAtFilter = matchingElement; + JsonSequence eval = visit(ctx.getChild(2)); + if (eval.asBool()) matchingList.asList().add(matchingElement); + } + matching = matchingList.asList().size() > 0 ? matchingList : JsonSequence.emptyResult; + } else { + // Cache the match we've seen so far. 
+ matchAtFilter = matching; + JsonSequence eval = visit(ctx.getChild(2)); + assert eval.isBool(); + matching = eval.asBool() ? matchAtFilter : JsonSequence.emptyResult; + } + } + return null; + } + + @Override + public JsonSequence visitBoolean_disjunction(SqlJsonPathParser.Boolean_disjunctionContext ctx) { + if (ctx.getChildCount() == 1) { + return visit(ctx.getChild(0)); + } else { + JsonSequence left = visit(ctx.getChild(0)); + assert left.isBool(); + if (left.asBool()) return JsonSequence.trueJsonSequence; + JsonSequence right = visit(ctx.getChild(2)); + assert right.isBool(); + return right; + } + } + + @Override + public JsonSequence visitBoolean_conjunction(SqlJsonPathParser.Boolean_conjunctionContext ctx) { + if (ctx.getChildCount() == 1) { + return visit(ctx.getChild(0)); + } else { + JsonSequence left = visit(ctx.getChild(0)); + assert left.isBool(); + if (!left.asBool()) return JsonSequence.falseJsonSequence; + JsonSequence right = visit(ctx.getChild(2)); + assert right.isBool(); + return right; + } + } + + @Override + public JsonSequence visitBoolean_negation(SqlJsonPathParser.Boolean_negationContext ctx) { + if (ctx.getChildCount() == 1) { + return visit(ctx.getChild(0)); + } else { + JsonSequence val = visit(ctx.getChild(1)); + assert val.isBool(); + return val.asBool() ? JsonSequence.falseJsonSequence : JsonSequence.trueJsonSequence; + } + } + + @Override + public JsonSequence visitDelimited_predicate(SqlJsonPathParser.Delimited_predicateContext ctx) { + if (ctx.getChildCount() == 1) { + return visit(ctx.getChild(0)); + } else { + return visit(ctx.getChild(1)); + } + } + + @Override + public JsonSequence visitExists_path_predicate(SqlJsonPathParser.Exists_path_predicateContext ctx) { + JsonSequence val = visit(ctx.getChild(2)); + return matching != JsonSequence.emptyResult ? JsonSequence.trueJsonSequence : JsonSequence.falseJsonSequence; + } + + // In all these comparison predicates the results can come to us in one of two ways. The node may be a constant + // which means we'll get the answer back as a result of visiting the node. It may also be a path match, which + // means we'll get it by looking in matching. So look at the result of the visit first, and if it's null, then + // look at matching. If both are null throw up our hands. 
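  // For example (illustrative): in a filter such as "?(@.price > 50)", visiting the left child ("@.price")
  // returns null and leaves the member's value in matching, while visiting the right child returns the literal
  // 50 directly; binaryComparisonOperator() below falls back to matching whenever a visit returns null.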
+ + @Override + public JsonSequence visitComparison_predicate_equals(SqlJsonPathParser.Comparison_predicate_equalsContext ctx) { + return binaryComparisonOperator(ctx, (lf, rt) -> lf.equalsOp(rt, errorListener, ctx)); + } + + @Override + public JsonSequence visitComparison_predicate_not_equals(SqlJsonPathParser.Comparison_predicate_not_equalsContext ctx) { + return binaryComparisonOperator(ctx, (lf, rt) -> lf.notEqualsOp(rt, errorListener, ctx)); + } + + @Override + public JsonSequence visitComparison_predicate_greater_than(SqlJsonPathParser.Comparison_predicate_greater_thanContext ctx) { + return binaryComparisonOperator(ctx, (lf, rt) -> lf.greaterThanOp(rt, errorListener, ctx)); + } + + @Override + public JsonSequence visitComparison_predicate_greater_than_equals(SqlJsonPathParser.Comparison_predicate_greater_than_equalsContext ctx) { + return binaryComparisonOperator(ctx, (lf, rt) -> lf.greaterThanEqualOp(rt, errorListener, ctx)); + } + + @Override + public JsonSequence visitComparison_predicate_less_than(SqlJsonPathParser.Comparison_predicate_less_thanContext ctx) { + return binaryComparisonOperator(ctx, (lf, rt) -> lf.lessThanOp(rt, errorListener, ctx)); + } + + @Override + public JsonSequence visitComparison_predicate_less_than_equals(SqlJsonPathParser.Comparison_predicate_less_than_equalsContext ctx) { + return binaryComparisonOperator(ctx, (lf, rt) -> lf.lessThanEqualOp(rt, errorListener, ctx)); + } + + @Override + public JsonSequence visitLike_regex_predicate(SqlJsonPathParser.Like_regex_predicateContext ctx) { + visit(ctx.getChild(0)); + // The left side should always be the path, and right a constant + JsonSequence left = matching; + JsonSequence right = visit(ctx.getChild(2)); + if (!left.isString()) { + errorListener.semanticError("Regular expressions can only be used on strings", ctx); + return JsonSequence.falseJsonSequence; + } + assert right.isString(); + + md5.reset(); + md5.update(right.asString().getBytes()); + byte[] key = md5.digest(); + Pattern pattern = regexPatterns.get(key); + if (pattern == null) { + try { + pattern = Pattern.compile(right.asString()); + regexPatterns.put(key, pattern); + } catch (PatternSyntaxException e) { + errorListener.semanticError("Regular expression syntax error " + e.getMessage(), ctx); + return JsonSequence.falseJsonSequence; + } + } + Matcher m = pattern.matcher(left.asString()); + return m.find(0) ? JsonSequence.trueJsonSequence : JsonSequence.falseJsonSequence; + } + + @Override + public JsonSequence visitStarts_with_predicate(SqlJsonPathParser.Starts_with_predicateContext ctx) { + visit(ctx.getChild(0)); + // The left side should always be the path, and right a constant or passed in variable + JsonSequence left = matching; + JsonSequence right = visit(ctx.getChild(3)); + + if (!left.isString() || !right.isString()) { + errorListener.semanticError("Starts with can only be used with strings", ctx); + return JsonSequence.falseJsonSequence; + } + + return left.asString().startsWith(right.asString()) ? 
JsonSequence.trueJsonSequence : JsonSequence.falseJsonSequence; + } + + @VisibleForTesting + Mode getMode() { + return mode; + } + + private int checkSubscript(JsonSequence subscript, ParserRuleContext ctx) throws IOException { + if (!subscript.isLong()) { + errorListener.semanticError("Subscripts must be integer values", ctx); + throw new IOException(); + } + if (subscript.asLong() > Integer.MAX_VALUE) { + errorListener.runtimeError("Subscripts cannot exceed " + Integer.MAX_VALUE, ctx); + throw new IOException(); + } + return (int)subscript.asLong(); + } + + private JsonSequence applySubscriptsToOneArray(JsonSequence oneList, JsonSequence subscripts, ParserRuleContext ctx) { + try { + JsonSequence newList = new JsonSequence(new ArrayList<>()); + for (JsonSequence sub : subscripts.asList()) { + // This might be a number, or it might be an object with a start and end subscript, or it might be 'last'. + if (sub == lastJsonSequence) { // Use of == intentional here + newList.asList().add(oneList.asList().get(oneList.asList().size() - 1)); + } else if (sub.isLong()) { + if (sub.asLong() < oneList.asList().size()) { // make sure we don't fly off the end + newList.asList().add(oneList.asList().get(checkSubscript(sub, ctx))); + } + } else if (sub.isObject()) { + assert sub.asObject().containsKey(START_SUBSCRIPT); + int start = checkSubscript(sub.asObject().get(START_SUBSCRIPT), ctx); + assert sub.asObject().containsKey(END_SUBSCRIPT); + JsonSequence endSubscript = sub.asObject().get(END_SUBSCRIPT); + int end = (endSubscript == lastJsonSequence) ? end = oneList.asList().size() - 1 : checkSubscript(endSubscript, ctx); + // visitSubscript_to already checked that start <= end + for (int i = start; i <= end; i++) { + if (i >= oneList.asList().size()) break; // Don't fly off the end + newList.asList().add(oneList.asList().get(i)); + } + } else { + throw new RuntimeException("programming error, sub is a " + sub.getType()); + } + } + // if only one value was accessed unwrap it + if (subscripts.asList().size() == 1 && (subscripts.asList().get(0) == lastJsonSequence || subscripts.asList().get(0).isLong())) { + if (newList.asList().size() > 0) { + previousElement = PathElement.SINGLE_SUBCRIPT; + return newList.asList().get(0); + } + else return JsonSequence.emptyResult; + } else { + previousElement = PathElement.MULTI_SUBSCRIPT; + return newList; + } + } catch (IOException e) { + return JsonSequence.nullJsonSequence; + } + } + + private JsonSequence accessMember(String memberKey) { + if (matching.isObject()) { + return accessMemberInObject(matching, memberKey); + } else if (matching.isList() && (previousElement == PathElement.MULTI_SUBSCRIPT)) { + JsonSequence newMatching = new JsonSequence(new ArrayList<>()); + for (JsonSequence element : matching.asList()) { + if (element.isObject()) { + JsonSequence newMember = accessMemberInObject(element, memberKey); + if (newMember != JsonSequence.emptyResult) newMatching.asList().add(newMember); + } + } + return newMatching; + } + return JsonSequence.emptyResult; + + } + + private JsonSequence accessMemberInObject(JsonSequence object, String memberKey) { + Map m = object.asObject(); + JsonSequence next = m.get(memberKey); + return next == null ? JsonSequence.emptyResult : next; + } + + private String stripQuotes(String quotedStr) { + return quotedStr.substring(1, quotedStr.length() - 1); + } + + private JsonSequence binaryComparisonOperator(ParserRuleContext ctx, BinaryOperator comparisonOp) { + JsonSequence left = visit(ctx.getChild(0)); + left = left == null ? 
matching : left; + JsonSequence right = visit(ctx.getChild(2)); + right = right == null ? matching : right; + return comparisonOp.apply(left, right); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/PathParseResult.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/PathParseResult.java new file mode 100644 index 0000000000..2c8f387cda --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/PathParseResult.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic.sqljsonpath; + +import org.antlr.v4.runtime.tree.ParseTree; + +public class PathParseResult { + final ParseTree parseTree; + final ErrorListener errorListener; + final String pathExpr; + + public PathParseResult(ParseTree parseTree, ErrorListener errorListener, String pathExpr) { + this.parseTree = parseTree; + this.errorListener = errorListener; + this.pathExpr = pathExpr; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/PathParser.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/PathParser.java new file mode 100644 index 0000000000..36aa9d2834 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/PathParser.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic.sqljsonpath; + +import org.antlr.v4.runtime.ANTLRInputStream; +import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.tree.ParseTree; +import org.apache.hadoop.hive.ql.udf.generic.SqlJsonPathLexer; +import org.apache.hadoop.hive.ql.udf.generic.SqlJsonPathParser; + +import java.io.ByteArrayInputStream; +import java.io.IOException; + +public class PathParser { + + /** + * Parse a path expression. 
+   * @param pathExpression the path expression to parse
+   * @return {@link PathParseResult} with results of the parse
+   * @throws JsonPathException if the expression fails to parse
+   * @throws IOException if Antlr fails to read the input stream (shouldn't really happen unless you call it with a null string).
+   */
+  public PathParseResult parse(String pathExpression) throws JsonPathException, IOException {
+    ErrorListener errorListener = new ErrorListener();
+    SqlJsonPathLexer scanner = new SqlJsonPathLexer(new ANTLRInputStream(new ByteArrayInputStream(pathExpression.getBytes())));
+    CommonTokenStream tokens = new CommonTokenStream(scanner);
+    SqlJsonPathParser parser = new SqlJsonPathParser(tokens);
+    parser.addErrorListener(errorListener);
+    ParseTree tree = parser.path_expression();
+    errorListener.checkForErrors(pathExpression);
+    return new PathParseResult(tree, errorListener, pathExpression);
+
+  }
+}
diff --git ql/src/main/antlr4/org/apache/hadoop/hive/ql/udf/generic/Json.g4 ql/src/main/antlr4/org/apache/hadoop/hive/ql/udf/generic/Json.g4
new file mode 100644
index 0000000000..5d72cd9d9d
--- /dev/null
+++ ql/src/main/antlr4/org/apache/hadoop/hive/ql/udf/generic/Json.g4
@@ -0,0 +1,101 @@
+/**
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+grammar Json;
+
+object:
+    T_OPENBRACE T_CLOSEBRACE
+    | T_OPENBRACE element_list T_CLOSEBRACE
+    ;
+
+element_list:
+    element
+    | element_list T_COMMA element
+    ;
+
+element:
+    T_DOUBLEQUOTE_STR T_COLON value
+    ;
+
+value:
+    object
+    | array
+    | null_literal
+    | boolean_literal
+    | int_literal
+    | decimal_literal
+    | string_literal
+    ;
+
+array:
+    T_OPENBRACKET T_CLOSEBRACKET
+    | T_OPENBRACKET array_list T_CLOSEBRACKET
+    ;
+
+array_list:
+    array_element
+    | array_list T_COMMA array_element
+    ;
+
+array_element:
+    value
+    ;
+
+null_literal:
+    T_NULL
+    ;
+
+boolean_literal:
+    T_TRUE
+    | T_FALSE
+    ;
+
+int_literal:
+    T_INT
+    ;
+
+decimal_literal:
+    T_DECIMAL
+    ;
+
+string_literal:
+    T_DOUBLEQUOTE_STR
+    ;
+
+
+T_COLON : ':' ;
+T_COMMA : ',' ;
+T_OPENBRACE : '{' ;
+T_CLOSEBRACE : '}' ;
+T_OPENBRACKET : '[' ;
+T_CLOSEBRACKET : ']' ;
+
+T_FALSE : 'false' ;
+T_NULL : 'null' ;
+T_TRUE : 'true' ;
+
+T_INT : ('+'|'-')? [0-9]+ (('e'|'E') ('+'|'-')? [0-9]+)? ;
+T_DECIMAL : ('+'|'-')? [0-9]+ '.' [0-9]* (('e'|'E') ('+'|'-')? [0-9]+)?
+          | ('+'|'-')? '.' [0-9]+ (('e'|'E') ('+'|'-')? [0-9]+)? ;
+
+T_DOUBLEQUOTE_STR : '"' (DOUBLEQUOTE_ESC_CHAR | ~('"' | '\\'))* '"' ;
+
+DOUBLEQUOTE_ESC_CHAR: '\\"' | ESC_CHAR ;
+
+ESC_CHAR : '\\\\' | '\\r' | '\\n' | '\\t' | '\\u' [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] ;
+
+WS : [ \t\r\n]+ -> skip ;
diff --git ql/src/main/antlr4/org/apache/hadoop/hive/ql/udf/generic/SqlJsonPath.g4 ql/src/main/antlr4/org/apache/hadoop/hive/ql/udf/generic/SqlJsonPath.g4
new file mode 100644
index 0000000000..6577650f10
--- /dev/null
+++ ql/src/main/antlr4/org/apache/hadoop/hive/ql/udf/generic/SqlJsonPath.g4
@@ -0,0 +1,406 @@
+/**
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+grammar SqlJsonPath;
+
+// I've copied this more or less directly from the SQL 2016 spec, sections 9.38 and 9.39
+// Thus some of the rules are silly or trivial, but it makes it easier to follow
+// when looking at the spec. In some places where it didn't seem to matter or was
+// just too silly to put up with I've reduced it.
+
+path_expression:
+    path_wff // If I read the spec correctly, STRICT/LAX is required, but that seems pedantic. Better to default to lax.
+    | path_mode path_wff
+    ;
+
+path_mode:
+    T_STRICT
+    | T_LAX
+    ;
+
+path_wff: // "wff" is the spec's shorthand for "well-formed formula"
+ additive_expression + ; + +additive_expression: + multiplicative_expression + | additive_expression T_PLUS multiplicative_expression + | additive_expression T_MINUS multiplicative_expression + ; + +multiplicative_expression: + unary_expression + | multiplicative_expression T_STAR unary_expression + | multiplicative_expression T_SLASH unary_expression + | multiplicative_expression T_PERCENT unary_expression + ; + +unary_expression: + accessor_expression + | T_PLUS unary_expression + | T_MINUS unary_expression + ; + +accessor_expression: + path_primary + | accessor_expression accessor_op + ; + +path_primary: + path_literal + | path_variable + | T_OPENPAREND path_wff T_CLOSEPAREND + ; + +path_variable: + path_context_variable + | path_named_variable + | path_at_variable + ; + +path_context_variable: + T_DOLLAR + ; + +path_named_variable: + T_DOLLAR T_IDENTIFIER + ; + +path_at_variable: + T_AT + ; + +accessor_op: + member_accessor + | wildcard_member_accessor + | array_accessor + | wildcard_array_accessor + | filter_expression + | item_method + ; + +member_accessor: + member_accessor_id + | member_accessor_string + ; + +member_accessor_id: + T_DOT T_IDENTIFIER + ; + +member_accessor_string: + T_DOT path_string_literal + ; + +wildcard_member_accessor: + T_DOT T_STAR + ; + +array_accessor: + T_OPENBRACKET subscript_list T_CLOSEBRACKET + ; + +subscript_list: + subscript + | subscript_list T_COMMA subscript + ; + +subscript: + subscript_simple + | subscript_to + ; + +subscript_simple: + subscript_item + ; + +subscript_to: + path_wff T_TO subscript_item + ; + +subscript_item: + path_wff + | subscript_last + ; + +subscript_last: + T_LAST + ; + +wildcard_array_accessor: + T_OPENBRACKET T_STAR T_CLOSEBRACKET + ; + +filter_expression: + T_QUESTION T_OPENPAREND path_predicate T_CLOSEPAREND + ; + +item_method: + T_DOT method + ; + +method: + method_type + | method_size + | method_double + | method_int // Added in, not part of the spec. But it seems goofy to have double() and not int() + // since ceiling and floor don't take strings. You can get around it with string.double().floor() + // but that seems silly. + | method_ceiling + | method_floor + | method_abs +// | T_DATETIME T_OPENPAREND path_string_literal? T_CLOSEPAREND +// | T_KEYVALUE T_OPENPAREND T_CLOSEPAREND + ; + +// Note, we do not currently support the DATETIME or KEYVALUE methods. There are methods in Hive to do casts from +// String to Datetime, there's no reason to embed the same functionality in here. +// As far as I can tell KEYVALUE is a way to remove duplicate keys, which my implementation doesn't support anyway. 
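// Editor's note (illustrative only, not part of the original patch): a few example path
// expressions that the rules above are intended to accept, to make the grammar easier to
// read against the spec; the field names used here are made up:
//   $.name                            -- member accessor on the context item
//   $.sports[0, 2 to last]            -- array accessor with a subscript list and a 'to' range
//   $.items[*]?(@.price > 10)         -- wildcard array accessor followed by a filter
//   strict $.gpa.double().ceiling()   -- explicit mode plus chained item methods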
+ +method_type: + T_TYPE T_OPENPAREND T_CLOSEPAREND + ; + +method_size: + T_SIZE T_OPENPAREND T_CLOSEPAREND + ; + +method_int: + T_INTFUNC T_OPENPAREND T_CLOSEPAREND + ; + +method_double: + T_DOUBLE T_OPENPAREND T_CLOSEPAREND + ; + +method_ceiling: + T_CEILING T_OPENPAREND T_CLOSEPAREND + ; + +method_floor: + T_FLOOR T_OPENPAREND T_CLOSEPAREND + ; + +method_abs: + T_ABS T_OPENPAREND T_CLOSEPAREND + ; + + +// predicates +path_predicate: + boolean_disjunction + ; + +boolean_disjunction: + boolean_conjunction + | boolean_disjunction T_OR boolean_conjunction + ; + +boolean_conjunction: + boolean_negation + | boolean_conjunction T_AND boolean_negation + ; + +boolean_negation: + predicate_primary + | T_BANG delimited_predicate + ; + +predicate_primary: + delimited_predicate + | nondelimited_predicate + ; + +delimited_predicate: + exists_path_predicate + | T_OPENPAREND path_predicate T_CLOSEPAREND + ; + +nondelimited_predicate: + comparison_predicate + | like_regex_predicate + | starts_with_predicate + | unknown_predicate + ; + +exists_path_predicate: + T_EXISTS T_OPENPAREND path_wff T_CLOSEPAREND + ; + +comparison_predicate: + comparison_predicate_equals + | comparison_predicate_not_equals + | comparison_predicate_greater_than + | comparison_predicate_greater_than_equals + | comparison_predicate_less_than + | comparison_predicate_less_than_equals + ; + +comparison_predicate_equals: + path_wff T_EQUALS path_wff + ; + +comparison_predicate_not_equals: + path_wff T_NE path_wff + ; + +comparison_predicate_greater_than: + path_wff T_GT path_wff + ; + +comparison_predicate_greater_than_equals: + path_wff T_GE path_wff + ; + +comparison_predicate_less_than: + path_wff T_LT path_wff + ; + +comparison_predicate_less_than_equals: + path_wff T_LE path_wff + ; + +like_regex_predicate: + path_wff T_LIKEREGEX path_string_literal // NOTE - we don't support flags + ; + +starts_with_predicate: + path_wff T_STARTS T_WITH starts_with_initial + ; + +starts_with_initial: + path_string_literal + | path_named_variable + ; + +unknown_predicate: + T_OPENPAREND path_predicate T_CLOSEPAREND T_IS T_UNKNOWN + ; + +path_literal: + path_null_literal + | path_boolean_literal + | path_numeric_literal + | path_string_literal + ; + +path_null_literal: + T_NULL + ; + +path_boolean_literal: + T_TRUE + | T_FALSE + ; + +path_numeric_literal: + path_integer_literal + | path_decimal_literal + ; + +path_integer_literal: + T_INT + ; + +path_decimal_literal: + T_DECIMAL + ; + +path_string_literal: + T_SINGLEQUOTE_STR + | T_DOUBLEQUOTE_STR + ; + + +// Lexical tokens +T_STAR : '*' ; +T_AT : '@' ; +T_COMMA : ',' ; +T_DOT : '.' ; +T_DOLLAR : '$' ; +T_QUESTION : '?' ; +T_AND : '&&' ; +T_OR : '||' ; +T_BANG : '!' 
;
+T_GT : '>' ;
+T_GE : '>=' ;
+T_LT : '<' ;
+T_LE : '<=' ;
+T_EQUALS : '==' ;
+T_NE : '<>'
+     | '!=' ; // NOTE - != is not in the spec, but given that Hive supports it I added it
+T_OPENBRACKET : '[' ;
+T_OPENPAREND : '(' ;
+T_CLOSEBRACKET : ']' ;
+T_CLOSEPAREND : ')' ;
+T_PLUS : '+' ;
+T_MINUS : '-' ;
+T_SLASH : '/' ;
+T_PERCENT : '%' ;
+
+// keywords
+T_ABS : 'abs' ;
+T_CEILING : 'ceiling' ;
+T_DATETIME : 'datetime' ;
+T_DOUBLE : 'double' ;
+T_EXISTS : 'exists' ;
+T_FALSE : 'false' ;
+T_FLOOR : 'floor' ;
+T_IS : 'is' ;
+T_INTFUNC : 'integer' ; // Added in, not part of the spec
+T_KEYVALUE : 'keyvalue' ;
+T_LAST : 'last' ;
+T_LAX : 'lax' ;
+T_LIKEREGEX : 'like_regex' ;
+T_NULL : 'null' ;
+T_SIZE : 'size' ;
+T_STARTS : 'starts' ;
+T_STRICT : 'strict' ;
+T_TO : 'to' ;
+T_TRUE : 'true' ;
+T_TYPE : 'type' ;
+T_UNKNOWN : 'unknown' ;
+T_WITH : 'with' ;
+
+T_INT : [0-9]+ (('e'|'E') ('+'|'-')? [0-9]+)? ;
+T_DECIMAL : [0-9]+ '.' [0-9]* (('e'|'E') ('+'|'-')? [0-9]+)?
+          | '.' [0-9]+ (('e'|'E') ('+'|'-')? [0-9]+)? ;
+
+// NOTE - this does not match the spec at all. For now I have it set to just ASCII
+// letters and numbers. It's supposed to support any unicode characters and numbers
+// plus any unicode connecting character (rather than just '_'). Antlr does not
+// have support for unicode character classes. Ideally we need to figure out a way
+// to support that, at least more common characters. But an argument can also be made
+// for matching Hive's identifier support.
+T_IDENTIFIER : ([0-9]|[a-z]|[A-Z]) ([0-9]|[a-z]|[A-Z]|'_')* ;
+
+// NOTE, this does not exactly match the SQL/JSON Path spec, as that would allow strings
+// like 'this is a \s\i\\l\l\y string' because it only specifies a few escape characters
+// and says everything else should be ok. This instead says you can't have any \ in
+// your string unless it is itself escaped \\, is a \n, \t, or \r, a unicode escape, or
+// a quote escape \' or \". This is both much easier to write a rule for and in my (Alan's)
+// opinion much more reasonable.
+T_SINGLEQUOTE_STR : '\'' (SINGLEQUOTE_ESC_CHAR | ~('\'' | '\\'))* '\'' ;
+T_DOUBLEQUOTE_STR : '"' (DOUBLEQUOTE_ESC_CHAR | ~('"' | '\\'))* '"' ;
+
+SINGLEQUOTE_ESC_CHAR: '\\\'' | ESC_CHAR ;
+DOUBLEQUOTE_ESC_CHAR: '\\"' | ESC_CHAR ;
+
+ESC_CHAR : '\\\\' | '\\r' | '\\n' | '\\t' | '\\u' [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] ;
+
+WS : [ \t\r\n]+ -> skip ;
+
diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/BaseTestGenericUDFJson.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/BaseTestGenericUDFJson.java
new file mode 100644
index 0000000000..fb60a18485
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/BaseTestGenericUDFJson.java
@@ -0,0 +1,286 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.common.ObjectPair; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaConstantStringObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.io.BooleanWritable; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.junit.Before; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; +import static org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; +import static org.apache.hadoop.hive.ql.udf.generic.GenericUDFJsonValue.WhatToReturn; + +abstract class BaseTestGenericUDFJson { + protected List json; + + @Before + public void buildJson() { + json = Arrays.asList( + "{" + + "\"name\" : \"chris\"," + + "\"age\" : 21," + + "\"gpa\" : 3.24," + + "\"honors\" : false," + + "\"sports\" : [ \"baseball\", \"soccer\" ]" + + "}", + "{" + + "\"name\" : \"tracy\"," + + "\"age\" : 20," + + "\"gpa\" : 3.94," + + "\"honors\" : true," + + "\"sports\" : [ \"basketball\" ]" + + "}", + "{" + + "\"name\" : null," + + "\"age\" : null," + + "\"gpa\" : null," + + "\"honors\" : null," + + "\"sports\" : null" + + "}", + "{" + + "\"boollist\" : [ true, false ]," + + "\"longlist\" : [ 3, 26 ]," + + "\"doublelist\" : [ 3.52, 2.86 ]," + + "\"multilist\" : [ \"string\", 1, 2.3, false, null ]," + + "\"longstring\" : \"42\"," + + "\"doublestring\" : \"3.1415\"," + + "\"boolstring\" :\"true\"," + + "\"subobj\" : { " + + "\"str\" : \"strval\"," + + "\"long\" : -1," + + "\"decimal\" : -2.2," + + "\"bool\" : true" + + "}," + + "\"nested\" : { " + + "\"nestedlist\" : [ {" + + "\"anothernest\" : [ 10, 100, 1000 ]" + + "}" + + "] }" + + "}" + ); + } + + + protected ObjectPair test(List jsonValues, String pathExpr) throws HiveException { + return test(jsonValues, pathExpr, null); + } + + protected ObjectPair test(List jsonValues, String pathExpr, List defaultVals) + throws HiveException { + return test(jsonValues, pathExpr, defaultVals, null); + } + + protected ObjectPair test(List jsonValues, String pathExpr, List defaultVals, + WhatToReturn onEmpty) throws HiveException { + return test(jsonValues, pathExpr, defaultVals, onEmpty, null); + } + + protected ObjectPair test(List jsonValues, String pathExpr, List defaultVals, + WhatToReturn onEmpty, WhatToReturn onError) throws HiveException { + return test(jsonValues, pathExpr, defaultVals, onEmpty, onError, null); + } + + /** + * Run a test. + * @param jsonValues list of values to pass to json_value() + * @param pathExpr path expression + * @param defaultVals list of default values. Can be null if there are no default values. Can contain a single + * element if you want the default to be constant. Otherwise it should have the same number of + * values as jsonValues. 
+ * @param onEmpty action on empty, can be null + * @param onError action on error, can be null + * @param passing list of maps to pass in as passing values, can be null + * @return object inspector to read results with and array of results + * @throws HiveException if thrown by json_value + */ + protected ObjectPair test(List jsonValues, String pathExpr, List defaultVals, + WhatToReturn onEmpty, WhatToReturn onError, + List> passing) throws HiveException { + GenericUDFJsonValue udf = getUdf(); + // Figure out our default values, if there are any. Note that this also fills out the contents of defaultVal if + // one 1 constant value was passed there + ObjectInspector returnOI = objectInspectorFromDefaultVals(defaultVals, jsonValues.size(), defaultVals == null || defaultVals.size() == 1); + // Figure out our passing object inspectors, if we need them + Map passingOIs = null; + if (passing != null) { + passingOIs = new HashMap<>(passing.get(0).size()); + Map> inverted = new HashMap<>(); + for (Map p : passing) { + for (Map.Entry e : p.entrySet()) { + List list = inverted.computeIfAbsent(e.getKey(), s -> new ArrayList<>()); + list.add(e.getValue()); + } + } + for (Map.Entry> inv : inverted.entrySet()) { + passingOIs.put(inv.getKey(), objectInspectorFromDefaultVals(inv.getValue(), inv.getValue().size(), false)); + } + } + ObjectInspector resultObjectInspector = udf.initialize(buildInitArgs(pathExpr, returnOI, onEmpty, onError, passingOIs)); + Object[] results = new Object[jsonValues.size()]; + for (int i = 0; i < jsonValues.size(); i++) { + Object o = udf.evaluate(buildExecArgs(jsonValues.get(i), pathExpr, + defaultVals == null ? null : defaultVals.get(i), onEmpty, onError, passing == null ? null : passing.get(i))); + results[i] = ObjectInspectorUtils.copyToStandardObject(o, resultObjectInspector); + } + return new ObjectPair<>(resultObjectInspector, results); + } + + private ObjectInspector[] buildInitArgs(String pathExpr, ObjectInspector returnOI, WhatToReturn onEmpty, + WhatToReturn onError, Map passing) { + List initArgs = new ArrayList<>(); + initArgs.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); + initArgs.add(new JavaConstantStringObjectInspector(pathExpr)); + // Nesting here is important because we want it to come out with the right number of args + if (returnOI != null) { + initArgs.add(returnOI); + if (onEmpty != null) { + initArgs.add(new JavaConstantStringObjectInspector(onEmpty.name())); + if (onError != null) { + initArgs.add(new JavaConstantStringObjectInspector(onError.name())); + if (passing != null) { + for (Map.Entry entry : passing.entrySet()) { + initArgs.add(new JavaConstantStringObjectInspector(entry.getKey())); + initArgs.add(entry.getValue()); + } + } + } + } + } + return initArgs.toArray(new ObjectInspector[0]); + } + + + private DeferredObject[] buildExecArgs(String jsonValue, String pathExpr, Object defaultVal, WhatToReturn onEmpty, + WhatToReturn onError, Map passing) { + List execArgs = new ArrayList<>(); + execArgs.add(wrapInDeferred(jsonValue)); + execArgs.add(wrapInDeferred(pathExpr)); + if (defaultVal != null) { + execArgs.add(wrapInDeferred(defaultVal)); + if (onEmpty != null) { + execArgs.add(wrapInDeferred(onEmpty.name())); + if (onError != null) { + execArgs.add(wrapInDeferred(onError.name())); + if (passing != null) { + for (Map.Entry entry : passing.entrySet()) { + execArgs.add(wrapInDeferred(entry.getKey())); + execArgs.add(wrapInDeferred(entry.getValue())); + } + } + } + } + } + return execArgs.toArray(new DeferredObject[0]); + } + 
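  // Editor's note (illustrative sketch, not part of the original patch): buildInitArgs and buildExecArgs
  // above mirror the positional argument layout the UDF under test appears to expect:
  //   initialize: [json string OI, constant path string, default-value OI, constant on-empty behavior,
  //                constant on-error behavior, then repeated (passing-variable name, passing-variable OI) pairs]
  //   evaluate:   DeferredObjects in the same order, e.g. test(json, "$.age", wrapInList(1L), WhatToReturn.DEFAULT)
  //               produces per-row args of (json row, "$.age", 1L, "DEFAULT").
  // The nested if-blocks are what keep trailing optional arguments from being emitted when an earlier
  // optional argument was not supplied.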
+ private DeferredObject wrapInDeferred(Object obj) { + if (obj == null) return null; + else if (obj instanceof Map) return new DeferredJavaObject(new ArrayList<>(((Map)obj).values())); + else if (obj instanceof List) return new DeferredJavaObject(obj); + else if (obj instanceof String) return new DeferredJavaObject(new Text((String)obj)); + else if (obj instanceof Long) return new DeferredJavaObject(new LongWritable((Long)obj)); + else if (obj instanceof Integer) return new DeferredJavaObject(new IntWritable((Integer) obj)); + else if (obj instanceof Double) return new DeferredJavaObject(new DoubleWritable((Double)obj)); + else if (obj instanceof Float) return new DeferredJavaObject(new FloatWritable((Float)obj)); + else if (obj instanceof Boolean) return new DeferredJavaObject(new BooleanWritable((Boolean)obj)); + else throw new RuntimeException("what?"); + } + + private ObjectInspector objectInspectorFromDefaultVals(List defaultVals, int numRecords, boolean isConstant) { + if (defaultVals == null || defaultVals.isEmpty()) return PrimitiveObjectInspectorFactory.writableStringObjectInspector; + + Object first = defaultVals.get(0); + if (isConstant) { + while (defaultVals.size() < numRecords) defaultVals.add(defaultVals.get(0)); + } + return objToObjectInspector(first, isConstant); + } + + private ObjectInspector objToObjectInspector(Object obj, boolean isConstant) { + if (obj instanceof List) { + List list = (List)obj; + if (isConstant) { + return ObjectInspectorFactory.getStandardConstantListObjectInspector(objToObjectInspector( + list.get(0), isConstant), list); + } else { + return ObjectInspectorFactory.getStandardListObjectInspector(objToObjectInspector(list.get(0), isConstant)); + } + } else if (obj instanceof Map) { + Map map = (Map)obj; + List fields = new ArrayList<>(); + List ois = new ArrayList<>(); + List values = new ArrayList<>(); + for (Map.Entry e : map.entrySet()) { + fields.add(e.getKey()); + ois.add(objToObjectInspector(e.getValue(), isConstant)); + values.add(e.getValue()); + } + if (isConstant) { + return ObjectInspectorFactory.getStandardConstantStructObjectInspector(fields, ois, values); + } else { + return ObjectInspectorFactory.getStandardStructObjectInspector(fields, ois); + } + } else if (obj instanceof String) { + return isConstant ? PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, new Text((String)obj)) : + PrimitiveObjectInspectorFactory.writableStringObjectInspector; + } else if (obj instanceof Long) { + return isConstant ? PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.longTypeInfo, new LongWritable((Long)obj)) : + PrimitiveObjectInspectorFactory.writableLongObjectInspector; + } else if (obj instanceof Integer) { + return isConstant ? PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.intTypeInfo, new IntWritable((Integer)obj)) : + PrimitiveObjectInspectorFactory.writableIntObjectInspector; + } else if (obj instanceof Float) { + return isConstant ? PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.floatTypeInfo, new FloatWritable((Float)obj)) : + PrimitiveObjectInspectorFactory.writableFloatObjectInspector; + } else if (obj instanceof Double) { + return isConstant ? 
PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.doubleTypeInfo, new DoubleWritable((Double)obj)) : + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector; + } else if (obj instanceof Boolean) { + return isConstant ? PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.booleanTypeInfo, new BooleanWritable((Boolean)obj)) : + PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; + } else { + throw new RuntimeException("programming error"); + } + } + + protected List wrapInList(T element) { + List list = new ArrayList<>(); + list.add(element); + return list; + } + + abstract protected GenericUDFJsonValue getUdf(); +} diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFIsJson.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFIsJson.java new file mode 100644 index 0000000000..061e0019f1 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFIsJson.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.BooleanWritable; +import org.apache.hadoop.io.Text; +import org.junit.Assert; +import org.junit.Test; + +import java.util.HashMap; +import java.util.Map; + +public class TestGenericUDFIsJson { + + // Goal here isn't to test JSON permutations, TestJsonValueParser handles that. 
Just want to test that the + // is_json UDF works + + @Test + public void nullJson() throws HiveException { + Assert.assertNull(test(null)); + } + + @Test + public void emptyJson() throws HiveException { + Assert.assertFalse(test("")); + } + + @Test + public void badJson() throws HiveException { + Assert.assertFalse(test("bad json!")); + } + + @Test + public void goodJson() throws HiveException { + Assert.assertTrue(test("{ \"name\" : \"fred\" }")); + } + + @Test + public void multipleLines() throws HiveException { + Map inputs = new HashMap<>(); + inputs.put("{ \"name\" : \"fred\" }", true); + inputs.put("{ \"age\" : 35 }", true); + inputs.put("{ \"classes\" : [\"algebra\", \"painting\" ] }", true); + inputs.put("{ bad : }", false); + inputs.put("{ \"gpa\" : 1.35 }", true); + + GenericUDFIsJson isJson = new GenericUDFIsJson(); + ObjectInspector jsonOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + ObjectInspector[] initArgs = {jsonOI}; + isJson.initialize(initArgs); + + for (Map.Entry input : inputs.entrySet()) { + GenericUDF.DeferredObject jsonStr = new GenericUDF.DeferredJavaObject(new Text(input.getKey())); + GenericUDF.DeferredObject[] execArgs = {jsonStr}; + BooleanWritable result = (BooleanWritable)isJson.evaluate(execArgs); + Assert.assertNotNull(result); + Assert.assertEquals(input.getValue(), result.get()); + } + } + + private Boolean test(String json) throws HiveException { + GenericUDFIsJson isJson = new GenericUDFIsJson(); + ObjectInspector jsonOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + ObjectInspector[] initArgs = {jsonOI}; + isJson.initialize(initArgs); + + GenericUDF.DeferredObject jsonStr = new GenericUDF.DeferredJavaObject(json == null ? null : new Text(json)); + GenericUDF.DeferredObject[] execArgs = {jsonStr}; + BooleanWritable result = (BooleanWritable)isJson.evaluate(execArgs); + return result == null ? null : result.get(); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFJsonQuery.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFJsonQuery.java new file mode 100644 index 0000000000..8863636951 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFJsonQuery.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.common.ObjectPair; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Collections; + +public class TestGenericUDFJsonQuery extends BaseTestGenericUDFJson { + + @Test + public void noReturnSpecified() throws HiveException { + ObjectPair results = test(json, "$.name"); + Assert.assertEquals(4, results.getSecond().length); + Assert.assertTrue(results.getFirst() instanceof StringObjectInspector); + StringObjectInspector soi = (StringObjectInspector)results.getFirst(); + Assert.assertEquals("chris", soi.getPrimitiveJavaObject(results.getSecond()[0])); + Assert.assertEquals("tracy", soi.getPrimitiveJavaObject(results.getSecond()[1])); + Assert.assertNull(soi.getPrimitiveJavaObject(results.getSecond()[2])); + Assert.assertNull(soi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void strSpecified() throws HiveException { + ObjectPair results = test(json, "$.name", wrapInList("a")); + Assert.assertEquals(4, results.getSecond().length); + Assert.assertTrue(results.getFirst() instanceof StringObjectInspector); + StringObjectInspector soi = (StringObjectInspector)results.getFirst(); + Assert.assertEquals("chris", soi.getPrimitiveJavaObject(results.getSecond()[0])); + Assert.assertEquals("tracy", soi.getPrimitiveJavaObject(results.getSecond()[1])); + Assert.assertNull(soi.getPrimitiveJavaObject(results.getSecond()[2])); + Assert.assertNull(soi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void longBad() throws HiveException { + try { + test(json, "$.age", wrapInList(1L)); + Assert.fail(); + } catch (UDFArgumentException e) { + Assert.assertEquals("org.apache.hadoop.hive.ql.udf.generic.GenericUDFJsonQuery only returns String, Char, or Varchar", e.getMessage()); + } + } + + @Test + public void listBad() throws HiveException { + try { + test(json, "$.sports", wrapInList(Collections.singletonList("a"))); + Assert.fail(); + } catch (UDFArgumentException e) { + Assert.assertEquals("org.apache.hadoop.hive.ql.udf.generic.GenericUDFJsonQuery only returns String, Char, or Varchar", e.getMessage()); + } + } + + @Test + public void list() throws HiveException { + ObjectPair results = test(json, "$.sports"); + Assert.assertEquals(4, results.getSecond().length); + Assert.assertTrue(results.getFirst() instanceof StringObjectInspector); + StringObjectInspector soi = (StringObjectInspector)results.getFirst(); + Assert.assertEquals("[\"baseball\",\"soccer\"]", soi.getPrimitiveJavaObject(results.getSecond()[0])); + Assert.assertEquals("[\"basketball\"]", soi.getPrimitiveJavaObject(results.getSecond()[1])); + Assert.assertNull(soi.getPrimitiveJavaObject(results.getSecond()[2])); + Assert.assertNull(soi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void nested() throws HiveException { + ObjectPair results = test(json, "$.nested"); + Assert.assertEquals(4, results.getSecond().length); + Assert.assertTrue(results.getFirst() instanceof StringObjectInspector); + StringObjectInspector soi = (StringObjectInspector)results.getFirst(); + Assert.assertNull(soi.getPrimitiveJavaObject(results.getSecond()[0])); + Assert.assertNull(soi.getPrimitiveJavaObject(results.getSecond()[1])); + 
Assert.assertNull(soi.getPrimitiveJavaObject(results.getSecond()[2])); + Assert.assertEquals("{\"nestedlist\":[{\"anothernest\":[10,100,1000]}]}", soi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Override + protected GenericUDFJsonValue getUdf() { + return new GenericUDFJsonQuery(); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFJsonValue.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFJsonValue.java new file mode 100644 index 0000000000..6ab14a6dd4 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFJsonValue.java @@ -0,0 +1,665 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.common.ObjectPair; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; +import org.junit.Assert; +import org.junit.Test; + +import static org.apache.hadoop.hive.ql.udf.generic.GenericUDFJsonValue.WhatToReturn; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class TestGenericUDFJsonValue extends BaseTestGenericUDFJson { + + @Test + public void simpleString() throws HiveException { + ObjectPair results = test(json, "$.name"); + Assert.assertEquals(4, results.getSecond().length); + Assert.assertTrue(results.getFirst() instanceof StringObjectInspector); + StringObjectInspector soi = (StringObjectInspector)results.getFirst(); + Assert.assertEquals("chris", soi.getPrimitiveJavaObject(results.getSecond()[0])); + Assert.assertEquals("tracy", soi.getPrimitiveJavaObject(results.getSecond()[1])); + Assert.assertNull(soi.getPrimitiveJavaObject(results.getSecond()[2])); + Assert.assertNull(soi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void simpleLong() throws 
HiveException { + ObjectPair results = test(json, "$.age", wrapInList(1L)); + Assert.assertEquals(4, results.getSecond().length); + Assert.assertTrue(results.getFirst() instanceof LongObjectInspector); + LongObjectInspector loi = (LongObjectInspector)results.getFirst(); + Assert.assertEquals(21L, loi.getPrimitiveJavaObject(results.getSecond()[0])); + Assert.assertEquals(20L, loi.getPrimitiveJavaObject(results.getSecond()[1])); + Assert.assertNull(loi.getPrimitiveJavaObject(results.getSecond()[2])); + Assert.assertNull(loi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void simpleInt() throws HiveException { + ObjectPair results = test(json, "$.age", wrapInList(1)); + Assert.assertEquals(4, results.getSecond().length); + Assert.assertTrue(results.getFirst() instanceof IntObjectInspector); + IntObjectInspector ioi = (IntObjectInspector)results.getFirst(); + Assert.assertEquals(21, ioi.getPrimitiveJavaObject(results.getSecond()[0])); + Assert.assertEquals(20, ioi.getPrimitiveJavaObject(results.getSecond()[1])); + Assert.assertNull(ioi.getPrimitiveJavaObject(results.getSecond()[2])); + Assert.assertNull(ioi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void simpleDouble() throws HiveException { + ObjectPair results = test(json, "$.gpa", wrapInList(1.0)); + Assert.assertEquals(4, results.getSecond().length); + Assert.assertTrue(results.getFirst() instanceof DoubleObjectInspector); + DoubleObjectInspector doi = (DoubleObjectInspector)results.getFirst(); + Assert.assertEquals(3.24, doi.getPrimitiveJavaObject(results.getSecond()[0])); + Assert.assertEquals(3.94, doi.getPrimitiveJavaObject(results.getSecond()[1])); + Assert.assertNull(doi.getPrimitiveJavaObject(results.getSecond()[2])); + Assert.assertNull(doi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void simpleBoolean() throws HiveException { + ObjectPair results = test(json, "$.honors", wrapInList(true)); + Assert.assertEquals(4, results.getSecond().length); + Assert.assertTrue(results.getFirst() instanceof BooleanObjectInspector); + BooleanObjectInspector boi = (BooleanObjectInspector)results.getFirst(); + Assert.assertEquals(false, boi.getPrimitiveJavaObject(results.getSecond()[0])); + Assert.assertEquals(true, boi.getPrimitiveJavaObject(results.getSecond()[1])); + Assert.assertNull(boi.getPrimitiveJavaObject(results.getSecond()[2])); + Assert.assertNull(boi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void simpleListStr() throws HiveException { + ObjectPair results = test(json, "$.sports", + wrapInList(Collections.singletonList("a"))); + Assert.assertEquals(4, results.getSecond().length); + Assert.assertTrue(results.getFirst() instanceof ListObjectInspector); + ListObjectInspector loi = (ListObjectInspector)results.getFirst(); + Assert.assertTrue(loi.getListElementObjectInspector() instanceof StringObjectInspector); + Assert.assertEquals(2, loi.getListLength(results.getSecond()[0])); + StringObjectInspector soi = (StringObjectInspector)loi.getListElementObjectInspector(); + Assert.assertEquals("baseball", soi.getPrimitiveJavaObject(loi.getListElement(results.getSecond()[0], 0))); + Assert.assertEquals("soccer", soi.getPrimitiveJavaObject(loi.getListElement(results.getSecond()[0], 1))); + Assert.assertEquals(1, loi.getListLength(results.getSecond()[1])); + Assert.assertEquals("basketball", soi.getPrimitiveJavaObject(loi.getListElement(results.getSecond()[1], 0))); + Assert.assertNull(loi.getList(results.getSecond()[2])); + 
Assert.assertNull(loi.getList(results.getSecond()[3])); + } + + @Test + public void simpleListLong() throws HiveException { + ObjectPair results = test(json, "$.longlist", + wrapInList(Collections.singletonList(1L))); + Assert.assertEquals(4, results.getSecond().length); + Assert.assertTrue(results.getFirst() instanceof ListObjectInspector); + ListObjectInspector loi = (ListObjectInspector)results.getFirst(); + Assert.assertEquals(2, loi.getListLength(results.getSecond()[3])); + Assert.assertTrue(loi.getListElementObjectInspector() instanceof LongObjectInspector); + LongObjectInspector lloi = (LongObjectInspector)loi.getListElementObjectInspector(); + Assert.assertEquals(3L, lloi.getPrimitiveJavaObject(loi.getListElement(results.getSecond()[3], 0))); + Assert.assertEquals(26L, lloi.getPrimitiveJavaObject(loi.getListElement(results.getSecond()[3], 1))); + } + + @Test + public void simpleListDouble() throws HiveException { + ObjectPair results = test(json, "$.doublelist", + wrapInList(Collections.singletonList(1.0))); + Assert.assertEquals(4, results.getSecond().length); + Assert.assertTrue(results.getFirst() instanceof ListObjectInspector); + ListObjectInspector loi = (ListObjectInspector)results.getFirst(); + Assert.assertTrue(loi.getListElementObjectInspector() instanceof DoubleObjectInspector); + DoubleObjectInspector doi = (DoubleObjectInspector)loi.getListElementObjectInspector(); + Assert.assertEquals(2, loi.getListLength(results.getSecond()[3])); + Assert.assertEquals(3.52, doi.getPrimitiveJavaObject(loi.getListElement(results.getSecond()[3], 0))); + Assert.assertEquals(2.86, doi.getPrimitiveJavaObject(loi.getListElement(results.getSecond()[3], 1))); + } + + @Test + public void simpleListBool() throws HiveException { + ObjectPair results = test(json, "$.boollist", + wrapInList(Collections.singletonList(true))); + Assert.assertEquals(4, results.getSecond().length); + Assert.assertTrue(results.getFirst() instanceof ListObjectInspector); + ListObjectInspector loi = (ListObjectInspector)results.getFirst(); + Assert.assertTrue(loi.getListElementObjectInspector() instanceof BooleanObjectInspector); + BooleanObjectInspector boi = (BooleanObjectInspector)loi.getListElementObjectInspector(); + Assert.assertEquals(2, loi.getListLength(results.getSecond()[3])); + Assert.assertEquals(true, boi.getPrimitiveJavaObject(loi.getListElement(results.getSecond()[3], 0))); + Assert.assertEquals(false, boi.getPrimitiveJavaObject(loi.getListElement(results.getSecond()[3], 1))); + } + + @Test + public void multitypeList() throws HiveException { + ObjectPair results = test(json, "$.multilist", + wrapInList(Collections.singletonList("a"))); + Assert.assertEquals(4, results.getSecond().length); + Assert.assertTrue(results.getFirst() instanceof ListObjectInspector); + ListObjectInspector loi = (ListObjectInspector)results.getFirst(); + Assert.assertEquals(5, loi.getListLength(results.getSecond()[3])); + Assert.assertTrue(loi.getListElementObjectInspector() instanceof StringObjectInspector); + StringObjectInspector soi = (StringObjectInspector)loi.getListElementObjectInspector(); + Assert.assertEquals("string", soi.getPrimitiveJavaObject(loi.getListElement(results.getSecond()[3], 0))); + Assert.assertEquals("1", soi.getPrimitiveJavaObject(loi.getListElement(results.getSecond()[3], 1))); + Assert.assertEquals("2.3", soi.getPrimitiveJavaObject(loi.getListElement(results.getSecond()[3], 2))); + Assert.assertEquals("FALSE", soi.getPrimitiveJavaObject(loi.getListElement(results.getSecond()[3], 3))); + + results = 
test(json, "$.multilist", wrapInList(Collections.singletonList(1L))); + loi = (ListObjectInspector)results.getFirst(); + Assert.assertTrue(loi.getListElementObjectInspector() instanceof LongObjectInspector); + LongObjectInspector lloi = (LongObjectInspector)loi.getListElementObjectInspector(); + Assert.assertNull(loi.getListElement(results.getSecond()[3], 0)); + Assert.assertEquals(1L, lloi.getPrimitiveJavaObject(loi.getListElement(results.getSecond()[3], 1))); + Assert.assertEquals(2L, lloi.getPrimitiveJavaObject(loi.getListElement(results.getSecond()[3], 2))); + Assert.assertEquals(0L, lloi.getPrimitiveJavaObject(loi.getListElement(results.getSecond()[3], 3))); + + results = test(json, "$.multilist", wrapInList(Collections.singletonList(1.0))); + loi = (ListObjectInspector)results.getFirst(); + Assert.assertNull(loi.getListElement(results.getSecond()[3], 0)); + Assert.assertTrue(loi.getListElementObjectInspector() instanceof DoubleObjectInspector); + DoubleObjectInspector doi = (DoubleObjectInspector)loi.getListElementObjectInspector(); + Assert.assertEquals(1.0, doi.getPrimitiveJavaObject(loi.getListElement(results.getSecond()[3], 1))); + Assert.assertEquals(2.3, doi.getPrimitiveJavaObject(loi.getListElement(results.getSecond()[3], 2))); + Assert.assertEquals(0.0, doi.getPrimitiveJavaObject(loi.getListElement(results.getSecond()[3], 3))); + + results = test(json, "$.multilist", wrapInList(Collections.singletonList(true))); + loi = (ListObjectInspector)results.getFirst(); + Assert.assertTrue(loi.getListElementObjectInspector() instanceof BooleanObjectInspector); + BooleanObjectInspector boi = (BooleanObjectInspector)loi.getListElementObjectInspector(); + Assert.assertEquals(true, boi.getPrimitiveJavaObject(loi.getListElement(results.getSecond()[3], 0))); + Assert.assertEquals(true, boi.getPrimitiveJavaObject(loi.getListElement(results.getSecond()[3], 1))); + Assert.assertEquals(true, boi.getPrimitiveJavaObject(loi.getListElement(results.getSecond()[3], 2))); + Assert.assertEquals(false, boi.getPrimitiveJavaObject(loi.getListElement(results.getSecond()[3], 3))); + + } + + @Test + public void scalarInList() throws HiveException { + ObjectPair results = test(json, "$.sports[0]"); + PrimitiveObjectInspector poi = (PrimitiveObjectInspector)results.getFirst(); + Assert.assertEquals("baseball", poi.getPrimitiveJavaObject(results.getSecond()[0])); + Assert.assertEquals("basketball", poi.getPrimitiveJavaObject(results.getSecond()[1])); + + results = test(json, "$.longlist[1]", wrapInList(1L)); + Assert.assertTrue(results.getFirst() instanceof LongObjectInspector); + poi = (PrimitiveObjectInspector)results.getFirst(); + Assert.assertEquals(26L, poi.getPrimitiveJavaObject(results.getSecond()[3])); + + results = test(json, "$.doublelist[1]", wrapInList(1.0)); + Assert.assertTrue(results.getFirst() instanceof DoubleObjectInspector); + poi = (PrimitiveObjectInspector)results.getFirst(); + Assert.assertEquals(2.86, poi.getPrimitiveJavaObject(results.getSecond()[3])); + + results = test(json, "$.boollist[1]", wrapInList(true)); + Assert.assertTrue(results.getFirst() instanceof BooleanObjectInspector); + poi = (PrimitiveObjectInspector)results.getFirst(); + Assert.assertEquals(false, poi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void simpleObj() throws HiveException { + Map defaultVal = new HashMap<>(); + defaultVal.put("str", "a"); + defaultVal.put("long", 1L); + defaultVal.put("decimal", 1.0); + defaultVal.put("bool", true); + ObjectPair results = test(json, "$.subobj", 
wrapInList(defaultVal)); + + // First three return values should be null, last one should be a struct + for (int i = 0; i < 3; i++) Assert.assertNull(results.getSecond()[i]); + Assert.assertNotNull(results.getSecond()[3]); + Object struct = results.getSecond()[3]; + + // We should have a StructObjectInspector + Assert.assertTrue(results.getFirst() instanceof StructObjectInspector); + StructObjectInspector soi = (StructObjectInspector)results.getFirst(); + + // Look for the string + StructField sf = soi.getStructFieldRef("str"); + Assert.assertNotNull(sf); + Assert.assertEquals("str", sf.getFieldName()); + Assert.assertTrue(sf.getFieldObjectInspector() instanceof StringObjectInspector); + Assert.assertEquals("strval", ((StringObjectInspector)sf.getFieldObjectInspector()).getPrimitiveJavaObject(soi.getStructFieldData(struct, sf))); + + // Look for the long + sf = soi.getStructFieldRef("long"); + Assert.assertNotNull(sf); + Assert.assertEquals("long", sf.getFieldName()); + Assert.assertTrue(sf.getFieldObjectInspector() instanceof LongObjectInspector); + Assert.assertEquals(-1L, ((LongObjectInspector)sf.getFieldObjectInspector()).getPrimitiveJavaObject(soi.getStructFieldData(struct, sf))); + + // Look for the double + sf = soi.getStructFieldRef("decimal"); + Assert.assertNotNull(sf); + Assert.assertEquals("decimal", sf.getFieldName()); + Assert.assertTrue(sf.getFieldObjectInspector() instanceof DoubleObjectInspector); + Assert.assertEquals(-2.2, ((DoubleObjectInspector)sf.getFieldObjectInspector()).getPrimitiveJavaObject(soi.getStructFieldData(struct, sf))); + + // Look for the boolean + sf = soi.getStructFieldRef("bool"); + Assert.assertNotNull(sf); + Assert.assertEquals("bool", sf.getFieldName()); + Assert.assertTrue(sf.getFieldObjectInspector() instanceof BooleanObjectInspector); + Assert.assertEquals(true, ((BooleanObjectInspector)sf.getFieldObjectInspector()).getPrimitiveJavaObject(soi.getStructFieldData(struct, sf))); + } + + @Test + public void scalarInObj() throws HiveException { + ObjectPair results = test(json, "$.subobj.str"); + PrimitiveObjectInspector poi = (PrimitiveObjectInspector)results.getFirst(); + Assert.assertEquals("strval", poi.getPrimitiveJavaObject(results.getSecond()[3])); + + results = test(json, "$.subobj.long", wrapInList(1L)); + poi = (PrimitiveObjectInspector)results.getFirst(); + Assert.assertEquals(-1L, poi.getPrimitiveJavaObject(results.getSecond()[3])); + + results = test(json, "$.subobj.decimal", wrapInList(1.0)); + poi = (PrimitiveObjectInspector)results.getFirst(); + Assert.assertEquals(-2.2, poi.getPrimitiveJavaObject(results.getSecond()[3])); + + results = test(json, "$.subobj.bool", wrapInList(true)); + poi = (PrimitiveObjectInspector)results.getFirst(); + Assert.assertEquals(true, poi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void nested() throws HiveException { + ObjectPair results = test(json, "$.nested.nestedlist[0].anothernest[2]", wrapInList(1)); + PrimitiveObjectInspector poi = (PrimitiveObjectInspector)results.getFirst(); + Assert.assertEquals(1000, poi.getPrimitiveJavaObject(results.getSecond()[3])); + + Map defaultVal = new HashMap<>(); + defaultVal.put("anothernest", Arrays.asList(10L, 100L, 1000L)); + results = test(json, "$.nested.nestedlist[0]", wrapInList(defaultVal)); + StructObjectInspector soi = (StructObjectInspector)results.getFirst(); + + StructField sf = soi.getStructFieldRef("anothernest"); + Assert.assertNotNull(sf); + Assert.assertEquals("anothernest", sf.getFieldName()); + 
Assert.assertTrue(sf.getFieldObjectInspector() instanceof ListObjectInspector); + ListObjectInspector loi = (ListObjectInspector)sf.getFieldObjectInspector(); + List list = ((ListObjectInspector) sf.getFieldObjectInspector()).getList(soi.getStructFieldData(results.getSecond()[3], sf)); + Assert.assertEquals(3, loi.getListLength(list)); + Assert.assertTrue(loi.getListElementObjectInspector() instanceof LongObjectInspector); + LongObjectInspector lloi = (LongObjectInspector)loi.getListElementObjectInspector(); + Assert.assertEquals(10L, lloi.getPrimitiveJavaObject(loi.getListElement(list, 0))); + Assert.assertEquals(100L, lloi.getPrimitiveJavaObject(loi.getListElement(list, 1))); + Assert.assertEquals(1000L, lloi.getPrimitiveJavaObject(loi.getListElement(list, 2))); + } + + + @Test + public void longAsString() throws HiveException { + ObjectPair results = test(json, "$.age"); + Assert.assertEquals(4, results.getSecond().length); + PrimitiveObjectInspector poi = (PrimitiveObjectInspector)results.getFirst(); + Assert.assertEquals("21", poi.getPrimitiveJavaObject(results.getSecond()[0])); + Assert.assertEquals("20", poi.getPrimitiveJavaObject(results.getSecond()[1])); + Assert.assertNull(poi.getPrimitiveJavaObject(results.getSecond()[2])); + Assert.assertNull(poi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void doubleAsString() throws HiveException { + ObjectPair results = test(json, "$.gpa"); + Assert.assertEquals(4, results.getSecond().length); + PrimitiveObjectInspector poi = (PrimitiveObjectInspector)results.getFirst(); + Assert.assertEquals("3.24", poi.getPrimitiveJavaObject(results.getSecond()[0])); + Assert.assertEquals("3.94", poi.getPrimitiveJavaObject(results.getSecond()[1])); + Assert.assertNull(poi.getPrimitiveJavaObject(results.getSecond()[2])); + Assert.assertNull(poi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void boolAsString() throws HiveException { + ObjectPair results = test(json, "$.honors"); + Assert.assertEquals(4, results.getSecond().length); + PrimitiveObjectInspector poi = (PrimitiveObjectInspector)results.getFirst(); + Assert.assertEquals("FALSE", poi.getPrimitiveJavaObject(results.getSecond()[0])); + Assert.assertEquals("TRUE", poi.getPrimitiveJavaObject(results.getSecond()[1])); + Assert.assertNull(poi.getPrimitiveJavaObject(results.getSecond()[2])); + Assert.assertNull(poi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void stringAsLong() throws HiveException { + ObjectPair results = test(json, "$.name", wrapInList(1L)); + Assert.assertEquals(4, results.getSecond().length); + PrimitiveObjectInspector poi = (PrimitiveObjectInspector) results.getFirst(); + Assert.assertNull(poi.getPrimitiveJavaObject(results.getSecond()[0])); + results = test(json, "$.longstring", wrapInList(1L)); + Assert.assertEquals(4, results.getSecond().length); + poi = (PrimitiveObjectInspector) results.getFirst(); + Assert.assertEquals(42L, poi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void doubleAsLong() throws HiveException { + ObjectPair results = test(json, "$.gpa", wrapInList(1L)); + Assert.assertEquals(4, results.getSecond().length); + PrimitiveObjectInspector poi = (PrimitiveObjectInspector) results.getFirst(); + Assert.assertEquals(3L, poi.getPrimitiveJavaObject(results.getSecond()[0])); + Assert.assertEquals(3L, poi.getPrimitiveJavaObject(results.getSecond()[1])); + Assert.assertNull(poi.getPrimitiveJavaObject(results.getSecond()[2])); + 
Assert.assertNull(poi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void booleanAsLong() throws HiveException { + ObjectPair results = test(json, "$.honors", wrapInList(1L)); + Assert.assertEquals(4, results.getSecond().length); + PrimitiveObjectInspector poi = (PrimitiveObjectInspector) results.getFirst(); + // Hive allows booleans to be cast to longs. This is sick and wrong, but ok + Assert.assertEquals(0L, poi.getPrimitiveJavaObject(results.getSecond()[0])); + Assert.assertEquals(1L, poi.getPrimitiveJavaObject(results.getSecond()[1])); + Assert.assertNull(poi.getPrimitiveJavaObject(results.getSecond()[2])); + Assert.assertNull(poi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void stringAsDouble() throws HiveException { + ObjectPair results = test(json, "$.name", wrapInList(1.0)); + Assert.assertEquals(4, results.getSecond().length); + PrimitiveObjectInspector poi = (PrimitiveObjectInspector) results.getFirst(); + Assert.assertNull(poi.getPrimitiveJavaObject(results.getSecond()[0])); + results = test(json, "$.doublestring", wrapInList(1.0)); + Assert.assertEquals(4, results.getSecond().length); + poi = (PrimitiveObjectInspector) results.getFirst(); + Assert.assertEquals(3.1415, poi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void longAsDouble() throws HiveException { + ObjectPair results = test(json, "$.age", wrapInList(1.0)); + Assert.assertEquals(4, results.getSecond().length); + PrimitiveObjectInspector poi = (PrimitiveObjectInspector) results.getFirst(); + Assert.assertEquals(21.0, poi.getPrimitiveJavaObject(results.getSecond()[0])); + Assert.assertEquals(20.0, poi.getPrimitiveJavaObject(results.getSecond()[1])); + Assert.assertNull(poi.getPrimitiveJavaObject(results.getSecond()[2])); + Assert.assertNull(poi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void booleanAsDouble() throws HiveException { + ObjectPair results = test(json, "$.honors", wrapInList(1.0)); + Assert.assertEquals(4, results.getSecond().length); + PrimitiveObjectInspector poi = (PrimitiveObjectInspector) results.getFirst(); + // Hive allows booleans to be cast to doubles. 
This is sick and wrong, but ok + Assert.assertEquals(0.0, poi.getPrimitiveJavaObject(results.getSecond()[0])); + Assert.assertEquals(1.0, poi.getPrimitiveJavaObject(results.getSecond()[1])); + Assert.assertNull(poi.getPrimitiveJavaObject(results.getSecond()[2])); + Assert.assertNull(poi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void stringAsBoolean() throws HiveException { + ObjectPair results = test(json, "$.name", wrapInList(true)); + Assert.assertEquals(4, results.getSecond().length); + PrimitiveObjectInspector poi = (PrimitiveObjectInspector) results.getFirst(); + Assert.assertEquals(true, poi.getPrimitiveJavaObject(results.getSecond()[0])); + Assert.assertEquals(true, poi.getPrimitiveJavaObject(results.getSecond()[1])); + Assert.assertNull(poi.getPrimitiveJavaObject(results.getSecond()[2])); + Assert.assertNull(poi.getPrimitiveJavaObject(results.getSecond()[3])); + results = test(json, "$.boolstring", wrapInList(true)); + Assert.assertEquals(4, results.getSecond().length); + poi = (PrimitiveObjectInspector) results.getFirst(); + Assert.assertEquals(true, poi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void longAsBoolean() throws HiveException { + ObjectPair results = test(json, "$.age", wrapInList(true)); + Assert.assertEquals(4, results.getSecond().length); + PrimitiveObjectInspector poi = (PrimitiveObjectInspector) results.getFirst(); + Assert.assertEquals(true, poi.getPrimitiveJavaObject(results.getSecond()[0])); + Assert.assertEquals(true, poi.getPrimitiveJavaObject(results.getSecond()[1])); + Assert.assertNull(poi.getPrimitiveJavaObject(results.getSecond()[2])); + Assert.assertNull(poi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void doubleAsBoolean() throws HiveException { + ObjectPair results = test(json, "$.gpa", wrapInList(true)); + Assert.assertEquals(4, results.getSecond().length); + PrimitiveObjectInspector poi = (PrimitiveObjectInspector) results.getFirst(); + Assert.assertEquals(true, poi.getPrimitiveJavaObject(results.getSecond()[0])); + Assert.assertEquals(true, poi.getPrimitiveJavaObject(results.getSecond()[1])); + Assert.assertNull(poi.getPrimitiveJavaObject(results.getSecond()[2])); + Assert.assertNull(poi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void unsupportedReturnType() throws HiveException { + try { + test(json, "$.gpa", wrapInList(3.0f)); + } catch (UDFArgumentTypeException e) { + Assert.assertEquals("jsonvalue can return string, int, long, double, boolean, array of one of these, or struct with these", e.getMessage()); + } + } + + @Test + public void stringStaticDefaultOnEmpty() throws HiveException { + ObjectPair results = test(json, "$.name", + wrapInList("fred"), WhatToReturn.DEFAULT); + Assert.assertEquals(4, results.getSecond().length); + Assert.assertTrue(results.getFirst() instanceof StringObjectInspector); + StringObjectInspector soi = (StringObjectInspector)results.getFirst(); + Assert.assertEquals("chris", soi.getPrimitiveJavaObject(results.getSecond()[0])); + Assert.assertEquals("tracy", soi.getPrimitiveJavaObject(results.getSecond()[1])); + Assert.assertNull(soi.getPrimitiveJavaObject(results.getSecond()[2])); + Assert.assertEquals("fred", soi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void longStaticDefaultOnEmpty() throws HiveException { + ObjectPair results = test(json, "$.age", + wrapInList(87L), WhatToReturn.DEFAULT); + Assert.assertEquals(4, results.getSecond().length); + 
Assert.assertTrue(results.getFirst() instanceof LongObjectInspector); + LongObjectInspector loi = (LongObjectInspector)results.getFirst(); + Assert.assertEquals(21L, loi.getPrimitiveJavaObject(results.getSecond()[0])); + Assert.assertEquals(20L, loi.getPrimitiveJavaObject(results.getSecond()[1])); + Assert.assertNull(loi.getPrimitiveJavaObject(results.getSecond()[2])); + Assert.assertEquals(87L, loi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void intStaticDefaultOnEmpty() throws HiveException { + ObjectPair results = test(json, "$.age", + wrapInList(93), WhatToReturn.DEFAULT); + Assert.assertEquals(4, results.getSecond().length); + Assert.assertTrue(results.getFirst() instanceof IntObjectInspector); + IntObjectInspector ioi = (IntObjectInspector)results.getFirst(); + Assert.assertEquals(21, ioi.getPrimitiveJavaObject(results.getSecond()[0])); + Assert.assertEquals(20, ioi.getPrimitiveJavaObject(results.getSecond()[1])); + Assert.assertNull(ioi.getPrimitiveJavaObject(results.getSecond()[2])); + Assert.assertEquals(93, ioi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void doubleStaticDefaultOnEmpty() throws HiveException { + ObjectPair results = test(json, "$.gpa", + wrapInList(2.5), WhatToReturn.DEFAULT); + Assert.assertEquals(4, results.getSecond().length); + Assert.assertTrue(results.getFirst() instanceof DoubleObjectInspector); + DoubleObjectInspector doi = (DoubleObjectInspector)results.getFirst(); + Assert.assertEquals(3.24, doi.getPrimitiveJavaObject(results.getSecond()[0])); + Assert.assertEquals(3.94, doi.getPrimitiveJavaObject(results.getSecond()[1])); + Assert.assertNull(doi.getPrimitiveJavaObject(results.getSecond()[2])); + Assert.assertEquals(2.5, doi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void boolStaticDefaultOnEmpty() throws HiveException { + ObjectPair results = test(json, "$.honors", + wrapInList(true), WhatToReturn.DEFAULT); + Assert.assertEquals(4, results.getSecond().length); + Assert.assertTrue(results.getFirst() instanceof BooleanObjectInspector); + BooleanObjectInspector boi = (BooleanObjectInspector)results.getFirst(); + Assert.assertEquals(false, boi.getPrimitiveJavaObject(results.getSecond()[0])); + Assert.assertEquals(true, boi.getPrimitiveJavaObject(results.getSecond()[1])); + Assert.assertNull(boi.getPrimitiveJavaObject(results.getSecond()[2])); + Assert.assertEquals(true, boi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void stringDynamicDefaultOnEmpty() throws HiveException { + ObjectPair results = test(json, "$.name", + Arrays.asList("one", "two", "three", "four"), WhatToReturn.DEFAULT); + Assert.assertEquals(4, results.getSecond().length); + Assert.assertTrue(results.getFirst() instanceof StringObjectInspector); + StringObjectInspector soi = (StringObjectInspector)results.getFirst(); + Assert.assertEquals("chris", soi.getPrimitiveJavaObject(results.getSecond()[0])); + Assert.assertEquals("tracy", soi.getPrimitiveJavaObject(results.getSecond()[1])); + Assert.assertNull(soi.getPrimitiveJavaObject(results.getSecond()[2])); + Assert.assertEquals("four", soi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void longDynamicDefaultOnEmpty() throws HiveException { + ObjectPair results = test(json, "$.age", + Arrays.asList(1L, 2L, 3L, 4L), WhatToReturn.DEFAULT); + Assert.assertEquals(4, results.getSecond().length); + Assert.assertTrue(results.getFirst() instanceof LongObjectInspector); + LongObjectInspector loi = 
(LongObjectInspector)results.getFirst(); + Assert.assertEquals(21L, loi.getPrimitiveJavaObject(results.getSecond()[0])); + Assert.assertEquals(20L, loi.getPrimitiveJavaObject(results.getSecond()[1])); + Assert.assertNull(loi.getPrimitiveJavaObject(results.getSecond()[2])); + Assert.assertEquals(4L, loi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void doubleDynamicDefaultOnEmpty() throws HiveException { + ObjectPair results = test(json, "$.gpa", + Arrays.asList(1.0, 2.0, 3.0, 4.0), WhatToReturn.DEFAULT); + Assert.assertEquals(4, results.getSecond().length); + Assert.assertTrue(results.getFirst() instanceof DoubleObjectInspector); + DoubleObjectInspector doi = (DoubleObjectInspector)results.getFirst(); + Assert.assertEquals(3.24, doi.getPrimitiveJavaObject(results.getSecond()[0])); + Assert.assertEquals(3.94, doi.getPrimitiveJavaObject(results.getSecond()[1])); + Assert.assertNull(doi.getPrimitiveJavaObject(results.getSecond()[2])); + Assert.assertEquals(4.0, doi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void booleanDynamicDefaultOnEmpty() throws HiveException { + ObjectPair results = test(json, "$.honors", + Arrays.asList(false, false, false, true), WhatToReturn.DEFAULT); + Assert.assertEquals(4, results.getSecond().length); + Assert.assertTrue(results.getFirst() instanceof BooleanObjectInspector); + BooleanObjectInspector boi = (BooleanObjectInspector)results.getFirst(); + Assert.assertEquals(false, boi.getPrimitiveJavaObject(results.getSecond()[0])); + Assert.assertEquals(true, boi.getPrimitiveJavaObject(results.getSecond()[1])); + Assert.assertNull(boi.getPrimitiveJavaObject(results.getSecond()[2])); + Assert.assertEquals(true, boi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void defaultOnError() throws HiveException { + ObjectPair results = test(json, "$.subobj?(@.str == 5)", + wrapInList("fred"), WhatToReturn.NULL, WhatToReturn.DEFAULT); + Assert.assertEquals(4, results.getSecond().length); + StringObjectInspector soi = (StringObjectInspector)results.getFirst(); + // The error will only occur on the final entry because it is the only one with 'subobj' key. The filter + // won't be evaluated on all the other records. + Assert.assertEquals("fred", soi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void nullOnError() throws HiveException { + ObjectPair results = test(json, "$.subobj?(@.str == 5)"); + Assert.assertEquals(4, results.getSecond().length); + StringObjectInspector soi = (StringObjectInspector)results.getFirst(); + // The error will only occur on the final entry because it is the only one with 'subobj' key. The filter + // won't be evaluated on all the other records. 
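+ // Between defaultOnError above, this test, and errorOnError below, the same path is run with the
+ // three behaviours the WhatToReturn values model, roughly (a sketch only; the SQL-level clause
+ // syntax is outside this file):
+ //   $.subobj?(@.str == 5)  -- comparing the string member "str" to the long 5 is a semantic error
+ //   DEFAULT -> the supplied default ("fred" above), NULL -> null, ERROR -> the call throws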
+ Assert.assertNull(soi.getPrimitiveJavaObject(results.getSecond()[3])); + } + + @Test + public void badSyntax() throws HiveException { + try { + test(json, "$.nosuchfunc()"); + Assert.fail(); + } catch (UDFArgumentException e) { + Assert.assertTrue(e.getMessage().startsWith("Failed to parse JSON path exception: '$.nosuchfunc()'")); + } + } + + @Test + public void errorOnEmpty() { + try { + test(json, "$.nosuch", wrapInList("a"), WhatToReturn.ERROR); + Assert.fail(); + } catch (HiveException e) { + Assert.assertTrue(e.getMessage().contains("Result of path expression is empty")); + } + } + + @Test + public void errorOnError() { + try { + test(json, "$.subobj?(@.str == 5)", wrapInList("a"), WhatToReturn.NULL, WhatToReturn.ERROR); + Assert.fail(); + } catch (HiveException e) { + Assert.assertTrue(e.getMessage().contains("produced a semantic error: Cannot compare a string to a long at \"@.str == 5\"")); + } + } + + @Test + public void passing() throws HiveException { + List passingVals = Arrays.asList( + Collections.singletonMap("index", 1), + Collections.singletonMap("index", 0), + Collections.singletonMap("index", 0), + Collections.singletonMap("index", 2) + ); + + ObjectPair results = test(json, "$.sports[$index]", wrapInList("a"), + WhatToReturn.NULL, WhatToReturn.NULL, passingVals); + Assert.assertEquals(4, results.getSecond().length); + StringObjectInspector soi = (StringObjectInspector)results.getFirst(); + Assert.assertEquals("soccer", soi.getPrimitiveJavaObject(results.getSecond()[0])); + Assert.assertEquals("basketball", soi.getPrimitiveJavaObject(results.getSecond()[1])); + Assert.assertNull(soi.getPrimitiveJavaObject(results.getSecond()[2])); + Assert.assertNull(soi.getPrimitiveJavaObject(results.getSecond()[3])); + } + @Override + protected GenericUDFJsonValue getUdf() { + return new GenericUDFJsonValue(); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/TestJsonValueParser.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/TestJsonValueParser.java new file mode 100644 index 0000000000..8343e1fde6 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/TestJsonValueParser.java @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.hadoop.hive.ql.udf.generic.sqljsonpath; + +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.IOException; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; + +public class TestJsonValueParser { + + private static JsonValueParser parser; + + // Done once to test that re-using the parser works + @BeforeClass + public static void createParser() { + ErrorListener errorListener = new ErrorListener(); + parser = new JsonValueParser(errorListener); + } + + @Test + public void empty() throws IOException, JsonPathException { + JsonSequence json = parser.parse("{}"); + Assert.assertTrue(json.isObject()); + Assert.assertEquals(0, json.asObject().size()); + } + + @Test + public void justString() throws IOException, JsonPathException { + JsonSequence json = parser.parse("{ \"name\" : \"fred\" }"); + Assert.assertTrue(json.isObject()); + Assert.assertEquals(1, json.asObject().size()); + Assert.assertEquals(new JsonSequence(Collections.singletonMap("name", new JsonSequence("fred"))), json); + } + + @Test + public void justInt() throws IOException, JsonPathException { + JsonSequence json = parser.parse("{ \"age\" : 10 }"); + Assert.assertTrue(json.isObject()); + Assert.assertEquals(1, json.asObject().size()); + Assert.assertEquals(new JsonSequence(Collections.singletonMap("age", new JsonSequence(10))), json); + } + + @Test + public void simple() throws IOException, JsonPathException { + JsonSequence json = parser.parse("{" + + "\"name\" : \"clark kent\"," + + "\"age\" : 53," + + "\"gpa\" : 3.97," + + "\"honor roll\" : true," + + "\"major\" : null," + + "\"classes\" : [ \"math 101\", \"history 101\" ]," + + "\"sports\" : [ ]" + + "}"); + + Map m = new HashMap<>(); + m.put("name", new JsonSequence("clark kent")); + m.put("age", new JsonSequence(53L)); + m.put("gpa", new JsonSequence(3.97)); + m.put("honor roll", JsonSequence.trueJsonSequence); + m.put("major", JsonSequence.nullJsonSequence); + List l = new ArrayList<>(); + l.add(new JsonSequence("math 101")); + l.add(new JsonSequence("history 101")); + m.put("classes", new JsonSequence(l)); + m.put("sports", new JsonSequence(Collections.emptyList())); + JsonSequence expected = new JsonSequence(m); + + Assert.assertEquals(expected, json); + } + + @Test + public void nested() throws IOException, JsonPathException { + JsonSequence json = parser.parse("{" + + "\"name\" : \"diana prince\"," + + "\"address\" : {" + + " \"street\" : \"123 amazon street\"," + + " \"zip\" : 12345" + + " }," + + "\"classes\" : [" + + " {" + + " \"class\" : \"math 101\"," + + " \"professor\" : \"xavier\"" + + " }, {" + + " \"class\" : \"history 101\"," + + " \"professor\" : \"who\"" + + " }" + + " ]" + + "}"); + + Map m = new HashMap<>(); + m.put("name", new JsonSequence("diana prince")); + Map m1 = new HashMap<>(); + m1.put("street", new JsonSequence("123 amazon street")); + m1.put("zip", new JsonSequence(12345L)); + m.put("address", new JsonSequence(m1)); + List l = new ArrayList<>(); + Map m2 = new HashMap<>(); + m2.put("class", new JsonSequence("math 101")); + m2.put("professor", new JsonSequence("xavier")); + l.add(new JsonSequence(m2)); + Map m3 = new HashMap<>(); + m3.put("class", new JsonSequence("history 101")); + m3.put("professor", new JsonSequence("who")); + l.add(new 
JsonSequence(m3)); + m.put("classes", new JsonSequence(l)); + JsonSequence expected = new JsonSequence(m); + + Assert.assertEquals(expected, json); + } + + @Test + public void syntaxError() throws IOException { + try { + parser.parse("{ \"oops\" }"); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'{ \"oops\" }' produced a syntax error: mismatched input '}' expecting ':' on line 1 at position 9", e.getMessage()); + } + } + + @Test + public void negativeNumbers() throws IOException, JsonPathException { + JsonSequence json = parser.parse("{" + + "\"longnumber\" : -1," + + "\"decimalnumber\" : -10.1, " + + "\"anotherdecimal\" : -.9," + + "\"pluslong\" : +3," + + "\"plusdec\" : +3.1415" + + "}"); + + Assert.assertEquals(-1L, json.asObject().get("longnumber").asLong()); + Assert.assertEquals(-10.1, json.asObject().get("decimalnumber").asDouble(), 0.001); + Assert.assertEquals(-0.9, json.asObject().get("anotherdecimal").asDouble(), 0.001); + Assert.assertEquals(3L, json.asObject().get("pluslong").asLong()); + Assert.assertEquals(3.1415, json.asObject().get("plusdec").asDouble(), 0.001); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/TestPathExecutor.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/TestPathExecutor.java new file mode 100644 index 0000000000..a34382e932 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/sqljsonpath/TestPathExecutor.java @@ -0,0 +1,2410 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.udf.generic.sqljsonpath; + +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +public class TestPathExecutor { + + private static JsonValueParser valueParser; + private static JsonSequence emptyJson; + + @BeforeClass + public static void buildValueParser() throws IOException, JsonPathException { + ErrorListener listener = new ErrorListener(); + valueParser = new JsonValueParser(listener); + emptyJson = valueParser.parse("{ }"); + } + + @Test + public void syntaxError() throws IOException { + try { + PathParser parser = new PathParser(); + parser.parse("fizzbot"); + } catch (JsonPathException e) { + Assert.assertEquals("'fizzbot' produced a syntax error: no viable alternative at input 'fizzbot' on line 1 at position 0", e.getMessage()); + } + + } + + @Test + public void strictDefault() throws IOException, JsonPathException { + PathExecutionResult pathExecResult = parseAndExecute("$.a"); + Assert.assertEquals(Mode.STRICT, pathExecResult.executor.getMode()); + } + + @Test + public void laxSpecified() throws IOException, JsonPathException { + try { + parseAndExecute("lax $.a"); + } catch (JsonPathException e) { + Assert.assertEquals("'lax $.a' produced a semantic error: lax mode not supported at \"lax\"", e.getMessage()); + } + } + + @Test + public void strictSpecified() throws IOException, JsonPathException { + PathExecutionResult pathExecResult = parseAndExecute("strict $.a"); + Assert.assertEquals(Mode.STRICT, pathExecResult.executor.getMode()); + } + + @Test + public void longLiteral() throws IOException, JsonPathException { + PathExecutionResult pathExecResult = parseAndExecute("5"); + Assert.assertTrue(pathExecResult.executor.returnedByVisit.isLong()); + Assert.assertEquals(5, pathExecResult.executor.returnedByVisit.asLong()); + + } + + @Test + public void doubleLiteral() throws IOException, JsonPathException { + PathExecutionResult pathExecResult = parseAndExecute("5.1"); + Assert.assertTrue(pathExecResult.executor.returnedByVisit.isDouble()); + Assert.assertEquals(5.1, pathExecResult.executor.returnedByVisit.asDouble(), 0.001); + } + + @Test + public void booleanLiteral() throws IOException, JsonPathException { + PathExecutionResult pathExecResult = parseAndExecute("true"); + Assert.assertTrue(pathExecResult.executor.returnedByVisit.isBool()); + Assert.assertTrue(pathExecResult.executor.returnedByVisit.asBool()); + } + + @Test + public void nullLiteral() throws IOException, JsonPathException { + PathExecutionResult pathExecResult = parseAndExecute("null"); + Assert.assertTrue(pathExecResult.executor.returnedByVisit.isNull()); + } + + @Test + public void singleQuoteStringLiteral() throws IOException, JsonPathException { + PathExecutionResult pathExecResult = parseAndExecute("'fred'"); + Assert.assertTrue(pathExecResult.executor.returnedByVisit.isString()); + Assert.assertEquals("fred", pathExecResult.executor.returnedByVisit.asString()); + } + + @Test + public void doubleQuoteStringLiteral() throws IOException, JsonPathException { + PathExecutionResult pathExecResult = parseAndExecute("\"fred\""); + Assert.assertTrue(pathExecResult.executor.returnedByVisit.isString()); + Assert.assertEquals("fred", pathExecResult.executor.returnedByVisit.asString()); + } + + @Test + public void addLong() throws IOException, JsonPathException { + PathExecutionResult pathExecResult = parseAndExecute("5 + 6"); + 
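+ // The arithmetic cases from here down follow the numeric promotion the assertions spell out:
+ // long-with-long stays a long, any double operand makes the result a double, and mod is only
+ // accepted on longs, e.g. (a sketch of the expectations below):
+ //   "5 + 6"   -> long 11
+ //   "5 + 7.2" -> double 12.2
+ //   "10 % 3"  -> long 1    ("20 % 3.0" is rejected in badMod below)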
Assert.assertTrue(pathExecResult.executor.returnedByVisit.isLong()); + Assert.assertEquals(11L, pathExecResult.executor.returnedByVisit.asLong()); + } + + @Test + public void subtractLong() throws IOException, JsonPathException { + PathExecutionResult pathExecResult = parseAndExecute("8 - 4"); + Assert.assertTrue(pathExecResult.executor.returnedByVisit.isLong()); + Assert.assertEquals(4L, pathExecResult.executor.returnedByVisit.asLong()); + } + + @Test + public void multiplyLong() throws IOException, JsonPathException { + PathExecutionResult pathExecResult = parseAndExecute("9 * 10"); + Assert.assertTrue(pathExecResult.executor.returnedByVisit.isLong()); + Assert.assertEquals(90L, pathExecResult.executor.returnedByVisit.asLong()); + } + + @Test + public void divideLong() throws IOException, JsonPathException { + PathExecutionResult pathExecResult = parseAndExecute("9 / 3"); + Assert.assertTrue(pathExecResult.executor.returnedByVisit.isLong()); + Assert.assertEquals(3L, pathExecResult.executor.returnedByVisit.asLong()); + } + + @Test + public void modLong() throws IOException, JsonPathException { + PathExecutionResult pathExecResult = parseAndExecute("10 % 3"); + Assert.assertTrue(pathExecResult.executor.returnedByVisit.isLong()); + Assert.assertEquals(1L, pathExecResult.executor.returnedByVisit.asLong()); + } + + @Test + public void addDouble() throws IOException, JsonPathException { + PathExecutionResult pathExecResult = parseAndExecute("5.1 + 7.2"); + Assert.assertTrue(pathExecResult.executor.returnedByVisit.isDouble()); + Assert.assertEquals(12.3, pathExecResult.executor.returnedByVisit.asDouble(), 0.00001); + } + + @Test + public void subtractDouble() throws IOException, JsonPathException { + PathExecutionResult pathExecResult = parseAndExecute("10.0 - .2"); + Assert.assertTrue(pathExecResult.executor.returnedByVisit.isDouble()); + Assert.assertEquals(9.8, pathExecResult.executor.returnedByVisit.asDouble(), 0.00001); + } + + @Test + public void multiplyDouble() throws IOException, JsonPathException { + PathExecutionResult pathExecResult = parseAndExecute("2.0 * 3.141592654"); + Assert.assertTrue(pathExecResult.executor.returnedByVisit.isDouble()); + Assert.assertEquals(6.283185308, pathExecResult.executor.returnedByVisit.asDouble(), 0.001); + } + + @Test + public void divideDouble() throws IOException, JsonPathException { + PathExecutionResult pathExecResult = parseAndExecute("20.0 / 3.0"); + Assert.assertTrue(pathExecResult.executor.returnedByVisit.isDouble()); + Assert.assertEquals(6.66666, pathExecResult.executor.returnedByVisit.asDouble(), 0.001); + } + + @Test + public void addLongAndDouble() throws IOException, JsonPathException { + PathExecutionResult pathExecResult = parseAndExecute("5 + 7.2"); + Assert.assertTrue(pathExecResult.executor.returnedByVisit.isDouble()); + Assert.assertEquals(12.2, pathExecResult.executor.returnedByVisit.asDouble(), 0.00001); + } + + @Test + public void subtractLongAndDouble() throws IOException, JsonPathException { + PathExecutionResult pathExecResult = parseAndExecute("10 - 7.2"); + Assert.assertTrue(pathExecResult.executor.returnedByVisit.isDouble()); + Assert.assertEquals(2.8, pathExecResult.executor.returnedByVisit.asDouble(), 0.00001); + } + + @Test + public void multiplyLongAndDouble() throws IOException, JsonPathException { + PathExecutionResult pathExecResult = parseAndExecute("10 * 1.238273"); + Assert.assertTrue(pathExecResult.executor.returnedByVisit.isDouble()); + Assert.assertEquals(12.38273, 
pathExecResult.executor.returnedByVisit.asDouble(), 0.00001); + } + + @Test + public void divideLongAndDouble() throws IOException, JsonPathException { + PathExecutionResult pathExecResult = parseAndExecute("20 / 1.238273"); + Assert.assertTrue(pathExecResult.executor.returnedByVisit.isDouble()); + Assert.assertEquals(16.151527167272484, pathExecResult.executor.returnedByVisit.asDouble(), 0.00001); + } + + @Test + public void addDoubleAndLong() throws IOException, JsonPathException { + PathExecutionResult pathExecResult = parseAndExecute("5.2 + 7"); + Assert.assertTrue(pathExecResult.executor.returnedByVisit.isDouble()); + Assert.assertEquals(12.2, pathExecResult.executor.returnedByVisit.asDouble(), 0.00001); + + } + + @Test + public void subtractDoubleAndLong() throws IOException, JsonPathException { + PathExecutionResult pathExecResult = parseAndExecute("10.2 - 7"); + Assert.assertTrue(pathExecResult.executor.returnedByVisit.isDouble()); + Assert.assertEquals(3.2, pathExecResult.executor.returnedByVisit.asDouble(), 0.00001); + } + + @Test + public void multiplyDoubleAndLong() throws IOException, JsonPathException { + PathExecutionResult pathExecResult = parseAndExecute("1.238273 * 10"); + Assert.assertTrue(pathExecResult.executor.returnedByVisit.isDouble()); + Assert.assertEquals(12.38273, pathExecResult.executor.returnedByVisit.asDouble(), 0.00001); + } + + @Test + public void divideDoubleAndLong() throws IOException, JsonPathException { + PathExecutionResult pathExecResult = parseAndExecute("20.238273 / 3"); + Assert.assertTrue(pathExecResult.executor.returnedByVisit.isDouble()); + Assert.assertEquals(6.746091, pathExecResult.executor.returnedByVisit.asDouble(), 0.00001); + } + + @Test + public void longUnaryPlus() throws IOException, JsonPathException { + PathExecutionResult pathExecResult = parseAndExecute("+3"); + Assert.assertTrue(pathExecResult.executor.returnedByVisit.isLong()); + Assert.assertEquals(3L, pathExecResult.executor.returnedByVisit.asLong()); + } + + @Test + public void longUnaryMinus() throws IOException, JsonPathException { + PathExecutionResult pathExecResult = parseAndExecute("-3"); + Assert.assertTrue(pathExecResult.executor.returnedByVisit.isLong()); + Assert.assertEquals(-3L, pathExecResult.executor.returnedByVisit.asLong()); + } + + @Test + public void doubleUnaryPlus() throws IOException, JsonPathException { + PathExecutionResult pathExecResult = parseAndExecute("+20.238273"); + Assert.assertTrue(pathExecResult.executor.returnedByVisit.isDouble()); + Assert.assertEquals(20.238273, pathExecResult.executor.returnedByVisit.asDouble(), 0.00001); + } + + @Test + public void doubleUnaryMinus() throws IOException, JsonPathException { + PathExecutionResult pathExecResult = parseAndExecute("-20.238273"); + Assert.assertTrue(pathExecResult.executor.returnedByVisit.isDouble()); + Assert.assertEquals(-20.238273, pathExecResult.executor.returnedByVisit.asDouble(), 0.00001); + } + + @Test + public void badLongAdd() throws IOException { + try { + String pathExpr = "20 + 'fred'"; + parseAndExecute(pathExpr); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'20 + 'fred'' produced a semantic error: You cannot do arithmetic on a string at \"20 + 'fred'\"", e.getMessage()); + } + } + + @Test + public void badLongSubtract() throws IOException { + try { + String pathExpr = "20 - 'fred'"; + parseAndExecute(pathExpr); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'20 - 'fred'' produced a semantic error: You cannot do arithmetic on 
a string at \"20 - 'fred'\"", e.getMessage()); + } + } + + @Test + public void badLongMultiply() throws IOException { + try { + String pathExpr = "20 * true"; + parseAndExecute(pathExpr); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'20 * true' produced a semantic error: You cannot do arithmetic on a bool at \"20 * true\"", e.getMessage()); + } + } + + @Test + public void badLongDivide() throws IOException { + try { + String pathExpr = "20 / 'bob'"; + parseAndExecute(pathExpr); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'20 / 'bob'' produced a semantic error: You cannot do arithmetic on a string at \"20 / 'bob'\"", e.getMessage()); + } + } + + @Test + public void badMod() throws IOException { + try { + String pathExpr = "20 % 3.0"; + parseAndExecute(pathExpr); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'20 % 3.0' produced a semantic error: You cannot do mod on a double at \"20 % 3.0\"", e.getMessage()); + } + } + + @Test + public void badStringAdd() throws IOException { + try { + String pathExpr = "'fred' + 3.0"; + parseAndExecute(pathExpr); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("''fred' + 3.0' produced a semantic error: You cannot do arithmetic on a string at \"'fred' + 3.0\"", e.getMessage()); + } + } + + @Test + public void badStringSubtract() throws IOException { + try { + String pathExpr = "'fred' - 3.0"; + parseAndExecute(pathExpr); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("''fred' - 3.0' produced a semantic error: You cannot do arithmetic on a string at \"'fred' - 3.0\"", e.getMessage()); + } + } + + @Test + public void badStringMultiply() throws IOException { + try { + String pathExpr = "'fred' * 3.0"; + parseAndExecute(pathExpr); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("''fred' * 3.0' produced a semantic error: You cannot do arithmetic on a string at \"'fred' * 3.0\"", e.getMessage()); + } + } + + @Test + public void badStringDivide() throws IOException { + try { + String pathExpr = "'fred' / 3.0"; + parseAndExecute(pathExpr); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("''fred' / 3.0' produced a semantic error: You cannot do arithmetic on a string at \"'fred' / 3.0\"", e.getMessage()); + } + } + + @Test + public void badStringMod() throws IOException { + try { + String pathExpr = "'fred' % 3.0"; + parseAndExecute(pathExpr); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("''fred' % 3.0' produced a semantic error: You cannot do mod on a string at \"'fred' % 3.0\"", e.getMessage()); + } + } + + @Test + public void addNull() throws IOException { + try { + parseAndExecute("20 + null"); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'20 + null' produced a semantic error: You cannot do arithmetic on a null at \"20 + null\"", e.getMessage()); + } + } + + @Test + public void subtractNull() throws IOException { + try { + parseAndExecute("20.0 - null"); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'20.0 - null' produced a semantic error: You cannot do arithmetic on a null at \"20.0 - null\"", e.getMessage()); + } + } + + @Test + public void multiplyNull() throws IOException { + try { + parseAndExecute("20 * null"); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'20 * null' produced a semantic error: You cannot do arithmetic on a null at \"20 * null\"", 
e.getMessage()); + } + } + + @Test + public void divideNull() throws IOException { + try { + parseAndExecute("20 / null"); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'20 / null' produced a semantic error: You cannot do arithmetic on a null at \"20 / null\"", e.getMessage()); + } + } + + @Test + public void modNull() throws IOException { + try { + parseAndExecute("20 % null"); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'20 % null' produced a semantic error: You cannot do mod on a null at \"20 % null\"", e.getMessage()); + } + } + + @Test + public void nullAdd() throws IOException { + try { + parseAndExecute("null + 20"); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'null + 20' produced a semantic error: You cannot do arithmetic on a null at \"null + 20\"", e.getMessage()); + } + } + + @Test + public void nullSubtract() throws IOException { + try { + parseAndExecute("null - 20.0"); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'null - 20.0' produced a semantic error: You cannot do arithmetic on a null at \"null - 20.0\"", e.getMessage()); + } + } + + @Test + public void nullMultiply() throws IOException { + try { + parseAndExecute("null * 20"); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'null * 20' produced a semantic error: You cannot do arithmetic on a null at \"null * 20\"", e.getMessage()); + } + } + + @Test + public void nullDivide() throws IOException { + try { + parseAndExecute("null / 20"); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'null / 20' produced a semantic error: You cannot do arithmetic on a null at \"null / 20\"", e.getMessage()); + } + } + + @Test + public void nullMod() throws IOException { + try { + parseAndExecute("null % 20"); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'null % 20' produced a semantic error: You cannot do mod on a null at \"null % 20\"", e.getMessage()); + } + } + + @Test + public void pathNamedVariable() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"classes\" : [ \"pe\", \"history\" ] }"); + PathExecutionResult pathExecResult = parseAndExecute("$.classes[$i]", json, Collections.singletonMap("i", new JsonSequence(1L))); + + JsonSequence wrappedExpected = valueParser.parse(" { \"k\" : \"history\" }"); + Assert.assertEquals(wrappedExpected.asObject().get("k"), pathExecResult.match); + } + + @Test + public void pathNamedVariableNoMatchingId() throws IOException { + String pathExpr = "$fred"; + try { + parseAndExecute(pathExpr, emptyJson, Collections.singletonMap("bob", new JsonSequence(5L))); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'" + pathExpr + "' produced a semantic error: Variable fred" + + " referenced in path expression but no matching id found in passing clause at \"$ fred\"", e.getMessage()); + } + } + + @Test + public void pathNamedVariableNullPassing() throws IOException { + String pathExpr = "$fred"; + try { + parseAndExecute(pathExpr, emptyJson); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'" + pathExpr + "' produced a semantic error: Variable fred" + + " referenced in path expression but no matching id found in passing clause at \"$ fred\"", e.getMessage()); + } + } + + @Test + public void pathNamedVariableEmptyPassing() throws IOException { + String pathExpr = "$fred"; + try { + parseAndExecute(pathExpr, 
emptyJson, Collections.emptyMap()); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'" + pathExpr + "' produced a semantic error: Variable fred" + + " referenced in path expression but no matching id found in passing clause at \"$ fred\"", e.getMessage()); + } + } + + @Test + public void fullMatch() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(" { \"name\" : \"fred\" }"); + PathExecutionResult pathExecResult = parseAndExecute("$", json); + Assert.assertEquals(json, pathExecResult.match); + } + + @Test + public void matchKeyString() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(" { \"name\" : \"fred\" }"); + PathExecutionResult pathExecResult = parseAndExecute("$.name", json); + Assert.assertTrue(pathExecResult.match.isString()); + Assert.assertEquals("fred", pathExecResult.match.asString()); + } + + @Test + public void matchKeyLong() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(" { \"name\" : \"fred\", \"age\" : 35 }"); + PathExecutionResult pathExecResult = parseAndExecute("$.age", json); + Assert.assertTrue(pathExecResult.match.isLong()); + Assert.assertEquals(35, pathExecResult.match.asLong()); + } + + @Test + public void matchKeyDouble() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(" { \"name\" : \"fred\", \"gpa\" : 2.73 }"); + PathExecutionResult pathExecResult = parseAndExecute("$.gpa", json); + Assert.assertTrue(pathExecResult.match.isDouble()); + Assert.assertEquals(2.73, pathExecResult.match.asDouble(), 0.001); + } + + @Test + public void matchKeyBool() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(" { \"name\" : \"fred\", \"honor roll\" : false }"); + PathExecutionResult pathExecResult = parseAndExecute("$.\"honor roll\"", json); + Assert.assertTrue(pathExecResult.match.isBool()); + Assert.assertFalse(pathExecResult.match.asBool()); + } + + @Test + public void matchKeyNull() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(" { \"name\" : \"fred\", \"sports\" : null }"); + PathExecutionResult pathExecResult = parseAndExecute("$.sports", json); + Assert.assertTrue(pathExecResult.match.isNull()); + } + + @Test + public void matchKeyQuotes() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(" { \"name\" : \"fred\" }"); + PathExecutionResult pathExecResult = parseAndExecute("$.\"name\"", json); + Assert.assertTrue(pathExecResult.match.isString()); + Assert.assertEquals("fred", pathExecResult.match.asString()); + } + + @Test + public void noMatchKey() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(" { \"name\" : \"fred\" }"); + PathExecutionResult pathExecResult = parseAndExecute("$.address", json); + Assert.assertTrue(pathExecResult.match.isEmpty()); + } + + @Test + public void noMatchKeyQuotes() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(" { \"name\" : \"fred\" }"); + PathExecutionResult pathExecResult = parseAndExecute("$.\"address\"", json); + Assert.assertTrue(pathExecResult.match.isEmpty()); + } + + @Test + public void objectWildcard() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(" { \"name\" : \"fred\", \"age\" : 35 }"); + JsonSequence expected = new JsonSequence(json); + PathExecutionResult pathExecResult = parseAndExecute("$.*", json); + + + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public 
void objectWildcardEmpty() throws IOException, JsonPathException { + PathExecutionResult pathExecResult = parseAndExecute("$.*", emptyJson); + Assert.assertEquals(emptyJson, pathExecResult.match); + } + + @Test + public void simpleSubscriptList() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"classes\" : [ \"science\", \"art\" ] }"); + PathExecutionResult pathExecResult = parseAndExecute("$.classes[0]", json); + + JsonSequence wrappedExpected = valueParser.parse(" { \"k\" : \"science\" }"); + Assert.assertEquals(wrappedExpected.asObject().get("k"), pathExecResult.match); + } + + @Test + public void simpleSubscriptObject() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\"," + + "\"classes\" : [ \"science\", \"art\" ]," + + "\"sports\" : [ \"swimming\", \"baseball\" ] }"); + PathExecutionResult pathExecResult = parseAndExecute("$.*[1]", json); + + JsonSequence expected = valueParser.parse(" { \"classes\" : \"art\", \"sports\" : \"baseball\" }"); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void lastSubscriptList() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"classes\" : [ \"science\", \"art\" ] }"); + PathExecutionResult pathExecResult = parseAndExecute("$.classes[last]", json); + + JsonSequence wrappedExpected = valueParser.parse(" { \"k\" : \"art\" }"); + Assert.assertEquals(wrappedExpected.asObject().get("k"), pathExecResult.match); + } + + @Test + public void lastSubscriptObject() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\"," + + "\"classes\" : [ \"science\", \"art\" ]," + + "\"sports\" : [ \"swimming\", \"baseball\" ] }"); + PathExecutionResult pathExecResult = parseAndExecute("$.*[last]", json); + + JsonSequence expected = valueParser.parse(" { \"classes\" : \"art\", \"sports\" : \"baseball\" }"); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void toSubscriptList() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\"," + + "\"classes\" : [ \"science\", \"art\", \"math\", \"history\", \"writing\" ] }"); + PathExecutionResult pathExecResult = parseAndExecute("$.classes[1 to 3]", json); + + JsonSequence wrappedExpected = valueParser.parse(" { \"k\" : [ \"art\", \"math\", \"history\" ] }"); + Assert.assertEquals(wrappedExpected.asObject().get("k"), pathExecResult.match); + } + + @Test + public void toSubscriptObject() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\"," + + "\"classes\" : [ \"science\", \"art\", \"math\", \"history\", \"writing\" ]," + + "\"sports\" : [ \"swimming\", \"baseball\", \"volleyball\", \"soccer\" ] }"); + PathExecutionResult pathExecResult = parseAndExecute("$.*[1 to 3]", json); + + JsonSequence expected = valueParser.parse(" { \"classes\" : [ \"art\", \"math\", \"history\"]," + + "\"sports\" : [ \"baseball\", \"volleyball\", \"soccer\" ] }"); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void toLastSubscriptList() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\"," + + "\"classes\" : [ \"science\", \"art\", \"math\", \"history\", \"writing\" ] }"); + PathExecutionResult pathExecResult = parseAndExecute("$.classes[1 to last]", json); + + JsonSequence wrappedExpected = valueParser.parse(" 
{ \"k\" : [ \"art\", \"math\", \"history\", \"writing\" ] }"); + Assert.assertEquals(wrappedExpected.asObject().get("k"), pathExecResult.match); + } + + @Test + public void toLastSubscriptObject() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\"," + + "\"classes\" : [ \"science\", \"art\", \"math\", \"history\", \"writing\" ]," + + "\"sports\" : [ \"swimming\", \"baseball\", \"volleyball\", \"soccer\" ] }"); + PathExecutionResult pathExecResult = parseAndExecute("$.*[1 to last]", json); + + JsonSequence expected = valueParser.parse(" { \"classes\" : [ \"art\", \"math\", \"history\", \"writing\"]," + + " \"sports\" : [ \"baseball\", \"volleyball\", \"soccer\" ] }"); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void notAnArraySubscriptList() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"classes\" : [ \"science\", \"art\" ] }"); + PathExecutionResult pathExecResult = parseAndExecute("$.name[0]", json); + Assert.assertTrue(pathExecResult.match.isEmpty()); + } + + @Test + public void notAnArraySubscriptObject() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\" }"); + PathExecutionResult pathExecResult = parseAndExecute("$.*[1]", json); + + Assert.assertEquals(emptyJson, pathExecResult.match); + } + + @Test + public void offEndSubscriptList() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"classes\" : [ \"science\", \"art\" ] }"); + PathExecutionResult pathExecResult = parseAndExecute("$.classes[3]", json); + Assert.assertTrue(pathExecResult.match.isEmpty()); + } + + @Test + public void offEndSubscriptObject() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\"," + + "\"classes\" : [ \"science\", \"art\" ]," + + "\"sports\" : [ \"swimming\", \"baseball\", \"soccer\" ] }"); + PathExecutionResult pathExecResult = parseAndExecute("$.*[2]", json); + + JsonSequence expected = valueParser.parse(" { \"sports\" : \"soccer\" }"); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void toOffEndSubscriptList() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\"," + + "\"classes\" : [ \"science\", \"art\", \"math\", \"history\", \"writing\" ] }"); + PathExecutionResult pathExecResult = parseAndExecute("$.classes[3 to 5]", json); + + JsonSequence wrappedExpected = valueParser.parse(" { \"k\" : [ \"history\", \"writing\" ] }"); + Assert.assertEquals(wrappedExpected.asObject().get("k"), pathExecResult.match); + } + + @Test + public void toOffEndSubscriptObject() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\"," + + "\"classes\" : [ \"science\", \"art\", \"math\", \"history\", \"writing\" ]," + + "\"sports\" : [ \"swimming\", \"baseball\", \"volleyball\", \"soccer\" ] }"); + PathExecutionResult pathExecResult = parseAndExecute("$.*[3 to 5]", json); + + JsonSequence expected = valueParser.parse(" { \"classes\" : [ \"history\", \"writing\"], \"sports\" : [ \"soccer\" ] }"); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void listSubscriptList() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\"," + + "\"classes\" : [ \"science\", \"art\", \"math\", \"history\", \"writing\" ] }"); + 
PathExecutionResult pathExecResult = parseAndExecute("$.classes[1, 4]", json); + + JsonSequence wrappedExpected = valueParser.parse(" { \"k\" : [ \"art\", \"writing\" ] }"); + Assert.assertEquals(wrappedExpected.asObject().get("k"), pathExecResult.match); + } + + @Test + public void listSubscriptObject() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\"," + + "\"classes\" : [ \"science\", \"art\", \"math\", \"history\", \"writing\" ]," + + "\"sports\" : [ \"swimming\", \"baseball\", \"volleyball\", \"soccer\" ] }"); + PathExecutionResult pathExecResult = parseAndExecute("$.*[1, 4]", json); + + JsonSequence expected = valueParser.parse(" { \"classes\" : [ \"art\", \"writing\"], \"sports\" : [ \"baseball\" ] }"); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void listAndToSubscriptList() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\"," + + "\"classes\" : [ \"science\", \"art\", \"math\", \"history\", \"writing\" ] }"); + PathExecutionResult pathExecResult = parseAndExecute("$.classes[0, 3 to 5]", json); + + JsonSequence wrappedExpected = valueParser.parse(" { \"k\" : [ \"science\", \"history\", \"writing\" ] }"); + Assert.assertEquals(wrappedExpected.asObject().get("k"), pathExecResult.match); + } + + @Test + public void listAndToSubscriptObject() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\"," + + "\"classes\" : [ \"science\", \"art\", \"math\", \"history\", \"writing\" ]," + + "\"sports\" : [ \"swimming\", \"baseball\", \"volleyball\", \"soccer\" ] }"); + PathExecutionResult pathExecResult = parseAndExecute("$.*[0, 2 to last]", json); + + JsonSequence expected = valueParser.parse(" { \"classes\" : [ \"science\", \"math\", \"history\", \"writing\"]," + + " \"sports\" : [ \"swimming\", \"volleyball\", \"soccer\" ] }"); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void arithmeticSubscriptList() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\"," + + "\"classes\" : [ \"science\", \"art\", \"math\", \"history\", \"writing\" ] }"); + PathExecutionResult pathExecResult = parseAndExecute("$.classes[1 + 1]", json); + + JsonSequence wrappedExpected = valueParser.parse(" { \"k\" : \"math\" }"); + Assert.assertEquals(wrappedExpected.asObject().get("k"), pathExecResult.match); + } + + @Test + public void wildcardSubscriptList() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\"," + + "\"classes\" : [ \"science\", \"art\", \"math\" ] }"); + PathExecutionResult pathExecResult = parseAndExecute("$.classes[*]", json); + + JsonSequence wrappedExpected = valueParser.parse(" { \"k\" : [ \"science\", \"art\", \"math\" ] }"); + Assert.assertEquals(wrappedExpected.asObject().get("k"), pathExecResult.match); + } + + @Test + public void wildcardSubscriptObject() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\"," + + "\"classes\" : [ \"science\", \"art\", \"math\" ]," + + "\"sports\" : [ \"swimming\", \"baseball\" ] }"); + PathExecutionResult pathExecResult = parseAndExecute("$.*[*]", json); + + JsonSequence expected = valueParser.parse(" { \"classes\" : [ \"science\", \"art\", \"math\"]," + + " \"sports\" : [ \"swimming\", \"baseball\" ] }"); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void 
typeLong() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"age\" : 35 }"); + PathExecutionResult pathExecResult = parseAndExecute("$.age.type()", json); + Assert.assertTrue(pathExecResult.match.isString()); + Assert.assertEquals("number", pathExecResult.match.asString()); + } + + @Test + public void typeDouble() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"gpa\" : 3.58 }"); + PathExecutionResult pathExecResult = parseAndExecute("$.gpa.type()", json); + Assert.assertTrue(pathExecResult.match.isString()); + Assert.assertEquals("number", pathExecResult.match.asString()); + } + + @Test + public void typeString() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"gpa\" : 3.58 }"); + PathExecutionResult pathExecResult = parseAndExecute("$.name.type()", json); + Assert.assertTrue(pathExecResult.match.isString()); + Assert.assertEquals("string", pathExecResult.match.asString()); + } + + @Test + public void typeNull() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"age\" : 35, \"sports\" : null }"); + PathExecutionResult pathExecResult = parseAndExecute("$.sports.type()", json); + Assert.assertTrue(pathExecResult.match.isString()); + Assert.assertEquals("null", pathExecResult.match.asString()); + } + + @Test + public void typeBoolean() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"age\" : 35, \"honor roll\" : true }"); + PathExecutionResult pathExecResult = parseAndExecute("$.\'honor roll\'.type()", json); + Assert.assertTrue(pathExecResult.match.isString()); + Assert.assertEquals("boolean", pathExecResult.match.asString()); + } + + @Test + public void typeList() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"classes\" : [ \"art\", \"math\" ] }"); + PathExecutionResult pathExecResult = parseAndExecute("$.classes.type()", json); + Assert.assertTrue(pathExecResult.match.isString()); + Assert.assertEquals("array", pathExecResult.match.asString()); + } + + @Test + public void typeObject() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"address\" : { \"street\" : \"123 main\", \"city\" : \"phoenix\" } }"); + PathExecutionResult pathExecResult = parseAndExecute("$.address.type()", json); + Assert.assertTrue(pathExecResult.match.isString()); + Assert.assertEquals("object", pathExecResult.match.asString()); + } + + @Test + public void typeEmpty() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"age\" : 35 }"); + PathExecutionResult pathExecResult = parseAndExecute("$.gpa.type()", json); + Assert.assertTrue(pathExecResult.match.isEmpty()); + } + + @Test + public void sizeScalar() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"age\" : 35, \"honor roll\" : true }"); + PathExecutionResult pathExecResult = parseAndExecute("$.\'honor roll\'.size()", json); + Assert.assertTrue(pathExecResult.match.isLong()); + Assert.assertEquals(1, pathExecResult.match.asLong()); + } + + @Test + public void sizeList() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"classes\" : [ \"art\", \"math\" ] }"); + PathExecutionResult pathExecResult = 
parseAndExecute("$.classes.size()", json); + Assert.assertTrue(pathExecResult.match.isLong()); + Assert.assertEquals(2, pathExecResult.match.asLong()); + } + + @Test + public void sizeObject() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"address\" : { \"street\" : \"123 main\", \"city\" : \"phoenix\" } }"); + PathExecutionResult pathExecResult = parseAndExecute("$.address.size()", json); + Assert.assertTrue(pathExecResult.match.isLong()); + Assert.assertEquals(2, pathExecResult.match.asLong()); + } + + @Test + public void sizeEmpty() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"age\" : 35 }"); + PathExecutionResult pathExecResult = parseAndExecute("$.gpa.size()", json); + Assert.assertTrue(pathExecResult.match.isEmpty()); + } + + @Test + public void doubleLong() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"age\" : 35 }"); + PathExecutionResult pathExecResult = parseAndExecute("$.age.double()", json); + Assert.assertTrue(pathExecResult.match.isDouble()); + Assert.assertEquals(35.0, pathExecResult.match.asDouble(), 0.001); + } + + @Test + public void doubleDouble() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"gpa\" : 3.58 }"); + PathExecutionResult pathExecResult = parseAndExecute("$.gpa.double()", json); + Assert.assertTrue(pathExecResult.match.isDouble()); + Assert.assertEquals(3.58, pathExecResult.match.asDouble(), 0.001); + } + + @Test + public void doubleString() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"gpa\" : \"3.58\" }"); + PathExecutionResult pathExecResult = parseAndExecute("$.gpa.double()", json); + Assert.assertTrue(pathExecResult.match.isDouble()); + Assert.assertEquals(3.58, pathExecResult.match.asDouble(), 0.001); + } + + @Test + public void doubleNotStringOrNumeric() throws IOException { + try { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"honor roll\" : true }"); + parseAndExecute("$.\"honor roll\".double()", json); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'$.\"honor roll\".double()' produced a runtime error: Double method" + + " requires numeric or string argument, passed a bool at \"double ( )\"", e.getMessage()); + + } + } + + @Test + public void doubleEmpty() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"gpa\" : 3.58 }"); + PathExecutionResult pathExecResult = parseAndExecute("$.sports.double()", json); + Assert.assertTrue(pathExecResult.match.isEmpty()); + } + + @Test + public void intLong() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"age\" : 35 }"); + PathExecutionResult pathExecResult = parseAndExecute("$.age.integer()", json); + Assert.assertTrue(pathExecResult.match.isLong()); + Assert.assertEquals(35, pathExecResult.match.asLong()); + } + + @Test + public void intDouble() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"gpa\" : 3.58 }"); + PathExecutionResult pathExecResult = parseAndExecute("$.gpa.integer()", json); + Assert.assertTrue(pathExecResult.match.isLong()); + Assert.assertEquals(3, pathExecResult.match.asLong()); + } + + @Test + public void intString() throws IOException, JsonPathException { + JsonSequence json = 
valueParser.parse("{ \"name\" : \"fred\", \"age\" : \"35\" }"); + PathExecutionResult pathExecResult = parseAndExecute("$.age.integer()", json); + Assert.assertTrue(pathExecResult.match.isLong()); + Assert.assertEquals(35, pathExecResult.match.asLong()); + } + + @Test + public void intNotStringOrNumeric() throws IOException { + try { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"honor roll\" : true }"); + parseAndExecute("$.\"honor roll\".integer()", json); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'$.\"honor roll\".integer()' produced a runtime error: Integer method" + + " requires numeric or string argument, passed a bool at \"integer ( )\"", e.getMessage()); + + } + } + + @Test + public void intEmpty() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"gpa\" : 3.58 }"); + PathExecutionResult pathExecResult = parseAndExecute("$.sports.integer()", json); + Assert.assertTrue(pathExecResult.match.isEmpty()); + } + + @Test + public void ceilingLong() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"age\" : 35 }"); + PathExecutionResult pathExecResult = parseAndExecute("$.age.ceiling()", json); + Assert.assertTrue(pathExecResult.match.isLong()); + Assert.assertEquals(35, pathExecResult.match.asLong()); + } + + @Test + public void ceilingDouble() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"gpa\" : 3.58 }"); + PathExecutionResult pathExecResult = parseAndExecute("$.gpa.ceiling()", json); + Assert.assertTrue(pathExecResult.match.isLong()); + Assert.assertEquals(4, pathExecResult.match.asLong()); + } + + @Test + public void ceilingNotNumeric() throws IOException { + try { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"honor roll\" : true }"); + parseAndExecute("$.name.ceiling()", json); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'$.name.ceiling()' produced a runtime error: Ceiling method" + + " requires numeric argument, passed a string at \"ceiling ( )\"", e.getMessage()); + + } + } + + @Test + public void ceilingEmpty() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"gpa\" : 3.58 }"); + PathExecutionResult pathExecResult = parseAndExecute("$.sports.ceiling()", json); + Assert.assertTrue(pathExecResult.match.isEmpty()); + } + + @Test + public void floorLong() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"age\" : 35 }"); + PathExecutionResult pathExecResult = parseAndExecute("$.age.floor()", json); + Assert.assertTrue(pathExecResult.match.isLong()); + Assert.assertEquals(35, pathExecResult.match.asLong()); + } + + @Test + public void floorDouble() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"gpa\" : 3.58 }"); + PathExecutionResult pathExecResult = parseAndExecute("$.gpa.floor()", json); + Assert.assertTrue(pathExecResult.match.isLong()); + Assert.assertEquals(3, pathExecResult.match.asLong()); + } + + @Test + public void floorNotNumeric() throws IOException { + try { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"honor roll\" : true }"); + parseAndExecute("$.name.floor()", json); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'$.name.floor()' produced a runtime error: Floor method" + + " requires numeric 
argument, passed a string at \"floor ( )\"", e.getMessage()); + + } + } + + @Test + public void floorEmpty() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"gpa\" : 3.58 }"); + PathExecutionResult pathExecResult = parseAndExecute("$.sports.floor()", json); + Assert.assertTrue(pathExecResult.match.isEmpty()); + } + + @Test + public void absLong() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"age\" : 35 }"); + PathExecutionResult pathExecResult = parseAndExecute("$.age.abs()", json); + Assert.assertTrue(pathExecResult.match.isLong()); + Assert.assertEquals(35, pathExecResult.match.asLong()); + } + + @Test + public void absDouble() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"gpa\" : -3.58 }"); + PathExecutionResult pathExecResult = parseAndExecute("$.gpa.abs()", json); + Assert.assertTrue(pathExecResult.match.isDouble()); + Assert.assertEquals(3.58, pathExecResult.match.asDouble(), 0.001); + } + + @Test + public void absNotNumeric() throws IOException { + try { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"honor roll\" : true }"); + parseAndExecute("$.name.abs()", json); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'$.name.abs()' produced a runtime error: Abs method" + + " requires numeric argument, passed a string at \"abs ( )\"", e.getMessage()); + + } + } + + @Test + public void absEmpty() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse("{ \"name\" : \"fred\", \"gpa\" : 3.58 }"); + PathExecutionResult pathExecResult = parseAndExecute("$.sports.abs()", json); + Assert.assertTrue(pathExecResult.match.isEmpty()); + } + + String bigHarryDeepThing = + "{" + + " \"name\" : \"fred\"," + + " \"classes\" : [ " + + " {" + + " \"name\" : \"science\"," + + " \"professor\" : \"d. who\"," + + " \"texts\" : [" + + " {" + + " \"title\" : \"intro to physics\"," + + " \"author\" : \"i. newton\"" + + " }, {" + + " \"title\" : \"intro to biology\"," + + " \"author\" : \"c. darwin\"" + + " }" + + " ]" + + " }, {" + + " \"name\" : \"art\"," + + " \"professor\" : \"v. van gogh\"" + + " }" + + " ]" + + "}"; + + @Test + public void multiLevelMemberAccess() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(bigHarryDeepThing); + Map<String, JsonSequence> passing = new HashMap<>(); + passing.put("class", new JsonSequence(0)); + passing.put("text", new JsonSequence(1)); + PathExecutionResult pathExecResult = parseAndExecute("$.classes[$class].texts[$text].author", json, passing); + + JsonSequence wrappedExpected = valueParser.parse(" { \"k\" : \"c. darwin\" }"); + Assert.assertEquals(wrappedExpected.asObject().get("k"), pathExecResult.match); + } + + @Test + public void multiLevelMemberList() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(bigHarryDeepThing); + Map<String, JsonSequence> passing = new HashMap<>(); + passing.put("class", new JsonSequence(0)); + passing.put("text", new JsonSequence(1)); + PathExecutionResult pathExecResult = parseAndExecute("$.classes[$class].texts[*].author", json, passing); + + JsonSequence wrappedExpected = valueParser.parse("{ \"k\" : [ \"i. newton\", \"c.
darwin\" ] }"); + Assert.assertEquals(wrappedExpected.asObject().get("k"), pathExecResult.match); + } + + @Test + public void multiLevelMemberListNotSpecifiedStrict() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(bigHarryDeepThing); + Map<String, JsonSequence> passing = new HashMap<>(); + passing.put("class", new JsonSequence(0)); + passing.put("text", new JsonSequence(1)); + PathExecutionResult pathExecResult = parseAndExecute("$.classes[$class].texts.author", json, passing); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterExists() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse( + "{" + + "\"name\" : \"fred\"," + + "\"address\" : {" + + "\"street\" : \"123 Main\"," + + "\"city\" : \"Springfield\"," + + "\"zip\" : 12345" + + "}" + + "}"); + PathExecutionResult pathExecResult = parseAndExecute("$.address?(exists(@.city))", json); + + JsonSequence expected = valueParser.parse( + "{" + + "\"street\" : \"123 Main\"," + + "\"city\" : \"Springfield\"," + + "\"zip\" : 12345" + + "}"); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterListExists() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse( + "{" + + "\"classes\" : [" + + "{" + + "\"department\" : \"history\"," + + "\"number\" : 202," + + "\"professor\" : \"Who\"," + + "\"prerequisites\" : [ \"history 201\" ]" + + "}, {" + + "\"department\" : \"music\"," + + "\"number\" : 101," + + "\"professor\" : \"Beethoven\"" + + "}" + + "]" + + "}"); + + JsonSequence expected = valueParser.parse( + "{" + + "\"bogus\" : [" + + "{" + + "\"department\" : \"history\"," + + "\"number\" : 202," + + "\"professor\" : \"Who\"," + + "\"prerequisites\" : [ \"history 201\" ]" + + "}" + + "]" + + "}"); + + PathExecutionResult pathExecResult = parseAndExecute("$.classes[*]?(exists(@.prerequisites))", json); + Assert.assertEquals(expected.asObject().get("bogus"), pathExecResult.match); + + } + + @Test + public void filterNonExistent() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse( + "{" + + "\"name\" : \"fred\"," + + "\"address\" : {" + + "\"street\" : \"123 Main\"," + + "\"city\" : \"Springfield\"," + + "\"zip\" : 12345" + + "}" + + "}"); + PathExecutionResult pathExecResult = parseAndExecute("$.address?(exists(@.state))", json); + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterNotExists() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse( + "{" + + "\"name\" : \"fred\"," + + "\"address\" : {" + + "\"street\" : \"123 Main\"," + + "\"city\" : \"Springfield\"," + + "\"zip\" : 12345" + + "}" + + "}"); + PathExecutionResult pathExecResult = parseAndExecute("$.address?(!exists(@.state))", json); + + JsonSequence expected = valueParser.parse( + "{" + + "\"street\" : \"123 Main\"," + + "\"city\" : \"Springfield\"," + + "\"zip\" : 12345" + + "}"); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterAndTrue() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse( + "{" + + "\"name\" : \"fred\"," + + "\"address\" : {" + + "\"street\" : \"123 Main\"," + + "\"city\" : \"Springfield\"," + + "\"zip\" : 12345" + + "}" + + "}"); + PathExecutionResult pathExecResult = parseAndExecute("$.address?(exists(@.city) && exists(@.zip))", json); + + JsonSequence expected = valueParser.parse( + "{" + + "\"street\" : \"123 Main\"," +
"\"city\" : \"Springfield\"," + + "\"zip\" : 12345" + + "}"); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterAndFirstFalse() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse( + "{" + + "\"name\" : \"fred\"," + + "\"address\" : {" + + "\"street\" : \"123 Main\"," + + "\"city\" : \"Springfield\"," + + "\"zip\" : 12345" + + "}" + + "}"); + PathExecutionResult pathExecResult = parseAndExecute("$.address?(exists(@.state) && exists(@.zip))", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterAndSecondFalse() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse( + "{" + + "\"name\" : \"fred\"," + + "\"address\" : {" + + "\"street\" : \"123 Main\"," + + "\"city\" : \"Springfield\"," + + "\"zip\" : 12345" + + "}" + + "}"); + PathExecutionResult pathExecResult = parseAndExecute("$.address?(exists(@.city) && exists(@.state))", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterOrBothTrue() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse( + "{" + + "\"name\" : \"fred\"," + + "\"address\" : {" + + "\"street\" : \"123 Main\"," + + "\"city\" : \"Springfield\"," + + "\"zip\" : 12345" + + "}" + + "}"); + PathExecutionResult pathExecResult = parseAndExecute("$.address?(exists(@.city) || exists(@.zip))", json); + + JsonSequence expected = valueParser.parse( + "{" + + "\"street\" : \"123 Main\"," + + "\"city\" : \"Springfield\"," + + "\"zip\" : 12345" + + "}"); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterOrFirstTrue() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse( + "{" + + "\"name\" : \"fred\"," + + "\"address\" : {" + + "\"street\" : \"123 Main\"," + + "\"city\" : \"Springfield\"," + + "\"zip\" : 12345" + + "}" + + "}"); + PathExecutionResult pathExecResult = parseAndExecute("$.address?(exists(@.city) || exists(@.state))", json); + + JsonSequence expected = valueParser.parse( + "{" + + "\"street\" : \"123 Main\"," + + "\"city\" : \"Springfield\"," + + "\"zip\" : 12345" + + "}"); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterOrSecondTrue() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse( + "{" + + "\"name\" : \"fred\"," + + "\"address\" : {" + + "\"street\" : \"123 Main\"," + + "\"city\" : \"Springfield\"," + + "\"zip\" : 12345" + + "}" + + "}"); + PathExecutionResult pathExecResult = parseAndExecute("$.address?(exists(@.state) || exists(@.zip))", json); + + JsonSequence expected = valueParser.parse( + "{" + + "\"street\" : \"123 Main\"," + + "\"city\" : \"Springfield\"," + + "\"zip\" : 12345" + + "}"); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterBothFalse() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse( + "{" + + "\"name\" : \"fred\"," + + "\"address\" : {" + + "\"street\" : \"123 Main\"," + + "\"city\" : \"Springfield\"," + + "\"zip\" : 12345" + + "}" + + "}"); + PathExecutionResult pathExecResult = parseAndExecute("$.address?(exists(@.state) || exists(@.country))", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + // The following is used in many of the following (in)equality tests. 
+ private String equalityJson = + "{" + + "\"name\" : \"fred\"," + + "\"education\" : {" + + "\"school\" : \"usc\"," + + "\"years attended\" : 4," + + "\"gpa\" : 3.29," + + "\"graduated\" : true," + + "\"sports\" : null," + + "\"activities\" : [ \"pkg\", \"ddd\" ]," + + "\"clubs\" : [ \"pkg\", \"ddd\" ]," + + "\"extracurricular\" : [ \"soup kitchen volunteer\" ]," + + "\"best class\" : { \"dept\" : \"math\", \"course\" : \"101\" }," + + "\"favorite class\" : { \"dept\" : \"math\", \"course\" : \"101\" }," + + "\"worst class\" : { \"dept\" : \"art\", \"course\" : \"201\" }" + + "}" + + "}"; + + private String expectedEqualityJson = + "{" + + "\"school\" : \"usc\"," + + "\"years attended\" : 4," + + "\"gpa\" : 3.29," + + "\"graduated\" : true," + + "\"sports\" : null," + + "\"activities\" : [ \"pkg\", \"ddd\" ]," + + "\"clubs\" : [ \"pkg\", \"ddd\" ]," + + "\"extracurricular\" : [ \"soup kitchen volunteer\" ]," + + "\"best class\" : { \"dept\" : \"math\", \"course\" : \"101\" }," + + "\"favorite class\" : { \"dept\" : \"math\", \"course\" : \"101\" }," + + "\"worst class\" : { \"dept\" : \"art\", \"course\" : \"201\" }" + + "}"; + + @Test + public void filterLongEquals() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.\"years attended\" == 4)", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterLongEqualsFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.\"years attended\" == 3)", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterLongDoubleEquals() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.\"years attended\" == 4.0)", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterLongDoubleEqualsFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.\"years attended\" == 3.0)", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterDoubleEquals() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.gpa == 3.29)", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterDoubleEqualsFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.gpa == 3.00)", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterStringEquals() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.school == \"usc\")", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + 
Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterStringEqualsFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.school == \"ucla\")", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterBoolEquals() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.graduated == true)", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterBoolEqualsFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.graduated == false)", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterNullEquals() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.sports == null)", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterNullEqualsFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.graduated == null)", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterListEquals() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.activities == @.clubs)", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterListEqualsFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.activities == @.extracurricular)", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterObjectEquals() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.\"best class\" == @.\"favorite class\")", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterObjectEqualsFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.\"best class\" == @.\"worst class\")", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterEmptyEquals() throws IOException, JsonPathException { + // Test that the right thing happens when the preceding path is empty + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.nosuch?(@.graduated == true)", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); 
+ } + + @Test + public void filterBadTypesEquals() throws IOException { + try { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.graduated == 3.141592654)", json); + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'$.education?(@.graduated == 3.141592654)' produced a semantic error: Cannot compare a bool to a double at \"@.graduated == 3.141592654\"", e.getMessage()); + } + } + + @Test + public void filterLongNe() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.\"years attended\" != 5)", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterLongNeFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.\"years attended\" != 4)", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterLongDoubleNe() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.\"years attended\" != 4.1)", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterLongDoubleNeFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.\"years attended\" != 4.0)", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterDoubleNe() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.gpa != 3.19)", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterDoubleNeFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.gpa != 3.29)", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterStringNe() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.school != \"ucla\")", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterStringNeFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.school != \"usc\")", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterBoolNe() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.graduated != false)", json); + + JsonSequence expected = 
valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterBoolNeFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.graduated <> true)", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterNullNe() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.graduated <> null)", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterNullNeFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.sports != null)", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterListNe() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.activities != @.extracurricular)", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterListNeFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.activities != @.clubs)", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterObjectNe() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.\"best class\" != @.\"worst class\")", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterObjectNeFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.\"best class\" != @.\"favorite class\")", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterEmptyNe() throws IOException, JsonPathException { + // Test that the right thing happens when the preceding path is empty + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.nosuch?(@.graduated != true)", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterBadTypesNe() throws IOException { + try { + String path = "$.education?(@.graduated != 3.141592654)"; + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute(path, json); + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'$.education?(@.graduated != 3.141592654)' produced a semantic error: Cannot compare a bool to a double at \"@.graduated != 3.141592654\"", e.getMessage()); + } + } + + @Test + public void filterLongLt() throws IOException, JsonPathException { + JsonSequence json = 
valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.\"years attended\" < 5)", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterLongLtFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.\"years attended\" < 4)", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterLongDoubleLt() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.\"years attended\" < 4.1)", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterLongDoubleLtFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.\"years attended\" < 4.0)", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterDoubleLt() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.gpa < 3.50)", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterDoubleLtFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.gpa < 2.29)", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterStringLt() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.school < \"yyy\")", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterStringLtFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.school < \"asc\")", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterLtBadType() throws IOException { + try { + String path = "$.education?(@.graduated < false)"; + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute(path, json); + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'$.education?(@.graduated < false)' produced a semantic error: Cannot apply an inequality operator to a bool at \"@.graduated < false\"", e.getMessage()); + } + } + + @Test + public void filterEmptyLt() throws IOException, JsonPathException { + // Test that the right thing happens when the preceding path is empty + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.nosuch?(@.gpa < 4.00)", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); 
+ } + + @Test + public void filterBadTypesLt() throws IOException { + try { + String path = "$.education?(@.gpa < 'abc')"; + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute(path, json); + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'$.education?(@.gpa < 'abc')' produced a semantic error: Cannot compare a decimal to a string at \"@.gpa < 'abc'\"", e.getMessage()); + } + } + + @Test + public void filterLongLe() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.\"years attended\" <= 4)", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterLongLeFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.\"years attended\" <= 3)", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterLongDoubleLe() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.\"years attended\" <= 4.1)", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterLongDoubleLeFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.\"years attended\" <= 3.99)", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterDoubleLe() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.gpa <= 3.50)", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterDoubleLeFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.gpa <= 2.29)", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterStringLe() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.school <= \"uscaaab\")", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterStringLeFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.school <= \"us\")", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterLeBadType() throws IOException { + try { + String path = "$.education?(@.graduated <= false)"; + JsonSequence json = valueParser.parse(equalityJson); + parseAndExecute(path, json); + Assert.fail(); + } catch (JsonPathException e) { + 
Assert.assertEquals("'$.education?(@.graduated <= false)' produced a semantic error: Cannot apply an inequality operator to a bool at \"@.graduated <= false\"", e.getMessage()); + } + } + + @Test + public void filterEmptyLe() throws IOException, JsonPathException { + // Test that the right thing happens when the preceding path is empty + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.nosuch?(@.gpa <= 4.00)", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterBadTypesLe() throws IOException { + try { + String path = "$.education?(@.gpa <= 'abc')"; + JsonSequence json = valueParser.parse(equalityJson); + parseAndExecute(path, json); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'$.education?(@.gpa <= 'abc')' produced a semantic error: Cannot compare a decimal to a string at \"@.gpa <= 'abc'\"", e.getMessage()); + } + } + + @Test + public void filterLongGt() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.\"years attended\" > 3)", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterLongGtFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.\"years attended\" > 4)", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterLongDoubleGt() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.\"years attended\" > 3.9)", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterLongDoubleGtFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.\"years attended\" > 4.0)", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterDoubleGt() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.gpa > 3.00)", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterDoubleGtFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.gpa > 3.79)", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterStringGt() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.school > \"u\")", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterStringGtFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + 
PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.school > \"z\")", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterGtBadType() throws IOException { + try { + String path = "$.education?(@.graduated > false)"; + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute(path, json); + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'$.education?(@.graduated > false)' produced a semantic error: Cannot apply an inequality operator to a bool at \"@.graduated > false\"", e.getMessage()); + } + } + + @Test + public void filterEmptyGt() throws IOException, JsonPathException { + // Test that the right thing happens when the preceding path is empty + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.nosuch?(@.gpa > 4.00)", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterBadTypesGt() throws IOException { + try { + String path = "$.education?(@.gpa > 'abc')"; + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute(path, json); + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'$.education?(@.gpa > 'abc')' produced a semantic error: Cannot compare a decimal to a string at \"@.gpa > 'abc'\"", e.getMessage()); + } + } + + @Test + public void filterLongGe() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.\"years attended\" >= 4)", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterLongGeFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.\"years attended\" >= 5)", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterLongDoubleGe() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.\"years attended\" >= 4.0)", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterLongDoubleGeFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.\"years attended\" >= 4.99)", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterDoubleGe() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.gpa >= 3.29)", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterDoubleGeFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult 
pathExecResult = parseAndExecute("$.education?(@.gpa >= 3.99)", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterStringGe() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.school >= \"usc\")", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterStringGeFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.school >= \"uxxx\")", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterGeBadType() throws IOException { + try { + String path = "$.education?(@.graduated >= false)"; + JsonSequence json = valueParser.parse(equalityJson); + parseAndExecute(path, json); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'$.education?(@.graduated >= false)' produced a semantic error: Cannot apply an inequality operator to a bool at \"@.graduated >= false\"", e.getMessage()); + } + } + + @Test + public void filterEmptyGe() throws IOException, JsonPathException { + // Test that the right thing happens when the preceding path is empty + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.nosuch?(@.gpa >= 4.00)", json); + + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterBadTypesGe() throws IOException { + try { + String path = "$.education?(@.gpa >= 'abc')"; + JsonSequence json = valueParser.parse(equalityJson); + parseAndExecute(path, json); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'$.education?(@.gpa >= 'abc')' produced a semantic error: Cannot compare a decimal to a string at \"@.gpa >= 'abc'\"", e.getMessage()); + } + } + + // TODO test equals double -long + // TODO test ne double -long + + // TODO test all the comparison predicates against a list + private String listEqualityJson = + "{" + + "\"name\" : \"fred\"," + + "\"classes\" : [" + + "{" + + "\"department\" : \"math\"," + + "\"number\" : 101," + + "\"avg attendance\" : 287.5," + + "\"honors\" : false," + + "\"prerequisites\" : null" + + "}, {" + + "\"department\" : \"art\"," + + "\"number\" : 401," + + "\"avg attendance\" : 7.0," + + "\"honors\" : true," + + "\"prerequisites\" : [ \"art 301\" ]" + + "}" + + "]" + + "}"; + + private String listEqualityJsonExpectedFirst = + "{ \"bogus\" : [" + + "{" + + "\"department\" : \"math\"," + + "\"number\" : 101," + + "\"avg attendance\" : 287.5," + + "\"honors\" : false," + + "\"prerequisites\" : null" + + "}" + + "] }"; + + private String listEqualityJsonExpectedSecond = + "{ \"bogus\" : [" + + "{" + + "\"department\" : \"art\"," + + "\"number\" : 401," + + "\"avg attendance\" : 7.0," + + "\"honors\" : true," + + "\"prerequisites\" : [ \"art 301\" ]" + + "}" + + "] }"; + + private String listEqualityJsonExpectedBoth = + "{ \"bogus\" : [" + + "{" + + "\"department\" : \"math\"," + + "\"number\" : 101," + + "\"avg attendance\" : 287.5," + + "\"honors\" : false," + + "\"prerequisites\" : null" + + "}, {" + + "\"department\" : \"art\"," + + "\"number\" : 401," + + "\"avg attendance\" : 7.0," + + "\"honors\" : true," + + "\"prerequisites\" : [ \"art 301\" ]" + + "}" + + "] 
}"; + + @Test + public void filterListLongEquals() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(listEqualityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.classes[*]?(@.number == 101)", json); + + JsonSequence expected = valueParser.parse(listEqualityJsonExpectedFirst); + Assert.assertEquals(expected.asObject().get("bogus"), pathExecResult.match); + } + + @Test + public void filterListStringNe() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(listEqualityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.classes[*]?(@.department != \"science\")", json); + + JsonSequence expected = valueParser.parse(listEqualityJsonExpectedBoth); + Assert.assertEquals(expected.asObject().get("bogus"), pathExecResult.match); + } + + @Test + public void filterListDoubleGt() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(listEqualityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.classes[*]?(@.\"avg attendance\" > 50)", json); + + JsonSequence expected = valueParser.parse(listEqualityJsonExpectedFirst); + Assert.assertEquals(expected.asObject().get("bogus"), pathExecResult.match); + } + + @Test + public void filterListNullNe() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(listEqualityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.classes?(@.prerequisites != null)", json); + + JsonSequence expected = valueParser.parse(listEqualityJsonExpectedSecond); + Assert.assertEquals(expected.asObject().get("bogus"), pathExecResult.match); + } + + @Test + public void filterListOr() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(listEqualityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.classes[*]?(@.department == \"art\" || @.department == \"math\")", json); + + JsonSequence expected = valueParser.parse(listEqualityJsonExpectedBoth); + Assert.assertEquals(expected.asObject().get("bogus"), pathExecResult.match); + } + + @Test + public void filterListAnd() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(listEqualityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.classes[*]?(@.number >= 200 && @.\"avg attendance\" > 20)", json); + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterRegex() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.school like_regex \"u.c\")", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterRegexFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.school like_regex \"u[x|y]c\")", json); + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterRegexList() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(listEqualityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.classes[*]?(@.department like_regex \"^a.*\")", json); + + JsonSequence expected = valueParser.parse(listEqualityJsonExpectedSecond); + Assert.assertEquals(expected.asObject().get("bogus"), pathExecResult.match); + } + + @Test + public void 
filterRegexBadtype() throws IOException { + try { + String path = "$.education?(@.gpa like_regex 'abc.*')"; + JsonSequence json = valueParser.parse(equalityJson); + parseAndExecute(path, json); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'$.education?(@.gpa like_regex 'abc.*')' produced a semantic error: Regular expressions can only be used on strings at \"@.gpa like_regex 'abc.*'\"", e.getMessage()); + } + } + + // TODO - figure out a regular expression that throws an error in the Java parser but not path parser + /* + @Test + public void filterRegexSyntaxError() throws IOException, JsonPathException { + String path = "$.education?(@.school like_regex '\\\\u12x abc')"; + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute(path, json); + try { + pathExecResult.errorListener.checkForErrors(path); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("", e.getMessage()); + } + } + */ + + @Test + public void filterStartsWith() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.school starts with \"us\")", json); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterStartsWithVal() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.school starts with $match)", json, Collections.singletonMap("match", new JsonSequence("us"))); + + JsonSequence expected = valueParser.parse(expectedEqualityJson); + Assert.assertEquals(expected, pathExecResult.match); + } + + @Test + public void filterStartsWithFails() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(equalityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.education?(@.school starts with \"oregon\")", json); + Assert.assertEquals(JsonSequence.emptyResult, pathExecResult.match); + } + + @Test + public void filterStartsWithList() throws IOException, JsonPathException { + JsonSequence json = valueParser.parse(listEqualityJson); + PathExecutionResult pathExecResult = parseAndExecute("$.classes[*]?(@.department starts with \"a\")", json); + + JsonSequence expected = valueParser.parse(listEqualityJsonExpectedSecond); + Assert.assertEquals(expected.asObject().get("bogus"), pathExecResult.match); + } + + @Test + public void filterStartsWithBadtype() throws IOException { + try { + String path = "$.education?(@.gpa starts with 'abc')"; + JsonSequence json = valueParser.parse(equalityJson); + parseAndExecute(path, json); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'$.education?(@.gpa starts with 'abc')' produced a semantic error: Starts with can only be used with strings at \"@.gpa starts with 'abc'\"", e.getMessage()); + } + } + + @Test + public void filterStartsWithBadtype2() throws IOException { + try { + String path = "$.education?(@.school starts with $match)"; + JsonSequence json = valueParser.parse(equalityJson); + parseAndExecute(path, json, Collections.singletonMap("match", new JsonSequence(3.14))); + Assert.fail(); + } catch (JsonPathException e) { + Assert.assertEquals("'$.education?(@.school starts with $match)' produced a semantic error: Starts with can only be used with strings at \"@.school starts with $match\"", e.getMessage()); + } + } 
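+ // Executing a path against a null JSON document should quietly yield an empty result rather than an error;
+ // the nullValue test below covers that, and the parseAndExecute overloads that follow are the shared harness
+ // for the tests above: parse the path with PathParser, evaluate it with PathExecutor, and wrap the outcome
+ // in a PathExecutionResult.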
+ + @Test + public void nullValue() throws IOException, JsonPathException { + PathExecutionResult result = parseAndExecute("$.name[1].phone", null); + Assert.assertTrue(result.match.isEmpty()); + } + + private PathExecutionResult parseAndExecute(String path) throws IOException, JsonPathException { + return parseAndExecute(path, emptyJson, null); + } + + private PathExecutionResult parseAndExecute(String path, JsonSequence values) throws IOException, JsonPathException { + return parseAndExecute(path, values, null); + } + + private PathExecutionResult parseAndExecute(String path, JsonSequence values, Map<String, JsonSequence> passing) + throws IOException, JsonPathException { + PathParser parser = new PathParser(); + PathParseResult parseResult = parser.parse(path); + PathExecutor executor = new PathExecutor(); + JsonSequence match = executor.execute(parseResult, values, passing); + return new PathExecutionResult(parseResult, executor, match); + } + + private static class PathExecutionResult { + final PathParseResult parseResult; + final PathExecutor executor; + final JsonSequence match; + + PathExecutionResult(PathParseResult parseResult, PathExecutor executor, JsonSequence match) { + this.parseResult = parseResult; + this.executor = executor; + this.match = match; + } + } +} diff --git ql/src/test/queries/clientpositive/is_json.q ql/src/test/queries/clientpositive/is_json.q new file mode 100644 index 0000000000..ccd1469c34 --- /dev/null +++ ql/src/test/queries/clientpositive/is_json.q @@ -0,0 +1,16 @@ +--! qt:dataset:src_json +--! qt:dataset:src + +DESCRIBE FUNCTION isjson; +DESCRIBE FUNCTION EXTENDED isjson; +DESCRIBE FUNCTION isnotjson; +DESCRIBE FUNCTION EXTENDED isnotjson; + +select 1 from src_json where json is json; + +select 1 from src_json where json is not json; + +select 1 from src where key is json; + +select 1 from src where key is not json limit 1; + diff --git ql/src/test/queries/clientpositive/json_vectorized.q ql/src/test/queries/clientpositive/json_vectorized.q new file mode 100644 index 0000000000..badabc3897 --- /dev/null +++ ql/src/test/queries/clientpositive/json_vectorized.q @@ -0,0 +1,41 @@ +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +create temporary table jsonvectorized (jsonval string); + +insert into jsonvectorized values + ('{"var" : "imastring" }'), + ('{"var" : -3 }'), + ('{"var" : 1987.12342 }'), + ('{"var" : true }'), + ('{"var" : [ 1, 2, 3] }'), + ('{"var" : { "nested" : true } }'), + ('{"var" : null }'), + ('{}') + ; + +explain vectorization detail +select 1 from jsonvectorized where jsonval is json; + +explain vectorization detail +select 1 from jsonvectorized where jsonval is not json; + +explain vectorization detail +select json_value(jsonval, '$.var'), + json_value(jsonval, '$.var', 'a'), + json_value(jsonval, '$.var', 1, 'default'), + json_value(jsonval, '$.var', 1000000.000001, 'default'), + json_value(jsonval, '$.var', false, 'default') + from jsonvectorized; + +explain vectorization detail +select json_value(jsonval, '$.var'), + json_value(jsonval, '$.var', 'a'), + json_value(jsonval, '$.var', 1, 'default'), + json_value(jsonval, '$.var', 1000000.000001, 'default'), + json_value(jsonval, '$.var', false, 'default'), + json_value(jsonval, '$.var', array(4, 5), 'default'), + json_value(jsonval, '$.var', array(4, 5), 'default', 'default'), + json_value(jsonval, '$.var', named_struct('nested', false), 'default'), + json_value(jsonval, '$.var', named_struct('nested', false), 'default', 'default') + from jsonvectorized; diff --git
ql/src/test/queries/clientpositive/jsonquery.q ql/src/test/queries/clientpositive/jsonquery.q new file mode 100644 index 0000000000..3b9086b245 --- /dev/null +++ ql/src/test/queries/clientpositive/jsonquery.q @@ -0,0 +1,21 @@ +create temporary table jsonquery ( + jsonval string, + c char(5), + vc varchar(100)); + +insert into jsonquery values + ('{"var" : "imastring" }', 'abc', 'def'), + ('{"var" : -3 }', 'ghi', 'jkl'), + ('{"var" : 1987.12342 }', 'mno', 'pqr'), + ('{"var" : true }', 'stu', 'vwx'), + ('{"var" : [ 1, 2, 3] }', 'yzA', 'BCD'), + ('{"var" : { "nested" : [ 1, { "key" : "value", "anotherkey" : 23 } ] } }', 'EFG', 'HIJ'), + ('{"var" : null }', 'KLM', 'NOP'), + ('{}', 'QRS', 'TUV') + ; + +select json_query(jsonval, '$.var'), + json_query(jsonval, '$.var', 'a'), + json_query(jsonval, '$.var', c, 'default'), + json_query(jsonval, '$.var', vc, 'default') + from jsonquery; diff --git ql/src/test/queries/clientpositive/jsonvalue.q ql/src/test/queries/clientpositive/jsonvalue.q new file mode 100644 index 0000000000..508d14a6c2 --- /dev/null +++ ql/src/test/queries/clientpositive/jsonvalue.q @@ -0,0 +1,39 @@ +create temporary table jsonvalue (jsonval string, defaultval string); + +insert into jsonvalue values + ('{"name" : "harry", "age" : 17, "gpa" : 3.03, "honors" : false, "classes" : [ "math", "history" ], "numbers" : [ 1 , 2]}', 'ron'), + ('{"name" : "hermione", "age" : 18, "gpa" : 3.99, "honors" : true, "classes" : [ "science", "french" ], "numbers" : [10, 20]}', 'ginny'), + ('{"name" : null, "age" : null, "gpa" : null, "honors" : null, "classes" : null}', 'no name'), + ('{}', 'empty'), + ('{ "nested" : { "street" : "12 Grimmauld Place", "years at this address" : 1 } }', 'whatever'); + +select json_value(jsonval, '$.name'), + json_value(jsonval, '$.age', 1L), + json_value(jsonval, '$.age', 1), + json_value(jsonval, '$.gpa', 100.001), + json_value(jsonval, '$.honors', true) + from jsonvalue; + +select json_value(jsonval, '$.name', 'fred', 'DEFAULT'), + json_value(jsonval, '$.age', 1L, 'DEFAULT'), + json_value(jsonval, '$.age', 1, 'DEFAULT'), + json_value(jsonval, '$.gpa', 100.001, 'DEFAULT'), + json_value(jsonval, '$.honors', true, 'DEFAULT') + from jsonvalue; + +select json_value(jsonval, '$.name', defaultval, 'DEFAULT') + from jsonvalue; + +select + json_value(jsonval, '$.classes', array('a')), + json_value(jsonval, '$.numbers', array(1)), + json_value(jsonval, '$.classes[$index]', 'a', 'NULL', 'NULL', 'index', 0) + from jsonvalue; + +select json_value(jsonval, '$.nested.street'), + json_value(jsonval, '$.nested."years at this address"', 1), + json_value(jsonval, '$.nested', named_struct('street', 'a', 'years at this address', 1)) + from jsonvalue; + + + diff --git ql/src/test/queries/clientpositive/jsonvalue_casts.q ql/src/test/queries/clientpositive/jsonvalue_casts.q new file mode 100644 index 0000000000..2829259af7 --- /dev/null +++ ql/src/test/queries/clientpositive/jsonvalue_casts.q @@ -0,0 +1,23 @@ +create temporary table jsonvaluecast (jsonval string); + +insert into jsonvaluecast values + ('{"var" : "imastring" }'), + ('{"var" : -3 }'), + ('{"var" : 1987.12342 }'), + ('{"var" : true }'), + ('{"var" : [ 1, 2, 3] }'), + ('{"var" : { "nested" : true } }'), + ('{"var" : null }'), + ('{}') + ; + +select json_value(jsonval, '$.var'), + json_value(jsonval, '$.var', 'a'), + json_value(jsonval, '$.var', 1, 'default'), + json_value(jsonval, '$.var', 1000000.000001, 'default'), + json_value(jsonval, '$.var', false, 'default'), + json_value(jsonval, '$.var', array(4, 5), 'default'), + 
json_value(jsonval, '$.var', array(4, 5), 'default', 'default'), + json_value(jsonval, '$.var', named_struct('nested', false), 'default'), + json_value(jsonval, '$.var', named_struct('nested', false), 'default', 'default') + from jsonvaluecast; diff --git ql/src/test/queries/clientpositive/jsonvalue_types.q ql/src/test/queries/clientpositive/jsonvalue_types.q new file mode 100644 index 0000000000..f5e4345de8 --- /dev/null +++ ql/src/test/queries/clientpositive/jsonvalue_types.q @@ -0,0 +1,54 @@ +create temporary table jsonalltypes( + strjson string, + charjson char(200), + vcharjson varchar(255), + chardef char(5), + vchardef varchar(100), + int1 tinyint, + int2 smallint, + int4 int, + int8 bigint, + dec4 float, + dec8 double, + decbig decimal(12, 4), + bbool boolean, + llist array<int>, + sstruct struct<key:int,name:string> + ); + +insert into jsonalltypes values + ('{ "skey" : "sval", "ikey" : 3, "dkey" : 3.141592654, "bkey" : true , "lkey" : [ 100, 200 ], "stkey" : { "key" : -1, "name" : "structname" }}', + '{ "skey" : "xxxx", "ikey" : 4, "dkey" : 2.718281828, "bkey" : false, "lkey" : [ 101, 201 ], "stkey" : { "key" : 18, "name" : "yyyyyyyyyy" }}', + '{ "skey" : "zzzz", "ikey" : 5, "dkey" : 1.618033988, "bkey" : false, "lkey" : [ 102, 202 ], "stkey" : { "key" : 19, "name" : "aaaaaaaaaa" }}', + 'zz', 'yy', 1, 2, 3, 4, 1.1, 2.2, 10328.23, true, array(5, 6), named_struct('key', 3, 'name', 'n')), + ('{ "skey" : "bbbb", "ikey" : 6, "dkey" : 1.414213562, "bkey" : true , "lkey" : [ 103, 204 ], "stkey" : { "key" : -8, "name" : "eeeeeeeeee" }}', + '{ "skey" : "cccc", "ikey" : 7, "dkey" : 1.732050807, "bkey" : false, "lkey" : [ 104, 204 ], "stkey" : { "key" : 18, "name" : "ffffffffff" }}', + '{ "skey" : "dddd", "ikey" : 8, "dkey" : 2.000000000, "bkey" : false, "lkey" : [ 105, 205 ], "stkey" : { "key" : 19, "name" : "gggggggggg" }}', + 'aa', 'bb', 5, 6, 7, 8, 3.1, 3.2, 30328.23, false, array(1, 2), named_struct('key', 2, 'name', 'b')), + ('{}', NULL, NULL, 'cc', 'dd', 15, 16, 17, 18, 13.1, 13.2, 130328.23, false, array(11, 12), named_struct('key', 12, 'name', 'c')) + ; + +select json_value(strjson, "$.skey", chardef, 'DEFAULT'), + json_value(strjson, "$.skey", vchardef, 'DEFAULT'), + json_value(charjson, "$.skey", vchardef, 'DEFAULT'), + json_value(vcharjson, "$.skey", chardef, 'DEFAULT') + from jsonalltypes; + +select json_value(strjson, "$.ikey", int1, 'DEFAULT'), + json_value(strjson, "$.ikey", int2, 'DEFAULT'), + json_value(strjson, "$.ikey", int4, 'DEFAULT'), + json_value(strjson, "$.ikey", int8, 'DEFAULT') + from jsonalltypes; + +select json_value(strjson, "$.dkey", dec4, 'DEFAULT'), + json_value(strjson, "$.dkey", dec8, 'DEFAULT'), + json_value(strjson, "$.dkey", decbig, 'default'), + json_value(strjson, "$.bkey", bbool, 'DEFAULT') + from jsonalltypes; + +select json_value(strjson, "$.lkey[0]", 1), + json_value(strjson, "$.stkey.key", 1), + json_value(strjson, "$.lkey", llist, 'DEFAULT'), + json_value(strjson, "$.stkey", sstruct, 'DEFAULT') + from jsonalltypes; + diff --git ql/src/test/results/clientpositive/is_json.q.out ql/src/test/results/clientpositive/is_json.q.out new file mode 100644 index 0000000000..4a1cf1d373 --- /dev/null +++ ql/src/test/results/clientpositive/is_json.q.out @@ -0,0 +1,60 @@ +PREHOOK: query: DESCRIBE FUNCTION isjson +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION isjson +POSTHOOK: type: DESCFUNCTION +string IS JSON - Parses the given string to see if it is valid JSON +PREHOOK: query: DESCRIBE FUNCTION EXTENDED isjson +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE
FUNCTION EXTENDED isjson +POSTHOOK: type: DESCFUNCTION +string IS JSON - Parses the given string to see if it is valid JSON +Returns null if json is null, otherwise true or false +Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDFIsJson +Function type:BUILTIN +PREHOOK: query: DESCRIBE FUNCTION isnotjson +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION isnotjson +POSTHOOK: type: DESCFUNCTION +string IS NOT JSON - Parses the given string to see if it is not valid JSON +PREHOOK: query: DESCRIBE FUNCTION EXTENDED isnotjson +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED isnotjson +POSTHOOK: type: DESCFUNCTION +string IS NOT JSON - Parses the given string to see if it is not valid JSON +Returns null if json is null, otherwise true or false +Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDFIsNotJson +Function type:BUILTIN +PREHOOK: query: select 1 from src_json where json is json +PREHOOK: type: QUERY +PREHOOK: Input: default@src_json +#### A masked pattern was here #### +POSTHOOK: query: select 1 from src_json where json is json +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_json +#### A masked pattern was here #### +1 +PREHOOK: query: select 1 from src_json where json is not json +PREHOOK: type: QUERY +PREHOOK: Input: default@src_json +#### A masked pattern was here #### +POSTHOOK: query: select 1 from src_json where json is not json +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_json +#### A masked pattern was here #### +PREHOOK: query: select 1 from src where key is json +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select 1 from src where key is json +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +PREHOOK: query: select 1 from src where key is not json limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select 1 from src where key is not json limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +1 diff --git ql/src/test/results/clientpositive/json_vectorized.q.out ql/src/test/results/clientpositive/json_vectorized.q.out new file mode 100644 index 0000000000..cf74fd2bb6 --- /dev/null +++ ql/src/test/results/clientpositive/json_vectorized.q.out @@ -0,0 +1,357 @@ +PREHOOK: query: create temporary table jsonvectorized (jsonval string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@jsonvectorized +POSTHOOK: query: create temporary table jsonvectorized (jsonval string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@jsonvectorized +PREHOOK: query: insert into jsonvectorized values + ('{"var" : "imastring" }'), + ('{"var" : -3 }'), + ('{"var" : 1987.12342 }'), + ('{"var" : true }'), + ('{"var" : [ 1, 2, 3] }'), + ('{"var" : { "nested" : true } }'), + ('{"var" : null }'), + ('{}') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@jsonvectorized +POSTHOOK: query: insert into jsonvectorized values + ('{"var" : "imastring" }'), + ('{"var" : -3 }'), + ('{"var" : 1987.12342 }'), + ('{"var" : true }'), + ('{"var" : [ 1, 2, 3] }'), + ('{"var" : { "nested" : true } }'), + ('{"var" : null }'), + ('{}') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@jsonvectorized +POSTHOOK: Lineage: jsonvectorized.jsonval SCRIPT [] +PREHOOK: query: explain 
vectorization detail +select 1 from jsonvectorized where jsonval is json +PREHOOK: type: QUERY +PREHOOK: Input: default@jsonvectorized +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization detail +select 1 from jsonvectorized where jsonval is json +POSTHOOK: type: QUERY +POSTHOOK: Input: default@jsonvectorized +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: jsonvectorized + filterExpr: isjson(jsonval) (type: boolean) + Statistics: Num rows: 8 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:jsonval:string, 1:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsTrue(col 2:boolean)(children: VectorUDFAdaptor(isjson(jsonval)) -> 2:boolean) + predicate: isjson(jsonval) (type: boolean) + Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3] + selectExpressions: ConstantVectorExpression(val 1) -> 3:int + Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: jsonval:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain vectorization detail +select 1 from jsonvectorized where jsonval is not json +PREHOOK: type: QUERY +PREHOOK: Input: default@jsonvectorized +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization detail +select 1 from jsonvectorized where jsonval is not json +POSTHOOK: type: QUERY +POSTHOOK: Input: default@jsonvectorized +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: jsonvectorized + filterExpr: isjson(jsonval) (type: boolean) + Statistics: Num rows: 8 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:jsonval:string, 1:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: 
VectorFilterOperator + native: true + predicateExpression: SelectColumnIsTrue(col 2:boolean)(children: VectorUDFAdaptor(isjson(jsonval)) -> 2:boolean) + predicate: isjson(jsonval) (type: boolean) + Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3] + selectExpressions: ConstantVectorExpression(val 1) -> 3:int + Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: jsonval:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain vectorization detail +select json_value(jsonval, '$.var'), + json_value(jsonval, '$.var', 'a'), + json_value(jsonval, '$.var', 1, 'default'), + json_value(jsonval, '$.var', 1000000.000001, 'default'), + json_value(jsonval, '$.var', false, 'default') + from jsonvectorized +PREHOOK: type: QUERY +PREHOOK: Input: default@jsonvectorized +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization detail +select json_value(jsonval, '$.var'), + json_value(jsonval, '$.var', 'a'), + json_value(jsonval, '$.var', 1, 'default'), + json_value(jsonval, '$.var', 1000000.000001, 'default'), + json_value(jsonval, '$.var', false, 'default') + from jsonvectorized +POSTHOOK: type: QUERY +POSTHOOK: Input: default@jsonvectorized +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: jsonvectorized + Statistics: Num rows: 8 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:jsonval:string, 1:ROW__ID:struct] + Select Operator + expressions: json_value(jsonval, '$.var') (type: string), json_value(jsonval, '$.var', 'a') (type: string), json_value(jsonval, '$.var', 1, 'default') (type: int), json_value(jsonval, '$.var', 1000000.000001, 'default') (type: decimal(13,6)), json_value(jsonval, '$.var', false, 'default') (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 3, 4, 5, 6] + selectExpressions: VectorUDFAdaptor(json_value(jsonval, '$.var')) -> 2:string, VectorUDFAdaptor(json_value(jsonval, '$.var', 'a')) -> 3:string, 
VectorUDFAdaptor(json_value(jsonval, '$.var', 1, 'default')) -> 4:int, VectorUDFAdaptor(json_value(jsonval, '$.var', 1000000.000001, 'default')) -> 5:decimal(13,6), VectorUDFAdaptor(json_value(jsonval, '$.var', false, 'default')) -> 6:boolean + Statistics: Num rows: 8 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: jsonval:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, string, bigint, decimal(13,6), bigint] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain vectorization detail +select json_value(jsonval, '$.var'), + json_value(jsonval, '$.var', 'a'), + json_value(jsonval, '$.var', 1, 'default'), + json_value(jsonval, '$.var', 1000000.000001, 'default'), + json_value(jsonval, '$.var', false, 'default'), + json_value(jsonval, '$.var', array(4, 5), 'default'), + json_value(jsonval, '$.var', array(4, 5), 'default', 'default'), + json_value(jsonval, '$.var', named_struct('nested', false), 'default'), + json_value(jsonval, '$.var', named_struct('nested', false), 'default', 'default') + from jsonvectorized +PREHOOK: type: QUERY +PREHOOK: Input: default@jsonvectorized +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization detail +select json_value(jsonval, '$.var'), + json_value(jsonval, '$.var', 'a'), + json_value(jsonval, '$.var', 1, 'default'), + json_value(jsonval, '$.var', 1000000.000001, 'default'), + json_value(jsonval, '$.var', false, 'default'), + json_value(jsonval, '$.var', array(4, 5), 'default'), + json_value(jsonval, '$.var', array(4, 5), 'default', 'default'), + json_value(jsonval, '$.var', named_struct('nested', false), 'default'), + json_value(jsonval, '$.var', named_struct('nested', false), 'default', 'default') + from jsonvectorized +POSTHOOK: type: QUERY +POSTHOOK: Input: default@jsonvectorized +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: jsonvectorized + Statistics: Num rows: 8 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:jsonval:string, 1:ROW__ID:struct] + Select Operator + expressions: json_value(jsonval, '$.var') (type: string), json_value(jsonval, '$.var', 'a') (type: string), json_value(jsonval, '$.var', 1, 'default') (type: int), json_value(jsonval, '$.var', 1000000.000001, 'default') (type: decimal(13,6)), json_value(jsonval, '$.var', false, 'default') (type: boolean), json_value(jsonval, 
'$.var', array(4,5), 'default') (type: array), json_value(jsonval, '$.var', array(4,5), 'default', 'default') (type: array), json_value(jsonval, '$.var', named_struct('nested',false), 'default') (type: struct), json_value(jsonval, '$.var', named_struct('nested',false), 'default', 'default') (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 3, 4, 5, 6, 8, 10, 12, 14] + selectExpressions: VectorUDFAdaptor(json_value(jsonval, '$.var')) -> 2:string, VectorUDFAdaptor(json_value(jsonval, '$.var', 'a')) -> 3:string, VectorUDFAdaptor(json_value(jsonval, '$.var', 1, 'default')) -> 4:int, VectorUDFAdaptor(json_value(jsonval, '$.var', 1000000.000001, 'default')) -> 5:decimal(13,6), VectorUDFAdaptor(json_value(jsonval, '$.var', false, 'default')) -> 6:boolean, VectorUDFAdaptor(json_value(jsonval, '$.var', array(4,5), 'default'))(children: VectorUDFAdaptor(array(4,5)) -> 7:array) -> 8:array, VectorUDFAdaptor(json_value(jsonval, '$.var', array(4,5), 'default', 'default'))(children: VectorUDFAdaptor(array(4,5)) -> 9:array) -> 10:array, VectorUDFAdaptor(json_value(jsonval, '$.var', named_struct('nested',false), 'default'))(children: VectorUDFAdaptor(named_struct('nested',false)) -> 11:struct) -> 12:struct, VectorUDFAdaptor(json_value(jsonval, '$.var', named_struct('nested',false), 'default', 'default'))(children: VectorUDFAdaptor(named_struct('nested',false)) -> 13:struct) -> 14:struct + Statistics: Num rows: 8 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: jsonval:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, string, bigint, decimal(13,6), bigint, array, array, array, array, struct, struct, struct, struct] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/jsonquery.q.out ql/src/test/results/clientpositive/jsonquery.q.out new file mode 100644 index 0000000000..d04d7e69bf --- /dev/null +++ ql/src/test/results/clientpositive/jsonquery.q.out @@ -0,0 +1,65 @@ +PREHOOK: query: create temporary table jsonquery ( + jsonval string, + c char(5), + vc varchar(100)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@jsonquery +POSTHOOK: query: create temporary table jsonquery ( + jsonval string, + c char(5), + vc varchar(100)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@jsonquery +PREHOOK: query: insert into jsonquery values + ('{"var" : "imastring" }', 'abc', 'def'), + ('{"var" : -3 }', 'ghi', 'jkl'), + ('{"var" : 1987.12342 }', 'mno', 'pqr'), + 
('{"var" : true }', 'stu', 'vwx'), + ('{"var" : [ 1, 2, 3] }', 'yzA', 'BCD'), + ('{"var" : { "nested" : [ 1, { "key" : "value", "anotherkey" : 23 } ] } }', 'EFG', 'HIJ'), + ('{"var" : null }', 'KLM', 'NOP'), + ('{}', 'QRS', 'TUV') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@jsonquery +POSTHOOK: query: insert into jsonquery values + ('{"var" : "imastring" }', 'abc', 'def'), + ('{"var" : -3 }', 'ghi', 'jkl'), + ('{"var" : 1987.12342 }', 'mno', 'pqr'), + ('{"var" : true }', 'stu', 'vwx'), + ('{"var" : [ 1, 2, 3] }', 'yzA', 'BCD'), + ('{"var" : { "nested" : [ 1, { "key" : "value", "anotherkey" : 23 } ] } }', 'EFG', 'HIJ'), + ('{"var" : null }', 'KLM', 'NOP'), + ('{}', 'QRS', 'TUV') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@jsonquery +POSTHOOK: Lineage: jsonquery.c SCRIPT [] +POSTHOOK: Lineage: jsonquery.jsonval SCRIPT [] +POSTHOOK: Lineage: jsonquery.vc SCRIPT [] +PREHOOK: query: select json_query(jsonval, '$.var'), + json_query(jsonval, '$.var', 'a'), + json_query(jsonval, '$.var', c, 'default'), + json_query(jsonval, '$.var', vc, 'default') + from jsonquery +PREHOOK: type: QUERY +PREHOOK: Input: default@jsonquery +#### A masked pattern was here #### +POSTHOOK: query: select json_query(jsonval, '$.var'), + json_query(jsonval, '$.var', 'a'), + json_query(jsonval, '$.var', c, 'default'), + json_query(jsonval, '$.var', vc, 'default') + from jsonquery +POSTHOOK: type: QUERY +POSTHOOK: Input: default@jsonquery +#### A masked pattern was here #### +imastring imastring imast imastring +-3 -3 -3 -3 +1987.12342 1987.12342 1987. 1987.12342 +true true true true +[1,2,3] [1,2,3] [1,2, [1,2,3] +{"nested":[1,{"anotherkey":23,"key":"value"}]} {"nested":[1,{"anotherkey":23,"key":"value"}]} {"nes {"nested":[1,{"anotherkey":23,"key":"value"}]} +NULL NULL NULL NULL +NULL NULL QRS TUV diff --git ql/src/test/results/clientpositive/jsonvalue.q.out ql/src/test/results/clientpositive/jsonvalue.q.out new file mode 100644 index 0000000000..ed7407b4a1 --- /dev/null +++ ql/src/test/results/clientpositive/jsonvalue.q.out @@ -0,0 +1,129 @@ +PREHOOK: query: create temporary table jsonvalue (jsonval string, defaultval string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@jsonvalue +POSTHOOK: query: create temporary table jsonvalue (jsonval string, defaultval string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@jsonvalue +PREHOOK: query: insert into jsonvalue values + ('{"name" : "harry", "age" : 17, "gpa" : 3.03, "honors" : false, "classes" : [ "math", "history" ], "numbers" : [ 1 , 2]}', 'ron'), + ('{"name" : "hermione", "age" : 18, "gpa" : 3.99, "honors" : true, "classes" : [ "science", "french" ], "numbers" : [10, 20]}', 'ginny'), + ('{"name" : null, "age" : null, "gpa" : null, "honors" : null, "classes" : null}', 'no name'), + ('{}', 'empty'), + ('{ "nested" : { "street" : "12 Grimmauld Place", "years at this address" : 1 } }', 'whatever') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@jsonvalue +POSTHOOK: query: insert into jsonvalue values + ('{"name" : "harry", "age" : 17, "gpa" : 3.03, "honors" : false, "classes" : [ "math", "history" ], "numbers" : [ 1 , 2]}', 'ron'), + ('{"name" : "hermione", "age" : 18, "gpa" : 3.99, "honors" : true, "classes" : [ "science", "french" ], "numbers" : [10, 20]}', 'ginny'), + ('{"name" : null, "age" : null, "gpa" : null, "honors" : null, 
"classes" : null}', 'no name'), + ('{}', 'empty'), + ('{ "nested" : { "street" : "12 Grimmauld Place", "years at this address" : 1 } }', 'whatever') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@jsonvalue +POSTHOOK: Lineage: jsonvalue.defaultval SCRIPT [] +POSTHOOK: Lineage: jsonvalue.jsonval SCRIPT [] +PREHOOK: query: select json_value(jsonval, '$.name'), + json_value(jsonval, '$.age', 1L), + json_value(jsonval, '$.age', 1), + json_value(jsonval, '$.gpa', 100.001), + json_value(jsonval, '$.honors', true) + from jsonvalue +PREHOOK: type: QUERY +PREHOOK: Input: default@jsonvalue +#### A masked pattern was here #### +POSTHOOK: query: select json_value(jsonval, '$.name'), + json_value(jsonval, '$.age', 1L), + json_value(jsonval, '$.age', 1), + json_value(jsonval, '$.gpa', 100.001), + json_value(jsonval, '$.honors', true) + from jsonvalue +POSTHOOK: type: QUERY +POSTHOOK: Input: default@jsonvalue +#### A masked pattern was here #### +harry 17 17 3.030 false +hermione 18 18 3.990 true +NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL +PREHOOK: query: select json_value(jsonval, '$.name', 'fred', 'DEFAULT'), + json_value(jsonval, '$.age', 1L, 'DEFAULT'), + json_value(jsonval, '$.age', 1, 'DEFAULT'), + json_value(jsonval, '$.gpa', 100.001, 'DEFAULT'), + json_value(jsonval, '$.honors', true, 'DEFAULT') + from jsonvalue +PREHOOK: type: QUERY +PREHOOK: Input: default@jsonvalue +#### A masked pattern was here #### +POSTHOOK: query: select json_value(jsonval, '$.name', 'fred', 'DEFAULT'), + json_value(jsonval, '$.age', 1L, 'DEFAULT'), + json_value(jsonval, '$.age', 1, 'DEFAULT'), + json_value(jsonval, '$.gpa', 100.001, 'DEFAULT'), + json_value(jsonval, '$.honors', true, 'DEFAULT') + from jsonvalue +POSTHOOK: type: QUERY +POSTHOOK: Input: default@jsonvalue +#### A masked pattern was here #### +harry 17 17 3.030 false +hermione 18 18 3.990 true +NULL NULL NULL NULL NULL +fred 1 1 100.001 true +fred 1 1 100.001 true +PREHOOK: query: select json_value(jsonval, '$.name', defaultval, 'DEFAULT') + from jsonvalue +PREHOOK: type: QUERY +PREHOOK: Input: default@jsonvalue +#### A masked pattern was here #### +POSTHOOK: query: select json_value(jsonval, '$.name', defaultval, 'DEFAULT') + from jsonvalue +POSTHOOK: type: QUERY +POSTHOOK: Input: default@jsonvalue +#### A masked pattern was here #### +harry +hermione +NULL +empty +whatever +PREHOOK: query: select + json_value(jsonval, '$.classes', array('a')), + json_value(jsonval, '$.numbers', array(1)), + json_value(jsonval, '$.classes[$index]', 'a', 'NULL', 'NULL', 'index', 0) + from jsonvalue +PREHOOK: type: QUERY +PREHOOK: Input: default@jsonvalue +#### A masked pattern was here #### +POSTHOOK: query: select + json_value(jsonval, '$.classes', array('a')), + json_value(jsonval, '$.numbers', array(1)), + json_value(jsonval, '$.classes[$index]', 'a', 'NULL', 'NULL', 'index', 0) + from jsonvalue +POSTHOOK: type: QUERY +POSTHOOK: Input: default@jsonvalue +#### A masked pattern was here #### +["math","history"] [1,2] math +["science","french"] [10,20] science +NULL NULL NULL +NULL NULL NULL +NULL NULL NULL +PREHOOK: query: select json_value(jsonval, '$.nested.street'), + json_value(jsonval, '$.nested."years at this address"', 1), + json_value(jsonval, '$.nested', named_struct('street', 'a', 'years at this address', 1)) + from jsonvalue +PREHOOK: type: QUERY +PREHOOK: Input: default@jsonvalue +#### A masked pattern was here #### +POSTHOOK: query: select json_value(jsonval, '$.nested.street'), 
+ json_value(jsonval, '$.nested."years at this address"', 1), + json_value(jsonval, '$.nested', named_struct('street', 'a', 'years at this address', 1)) + from jsonvalue +POSTHOOK: type: QUERY +POSTHOOK: Input: default@jsonvalue +#### A masked pattern was here #### +NULL NULL NULL +NULL NULL NULL +NULL NULL NULL +NULL NULL NULL +12 Grimmauld Place 1 {"street":"12 Grimmauld Place","years at this address":1} diff --git ql/src/test/results/clientpositive/jsonvalue_casts.q.out ql/src/test/results/clientpositive/jsonvalue_casts.q.out new file mode 100644 index 0000000000..f99069aaf2 --- /dev/null +++ ql/src/test/results/clientpositive/jsonvalue_casts.q.out @@ -0,0 +1,67 @@ +PREHOOK: query: create temporary table jsonvaluecast (jsonval string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@jsonvaluecast +POSTHOOK: query: create temporary table jsonvaluecast (jsonval string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@jsonvaluecast +PREHOOK: query: insert into jsonvaluecast values + ('{"var" : "imastring" }'), + ('{"var" : -3 }'), + ('{"var" : 1987.12342 }'), + ('{"var" : true }'), + ('{"var" : [ 1, 2, 3] }'), + ('{"var" : { "nested" : true } }'), + ('{"var" : null }'), + ('{}') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@jsonvaluecast +POSTHOOK: query: insert into jsonvaluecast values + ('{"var" : "imastring" }'), + ('{"var" : -3 }'), + ('{"var" : 1987.12342 }'), + ('{"var" : true }'), + ('{"var" : [ 1, 2, 3] }'), + ('{"var" : { "nested" : true } }'), + ('{"var" : null }'), + ('{}') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@jsonvaluecast +POSTHOOK: Lineage: jsonvaluecast.jsonval SCRIPT [] +PREHOOK: query: select json_value(jsonval, '$.var'), + json_value(jsonval, '$.var', 'a'), + json_value(jsonval, '$.var', 1, 'default'), + json_value(jsonval, '$.var', 1000000.000001, 'default'), + json_value(jsonval, '$.var', false, 'default'), + json_value(jsonval, '$.var', array(4, 5), 'default'), + json_value(jsonval, '$.var', array(4, 5), 'default', 'default'), + json_value(jsonval, '$.var', named_struct('nested', false), 'default'), + json_value(jsonval, '$.var', named_struct('nested', false), 'default', 'default') + from jsonvaluecast +PREHOOK: type: QUERY +PREHOOK: Input: default@jsonvaluecast +#### A masked pattern was here #### +POSTHOOK: query: select json_value(jsonval, '$.var'), + json_value(jsonval, '$.var', 'a'), + json_value(jsonval, '$.var', 1, 'default'), + json_value(jsonval, '$.var', 1000000.000001, 'default'), + json_value(jsonval, '$.var', false, 'default'), + json_value(jsonval, '$.var', array(4, 5), 'default'), + json_value(jsonval, '$.var', array(4, 5), 'default', 'default'), + json_value(jsonval, '$.var', named_struct('nested', false), 'default'), + json_value(jsonval, '$.var', named_struct('nested', false), 'default', 'default') + from jsonvaluecast +POSTHOOK: type: QUERY +POSTHOOK: Input: default@jsonvaluecast +#### A masked pattern was here #### +imastring imastring NULL NULL true NULL [4,5] NULL {"nested":false} +-3 -3 -3 -3.000000 true NULL [4,5] NULL {"nested":false} +1987.12342 1987.12342 1987 1987.123420 true NULL [4,5] NULL {"nested":false} +TRUE TRUE 1 1.000000 true NULL [4,5] NULL {"nested":false} +[1, 2, 3] [1, 2, 3] NULL NULL true [1,2,3] [1,2,3] NULL {"nested":false} +{nested=true} {nested=true} NULL NULL true NULL [4,5] {"nested":true} {"nested":true} +NULL NULL NULL NULL NULL 
NULL NULL NULL NULL +NULL NULL 1 1000000.000001 false [4,5] [4,5] {"nested":false} {"nested":false} diff --git ql/src/test/results/clientpositive/jsonvalue_types.q.out ql/src/test/results/clientpositive/jsonvalue_types.q.out new file mode 100644 index 0000000000..42fd5a6b1c --- /dev/null +++ ql/src/test/results/clientpositive/jsonvalue_types.q.out @@ -0,0 +1,157 @@ +PREHOOK: query: create temporary table jsonalltypes( + strjson string, + charjson char(200), + vcharjson varchar(255), + chardef char(5), + vchardef varchar(100), + int1 tinyint, + int2 smallint, + int4 int, + int8 bigint, + dec4 float, + dec8 double, + decbig decimal(12, 4), + bbool boolean, + llist array, + sstruct struct + ) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@jsonalltypes +POSTHOOK: query: create temporary table jsonalltypes( + strjson string, + charjson char(200), + vcharjson varchar(255), + chardef char(5), + vchardef varchar(100), + int1 tinyint, + int2 smallint, + int4 int, + int8 bigint, + dec4 float, + dec8 double, + decbig decimal(12, 4), + bbool boolean, + llist array, + sstruct struct + ) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@jsonalltypes +PREHOOK: query: insert into jsonalltypes values + ('{ "skey" : "sval", "ikey" : 3, "dkey" : 3.141592654, "bkey" : true , "lkey" : [ 100, 200 ], "stkey" : { "key" : -1, "name" : "structname" }}', + '{ "skey" : "xxxx", "ikey" : 4, "dkey" : 2.718281828, "bkey" : false, "lkey" : [ 101, 201 ], "stkey" : { "key" : 18, "name" : "yyyyyyyyyy" }}', + '{ "skey" : "zzzz", "ikey" : 5, "dkey" : 1.618033988, "bkey" : false, "lkey" : [ 102, 202 ], "stkey" : { "key" : 19, "name" : "aaaaaaaaaa" }}', + 'zz', 'yy', 1, 2, 3, 4, 1.1, 2.2, 10328.23, true, array(5, 6), named_struct('key', 3, 'name', 'n')), + ('{ "skey" : "bbbb", "ikey" : 6, "dkey" : 1.414213562, "bkey" : true , "lkey" : [ 103, 204 ], "stkey" : { "key" : -8, "name" : "eeeeeeeeee" }}', + '{ "skey" : "cccc", "ikey" : 7, "dkey" : 1.732050807, "bkey" : false, "lkey" : [ 104, 204 ], "stkey" : { "key" : 18, "name" : "ffffffffff" }}', + '{ "skey" : "dddd", "ikey" : 8, "dkey" : 2.000000000, "bkey" : false, "lkey" : [ 105, 205 ], "stkey" : { "key" : 19, "name" : "gggggggggg" }}', + 'aa', 'bb', 5, 6, 7, 8, 3.1, 3.2, 30328.23, false, array(1, 2), named_struct('key', 2, 'name', 'b')), + ('{}', NULL, NULL, 'cc', 'dd', 15, 16, 17, 18, 13.1, 13.2, 130328.23, false, array(11, 12), named_struct('key', 12, 'name', 'c')) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@jsonalltypes +POSTHOOK: query: insert into jsonalltypes values + ('{ "skey" : "sval", "ikey" : 3, "dkey" : 3.141592654, "bkey" : true , "lkey" : [ 100, 200 ], "stkey" : { "key" : -1, "name" : "structname" }}', + '{ "skey" : "xxxx", "ikey" : 4, "dkey" : 2.718281828, "bkey" : false, "lkey" : [ 101, 201 ], "stkey" : { "key" : 18, "name" : "yyyyyyyyyy" }}', + '{ "skey" : "zzzz", "ikey" : 5, "dkey" : 1.618033988, "bkey" : false, "lkey" : [ 102, 202 ], "stkey" : { "key" : 19, "name" : "aaaaaaaaaa" }}', + 'zz', 'yy', 1, 2, 3, 4, 1.1, 2.2, 10328.23, true, array(5, 6), named_struct('key', 3, 'name', 'n')), + ('{ "skey" : "bbbb", "ikey" : 6, "dkey" : 1.414213562, "bkey" : true , "lkey" : [ 103, 204 ], "stkey" : { "key" : -8, "name" : "eeeeeeeeee" }}', + '{ "skey" : "cccc", "ikey" : 7, "dkey" : 1.732050807, "bkey" : false, "lkey" : [ 104, 204 ], "stkey" : { "key" : 18, "name" : "ffffffffff" }}', + '{ "skey" : "dddd", "ikey" : 8, "dkey" : 2.000000000, 
"bkey" : false, "lkey" : [ 105, 205 ], "stkey" : { "key" : 19, "name" : "gggggggggg" }}', + 'aa', 'bb', 5, 6, 7, 8, 3.1, 3.2, 30328.23, false, array(1, 2), named_struct('key', 2, 'name', 'b')), + ('{}', NULL, NULL, 'cc', 'dd', 15, 16, 17, 18, 13.1, 13.2, 130328.23, false, array(11, 12), named_struct('key', 12, 'name', 'c')) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@jsonalltypes +POSTHOOK: Lineage: jsonalltypes.bbool SCRIPT [] +POSTHOOK: Lineage: jsonalltypes.chardef SCRIPT [] +POSTHOOK: Lineage: jsonalltypes.charjson SCRIPT [] +POSTHOOK: Lineage: jsonalltypes.dec4 SCRIPT [] +POSTHOOK: Lineage: jsonalltypes.dec8 SCRIPT [] +POSTHOOK: Lineage: jsonalltypes.decbig SCRIPT [] +POSTHOOK: Lineage: jsonalltypes.int1 SCRIPT [] +POSTHOOK: Lineage: jsonalltypes.int2 SCRIPT [] +POSTHOOK: Lineage: jsonalltypes.int4 SCRIPT [] +POSTHOOK: Lineage: jsonalltypes.int8 SCRIPT [] +POSTHOOK: Lineage: jsonalltypes.llist SCRIPT [] +POSTHOOK: Lineage: jsonalltypes.sstruct SCRIPT [] +POSTHOOK: Lineage: jsonalltypes.strjson SCRIPT [] +POSTHOOK: Lineage: jsonalltypes.vchardef SCRIPT [] +POSTHOOK: Lineage: jsonalltypes.vcharjson SCRIPT [] +PREHOOK: query: select json_value(strjson, "$.skey", chardef, 'DEFAULT'), + json_value(strjson, "$.skey", vchardef, 'DEFAULT'), + json_value(charjson, "$.skey", vchardef, 'DEFAULT'), + json_value(vcharjson, "$.skey", chardef, 'DEFAULT') + from jsonalltypes +PREHOOK: type: QUERY +PREHOOK: Input: default@jsonalltypes +#### A masked pattern was here #### +POSTHOOK: query: select json_value(strjson, "$.skey", chardef, 'DEFAULT'), + json_value(strjson, "$.skey", vchardef, 'DEFAULT'), + json_value(charjson, "$.skey", vchardef, 'DEFAULT'), + json_value(vcharjson, "$.skey", chardef, 'DEFAULT') + from jsonalltypes +POSTHOOK: type: QUERY +POSTHOOK: Input: default@jsonalltypes +#### A masked pattern was here #### +sval sval xxxx zzzz +bbbb bbbb cccc dddd +cc dd NULL NULL +PREHOOK: query: select json_value(strjson, "$.ikey", int1, 'DEFAULT'), + json_value(strjson, "$.ikey", int2, 'DEFAULT'), + json_value(strjson, "$.ikey", int4, 'DEFAULT'), + json_value(strjson, "$.ikey", int8, 'DEFAULT') + from jsonalltypes +PREHOOK: type: QUERY +PREHOOK: Input: default@jsonalltypes +#### A masked pattern was here #### +POSTHOOK: query: select json_value(strjson, "$.ikey", int1, 'DEFAULT'), + json_value(strjson, "$.ikey", int2, 'DEFAULT'), + json_value(strjson, "$.ikey", int4, 'DEFAULT'), + json_value(strjson, "$.ikey", int8, 'DEFAULT') + from jsonalltypes +POSTHOOK: type: QUERY +POSTHOOK: Input: default@jsonalltypes +#### A masked pattern was here #### +3 3 3 3 +6 6 6 6 +15 16 17 18 +PREHOOK: query: select json_value(strjson, "$.dkey", dec4, 'DEFAULT'), + json_value(strjson, "$.dkey", dec8, 'DEFAULT'), + json_value(strjson, "$.dkey", decbig, 'default'), + json_value(strjson, "$.bkey", bbool, 'DEFAULT') + from jsonalltypes +PREHOOK: type: QUERY +PREHOOK: Input: default@jsonalltypes +#### A masked pattern was here #### +POSTHOOK: query: select json_value(strjson, "$.dkey", dec4, 'DEFAULT'), + json_value(strjson, "$.dkey", dec8, 'DEFAULT'), + json_value(strjson, "$.dkey", decbig, 'default'), + json_value(strjson, "$.bkey", bbool, 'DEFAULT') + from jsonalltypes +POSTHOOK: type: QUERY +POSTHOOK: Input: default@jsonalltypes +#### A masked pattern was here #### +3.1415927 3.141592654 3.1416 true +1.4142135 1.414213562 1.4142 true +13.1 13.2 130328.2300 false +PREHOOK: query: select json_value(strjson, "$.lkey[0]", 1), + json_value(strjson, "$.stkey.key", 1), + 
json_value(strjson, "$.lkey", llist, 'DEFAULT'), + json_value(strjson, "$.stkey", sstruct, 'DEFAULT') + from jsonalltypes +PREHOOK: type: QUERY +PREHOOK: Input: default@jsonalltypes +#### A masked pattern was here #### +POSTHOOK: query: select json_value(strjson, "$.lkey[0]", 1), + json_value(strjson, "$.stkey.key", 1), + json_value(strjson, "$.lkey", llist, 'DEFAULT'), + json_value(strjson, "$.stkey", sstruct, 'DEFAULT') + from jsonalltypes +POSTHOOK: type: QUERY +POSTHOOK: Input: default@jsonalltypes +#### A masked pattern was here #### +100 -1 [100,200] {"key":-1,"name":"structname"} +103 -8 [103,204] {"key":-8,"name":"eeeeeeeeee"} +NULL NULL [11,12] {"key":12,"name":"c"} diff --git ql/src/test/results/clientpositive/show_functions.q.out ql/src/test/results/clientpositive/show_functions.q.out index 374e9c4fce..bba4692f7d 100644 --- ql/src/test/results/clientpositive/show_functions.q.out +++ ql/src/test/results/clientpositive/show_functions.q.out @@ -146,14 +146,18 @@ inline instr internal_interval isfalse +isjson isnotfalse +isnotjson isnotnull isnottrue isnull istrue java_method +json_query json_read json_tuple +json_value lag last_day last_value @@ -402,6 +406,7 @@ isnotfalse isnottrue istrue json_tuple +json_value last_value lcase like
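Reviewer note (not part of the patch): the q-files above double as usage documentation for the new SQL:2016 JSON functions. For quick reference, a minimal HiveQL sketch of the surface these tests exercise, assuming the patch is applied; the table name t and column jsonval are illustrative, not taken from the patch:

-- IS [NOT] JSON predicate; the golden plans above show it resolving to the
-- isjson / isnotjson UDFs registered in FunctionRegistry
select 1 from t where jsonval is json;
select 1 from t where jsonval is not json;

-- json_value: path extraction with an optional default value; per the explain
-- output above, the default's type also determines the return type. With no
-- default a missing key yields NULL; with the 'DEFAULT' error mode the
-- supplied default is returned instead (see jsonvalue.q.out).
select json_value(jsonval, '$.name'),
       json_value(jsonval, '$.age', 1),
       json_value(jsonval, '$.name', 'fred', 'DEFAULT')
  from t;

-- json_query: returns the matched JSON fragment; an optional char/varchar
-- default controls the output type (jsonquery.q.out shows char(5) truncation)
select json_query(jsonval, '$.var'),
       json_query(jsonval, '$.var', 'a')
  from t;

-- both functions are registered as builtins, so metadata lookups work
describe function extended isjson;
describe function extended json_value;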
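The json_vectorized.q.out plans above were generated with vectorization turned on. A hedged sketch of how to reproduce that check against the jsonvectorized test table (the SET switch is the standard one reported in the plan's enabledConditionsMet, not something introduced by this patch):

set hive.vectorized.execution.enabled = true;
explain vectorization detail
select 1 from jsonvectorized where jsonval is json;
-- the plan should report usesVectorUDFAdaptor: true, i.e. the JSON UDFs run
-- through VectorUDFAdaptor rather than as native vector expressions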