diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index e3ddbf197b..2d4aa2febb 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1636,7 +1636,7 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal
         "Note that this check currently does not consider data size, only the query pattern."),
     HIVE_STRICT_CHECKS_TYPE_SAFETY("hive.strict.checks.type.safety", true,
         "Enabling strict type safety checks disallows the following:\n" +
-        "  Comparing bigints and strings.\n" +
+        "  Comparing bigints and strings/varchars/chars.\n" +
         "  Comparing bigints and doubles."),
     HIVE_STRICT_CHECKS_CARTESIAN("hive.strict.checks.cartesian.product", false,
         "Enabling strict Cartesian join checks disallows the following:\n" +
diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index c55f8db61a..030e904dd3 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -1943,6 +1943,7 @@ minillaplocal.query.files=\
   union_null.q,\
   unionall_join_nullconstant.q,\
   unionall_lateralview1.q,\
+  unsafe_compare.q,\
   unset_table_view_property.q,\
   updateAccessTime.q,\
   update_after_multiple_inserts_special_characters.q,\
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/type/TypeCheckProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/parse/type/TypeCheckProcFactory.java
index e16966e999..8a73335686 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/type/TypeCheckProcFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/type/TypeCheckProcFactory.java
@@ -28,8 +28,10 @@
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.Stack;
 
+import com.google.common.collect.Sets;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -789,12 +791,24 @@ protected void validateUDF(ASTNode expr, boolean isFunction, TypeCheckCtx ctx, F
 
     LogHelper console = new LogHelper(LOG);
 
+    Set<PrimitiveObjectInspector.PrimitiveCategory> unsafeConversionTypes = Sets.newHashSet(
+        PrimitiveObjectInspector.PrimitiveCategory.STRING,
+        PrimitiveObjectInspector.PrimitiveCategory.VARCHAR,
+        PrimitiveObjectInspector.PrimitiveCategory.CHAR);
     // For now, if a bigint is going to be cast to a double throw an error or warning
-    if ((oiTypeInfo0.equals(TypeInfoFactory.stringTypeInfo) && oiTypeInfo1.equals(TypeInfoFactory.longTypeInfo)) ||
-        (oiTypeInfo0.equals(TypeInfoFactory.longTypeInfo) && oiTypeInfo1.equals(TypeInfoFactory.stringTypeInfo))) {
+    if ((oiTypeInfo0 instanceof PrimitiveTypeInfo && unsafeConversionTypes.contains(
+            ((PrimitiveTypeInfo) oiTypeInfo0).getPrimitiveCategory()) && oiTypeInfo1.equals(TypeInfoFactory.longTypeInfo)) ||
+        (oiTypeInfo1 instanceof PrimitiveTypeInfo && unsafeConversionTypes.contains(
+            ((PrimitiveTypeInfo) oiTypeInfo1).getPrimitiveCategory()) && oiTypeInfo0.equals(TypeInfoFactory.longTypeInfo))) {
       String error = StrictChecks.checkTypeSafety(conf);
-      if (error != null) throw new UDFArgumentException(error);
-      console.printError("WARNING: Comparing a bigint and a string may result in a loss of precision.");
+      if (error != null) {
+        throw new UDFArgumentException(error);
+      }
+      String type = oiTypeInfo0.getTypeName();
+      if (oiTypeInfo0.equals(TypeInfoFactory.longTypeInfo)) {
+        type = oiTypeInfo1.getTypeName();
+      }
+      console.printError("WARNING: Comparing a bigint and a " + type + " may result in a loss of precision.");
     } else if ((oiTypeInfo0.equals(TypeInfoFactory.doubleTypeInfo) && oiTypeInfo1.equals(TypeInfoFactory.longTypeInfo)) ||
         (oiTypeInfo0.equals(TypeInfoFactory.longTypeInfo) && oiTypeInfo1.equals(TypeInfoFactory.doubleTypeInfo))) {
       console.printError("WARNING: Comparing a bigint and a double may result in a loss of precision.");
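For context (not part of the patch): the "loss of precision" these warnings name comes from Hive evaluating a bigint-vs-string/varchar/char comparison through double, and a 64-bit double cannot represent every 19-digit integer. A minimal JDK-only sketch, using the two values from the new test below, shows the collapse:

public class PrecisionLossDemo {
  public static void main(String[] args) {
    // The bigint literal from the test, widened to double (the common
    // comparison type used for bigint vs. string-family operands).
    double literal = (double) 2882303761517473127L;

    // Two *different* stored values from unsafe_compare.q ...
    double row1 = Double.parseDouble("2882303761517473127");
    double row2 = Double.parseDouble("2882303761517473276");

    // ... round to the same double: near 2.9e18 adjacent doubles
    // are 512 apart, and these values differ by only 149.
    System.out.println(literal == row1); // true
    System.out.println(literal == row2); // true (precision loss)
  }
}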
console.printError("WARNING: Comparing a bigint and a " + type + " may result in a loss of precision."); } else if ((oiTypeInfo0.equals(TypeInfoFactory.doubleTypeInfo) && oiTypeInfo1.equals(TypeInfoFactory.longTypeInfo)) || (oiTypeInfo0.equals(TypeInfoFactory.longTypeInfo) && oiTypeInfo1.equals(TypeInfoFactory.doubleTypeInfo))) { console.printError("WARNING: Comparing a bigint and a double may result in a loss of precision."); diff --git ql/src/test/org/apache/hadoop/hive/ql/parse/type/TestTypeCheckProcFactory.java ql/src/test/org/apache/hadoop/hive/ql/parse/type/TestTypeCheckProcFactory.java index 3740d98145..59a97f7308 100644 --- ql/src/test/org/apache/hadoop/hive/ql/parse/type/TestTypeCheckProcFactory.java +++ ql/src/test/org/apache/hadoop/hive/ql/parse/type/TestTypeCheckProcFactory.java @@ -21,11 +21,19 @@ import java.util.Arrays; import java.util.Collection; +import com.google.common.collect.Lists; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.FunctionInfo; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.parse.type.TypeCheckProcFactory.DefaultExprProcessor; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -140,4 +148,37 @@ public void testWithNonZeroFraction() throws Exception { } } + @Test + public void testValidateUDFOnTypeCheck() throws Exception { + ExprNodeDesc strCol = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "_c0", null, false); + ExprNodeDesc varcharCol = new ExprNodeColumnDesc(TypeInfoFactory.varcharTypeInfo, "_c1", null, false); + ExprNodeDesc charCol = new ExprNodeColumnDesc(TypeInfoFactory.charTypeInfo, "_c2", null, false); + ExprNodeDesc cnst = new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, 2882303761517473127L); + FunctionInfo functionInfo = FunctionRegistry.getFunctionInfo("="); + TypeCheckCtx ctx = new TypeCheckCtx(null); + String error = HiveConf.StrictChecks.checkTypeSafety(new HiveConf()); + try { + testSubject.validateUDF(null, false, ctx, functionInfo, + Lists.newArrayList(strCol, cnst), functionInfo.getGenericUDF()); + Assert.fail("Should throw exception as compares a bigint and a string"); + } catch (Exception e) { + Assert.assertEquals(error, e.getMessage()); + } + + try { + testSubject.validateUDF(null, false, ctx, functionInfo, + Lists.newArrayList(cnst, varcharCol), functionInfo.getGenericUDF()); + Assert.fail("Should throw exception as compares a bigint and a varchar"); + } catch (Exception e) { + Assert.assertEquals(error, e.getMessage()); + } + + try { + testSubject.validateUDF(null, false, ctx, functionInfo, + Lists.newArrayList(cnst, charCol), functionInfo.getGenericUDF()); + Assert.fail("Should throw exception as compares a bigint and a char"); + } catch (Exception e) { + Assert.assertEquals(error, e.getMessage()); + } + } } diff --git ql/src/test/queries/clientpositive/unsafe_compare.q ql/src/test/queries/clientpositive/unsafe_compare.q new file mode 100644 index 
index 0000000000..b3d09a9d71
--- /dev/null
+++ ql/src/test/queries/clientpositive/unsafe_compare.q
@@ -0,0 +1,10 @@
+
+CREATE TABLE test_a (appid1 varchar(256), appid2 char(20));
+
+INSERT INTO test_a VALUES ('2882303761517473127', '2882303761517473127'), ('2882303761517473276','2882303761517473276');
+
+SET hive.strict.checks.type.safety=false;
+
+SELECT appid1 FROM test_a WHERE appid1 = 2882303761517473127;
+
+SELECT appid2 FROM test_a WHERE appid2 = 2882303761517473127;
diff --git ql/src/test/results/clientpositive/llap/unsafe_compare.q.out ql/src/test/results/clientpositive/llap/unsafe_compare.q.out
new file mode 100644
index 0000000000..fb4e50e32e
--- /dev/null
+++ ql/src/test/results/clientpositive/llap/unsafe_compare.q.out
@@ -0,0 +1,40 @@
+PREHOOK: query: CREATE TABLE test_a (appid1 varchar(256), appid2 char(20))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_a
+POSTHOOK: query: CREATE TABLE test_a (appid1 varchar(256), appid2 char(20))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_a
+PREHOOK: query: INSERT INTO test_a VALUES ('2882303761517473127', '2882303761517473127'), ('2882303761517473276','2882303761517473276')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_a
+POSTHOOK: query: INSERT INTO test_a VALUES ('2882303761517473127', '2882303761517473127'), ('2882303761517473276','2882303761517473276')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_a
+POSTHOOK: Lineage: test_a.appid1 SCRIPT []
+POSTHOOK: Lineage: test_a.appid2 SCRIPT []
+WARNING: Comparing a bigint and a varchar(256) may result in a loss of precision.
+PREHOOK: query: SELECT appid1 FROM test_a WHERE appid1 = 2882303761517473127
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_a
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT appid1 FROM test_a WHERE appid1 = 2882303761517473127
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_a
+#### A masked pattern was here ####
+2882303761517473127
+2882303761517473276
+WARNING: Comparing a bigint and a char(20) may result in a loss of precision.
+PREHOOK: query: SELECT appid2 FROM test_a WHERE appid2 = 2882303761517473127
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_a
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT appid2 FROM test_a WHERE appid2 = 2882303761517473127
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_a
+#### A masked pattern was here ####
+2882303761517473127
+2882303761517473276
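A note on the expected output above (not part of the patch): each SELECT filters on a single 19-digit constant yet returns both rows, which is exactly the precision loss the new warning names, since both stored values and the bigint literal round to the same double. A hedged JDK-only sketch showing that an exact decimal comparison, the kind a CAST in the predicate would give, does distinguish the two rows:

import java.math.BigDecimal;

public class ExactCompareDemo {
  public static void main(String[] args) {
    // The bigint literal from the queries above, kept exact.
    BigDecimal literal = BigDecimal.valueOf(2882303761517473127L);

    // Unlike the double comparison shown earlier, exact decimal
    // comparison separates the two stored values.
    System.out.println(literal.compareTo(new BigDecimal("2882303761517473127")) == 0); // true
    System.out.println(literal.compareTo(new BigDecimal("2882303761517473276")) == 0); // false
  }
}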