Index: conf/hive-default.xml.template =================================================================== --- conf/hive-default.xml.template (revision 1355201) +++ conf/hive-default.xml.template (working copy) @@ -1292,6 +1292,15 @@ + hive.mapred.bigint.comparison.mode + nonstrict + If this is set to be strict, whenever a bigint may implicitly be converted to a double, either because of a where condition, a join + condition, or two columns being combined as part of a union, where in any of these cases one side is a bigint and the other is a string or a double, + an exception will be thrown alerting the user to the possibility of a loss in precision. If this is set to nonstrict, a warning to that effect is + given. + + + hive.transform.escape.input false Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java =================================================================== --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (revision 1355201) +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (working copy) @@ -370,6 +370,9 @@ HIVEMAPAGGRHASHMINREDUCTION("hive.map.aggr.hash.min.reduction", (float) 0.5), HIVEMULTIGROUPBYSINGLEREDUCER("hive.multigroupby.singlereducer", true), + // prevents comparisons between bigints and doubles/strings for precision reasons + HIVEBIGINTCOMPARISIONMODE("hive.mapred.bigint.comparison.mode", "nonstrict"), + // for hive udtf operator HIVEUDTFAUTOPROGRESS("hive.udtf.auto.progress", false), Index: ql/src/test/results/clientnegative/compare_string_bigint.q.out =================================================================== --- ql/src/test/results/clientnegative/compare_string_bigint.q.out (revision 1355201) +++ ql/src/test/results/clientnegative/compare_string_bigint.q.out (working copy) @@ -1 +1,2 @@ -FAILED: SemanticException Line 0:-1 Wrong arguments ''1'': In strict mode, comparing bigints and strings is not allowed, it may result in a loss of precision.
If you really want to perform the operation, set hive.mapred.mode=nonstrict +WARNING: Comparing a bigint and a string may result in a loss of precision. +FAILED: SemanticException Line 0:-1 Wrong arguments ''1'': In strict mode, comparing, joining on, and unioning bigints and strings is not allowed asit may result in a loss of precision. If you really want to perform the operation, set hive.mapred.bigint.comparison.mode=nonstrict Index: ql/src/test/results/clientnegative/compare_string_bigint_union.q.out =================================================================== --- ql/src/test/results/clientnegative/compare_string_bigint_union.q.out (revision 0) +++ ql/src/test/results/clientnegative/compare_string_bigint_union.q.out (revision 0) @@ -0,0 +1 @@ +FAILED: UDFArgumentException [Error 10124]: In strict mode, comparing, joining on, and unioning bigints and doubles is not allowed as it may result in a loss of precision. If you really want to perform the operation, set hive.mapred.bigint.comparison.mode=nonstrict Index: ql/src/test/results/clientnegative/compare_string_bigint_join.q.out =================================================================== --- ql/src/test/results/clientnegative/compare_string_bigint_join.q.out (revision 0) +++ ql/src/test/results/clientnegative/compare_string_bigint_join.q.out (revision 0) @@ -0,0 +1 @@ +FAILED: UDFArgumentException [Error 10123]: In strict mode, comparing, joining on, and unioning bigints and strings is not allowed asit may result in a loss of precision. 
If you really want to perform the operation, set hive.mapred.bigint.comparison.mode=nonstrict Index: ql/src/test/results/clientnegative/compare_double_bigint_union.q.out =================================================================== --- ql/src/test/results/clientnegative/compare_double_bigint_union.q.out (revision 0) +++ ql/src/test/results/clientnegative/compare_double_bigint_union.q.out (revision 0) @@ -0,0 +1 @@ +FAILED: UDFArgumentException [Error 10124]: In strict mode, comparing, joining on, and unioning bigints and doubles is not allowed as it may result in a loss of precision. If you really want to perform the operation, set hive.mapred.bigint.comparison.mode=nonstrict Index: ql/src/test/results/clientnegative/compare_double_bigint_join.q.out =================================================================== --- ql/src/test/results/clientnegative/compare_double_bigint_join.q.out (revision 0) +++ ql/src/test/results/clientnegative/compare_double_bigint_join.q.out (revision 0) @@ -0,0 +1 @@ +FAILED: UDFArgumentException [Error 10123]: In strict mode, comparing, joining on, and unioning bigints and strings is not allowed asit may result in a loss of precision. If you really want to perform the operation, set hive.mapred.bigint.comparison.mode=nonstrict Index: ql/src/test/results/clientnegative/compare_double_bigint.q.out =================================================================== --- ql/src/test/results/clientnegative/compare_double_bigint.q.out (revision 1355201) +++ ql/src/test/results/clientnegative/compare_double_bigint.q.out (working copy) @@ -1 +1,2 @@ -FAILED: SemanticException Line 0:-1 Wrong arguments '1.0': In strict mode, comparing bigints and doubles is not allowed, it may result in a loss of precision. If you really want to perform the operation, set hive.mapred.mode=nonstrict +WARNING: Comparing a bigint and a double may result in a loss of precision. 
+FAILED: SemanticException Line 0:-1 Wrong arguments '1.0': In strict mode, comparing, joining on, and unioning bigints and doubles is not allowed as it may result in a loss of precision. If you really want to perform the operation, set hive.mapred.bigint.comparison.mode=nonstrict Index: ql/src/test/results/clientpositive/filter_join_breaktask2.q.out =================================================================== --- ql/src/test/results/clientpositive/filter_join_breaktask2.q.out (revision 1355201) +++ ql/src/test/results/clientpositive/filter_join_breaktask2.q.out (working copy) @@ -766,6 +766,7 @@ POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c83 EXPRESSION [] POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c9 SIMPLE [] 4 1 1 8 4 5 1 0 9 U 2 2 0 2 1 1 J C A U 2 s 2 NULL NULL NULL NULL NULL NULL 1 j S 6 NULL 1 2 J g 1 e 2 1 2 U P p 3 0 0 0 1 1 1 0 0 0 6 2 j NULL NULL NULL NULL NULL NULL 5 NULL NULL j 2 2 1 2 2 1 1 1 1 1 1 1 1 32 NULL 2010-04-17 +WARNING: Comparing, joining on, or unioning a bigint and a string as part of a join may result in a loss of precision. 
PREHOOK: query: SELECT a.c1 as a_c1, b.c1 b_c1, d.c0 as d_c0 FROM T1 a JOIN T2 b ON (a.c1 = b.c1 AND a.ds='2010-04-17' AND b.ds='2010-04-17') Index: ql/src/test/queries/clientnegative/compare_double_bigint_union.q =================================================================== --- ql/src/test/queries/clientnegative/compare_double_bigint_union.q (revision 0) +++ ql/src/test/queries/clientnegative/compare_double_bigint_union.q (revision 0) @@ -0,0 +1,6 @@ +set hive.mapred.bigint.comparison.mode=strict; + +-- This should fail until we fix the issue with precision when casting a bigint to a double + +select * from (select cast(key as double) from src union all select cast(key as bigint) from src) a limit 10; + Index: ql/src/test/queries/clientnegative/compare_string_bigint_join.q =================================================================== --- ql/src/test/queries/clientnegative/compare_string_bigint_join.q (revision 0) +++ ql/src/test/queries/clientnegative/compare_string_bigint_join.q (revision 0) @@ -0,0 +1,6 @@ +set hive.mapred.bigint.comparison.mode=strict; + +--This should fail until we fix the issue with precision when casting a bigint to a double + +select * from src a join src b on (cast(a.key as bigint) = cast(b.key as string)) limit 10; + Index: ql/src/test/queries/clientnegative/compare_double_bigint.q =================================================================== --- ql/src/test/queries/clientnegative/compare_double_bigint.q (revision 1355201) +++ ql/src/test/queries/clientnegative/compare_double_bigint.q (working copy) @@ -1,4 +1,4 @@ -set hive.mapred.mode=strict; +set hive.mapred.bigint.comparison.mode=strict; -- This should fail until we fix the issue with precision when casting a bigint to a double Index: ql/src/test/queries/clientnegative/compare_double_bigint_join.q =================================================================== --- ql/src/test/queries/clientnegative/compare_double_bigint_join.q (revision 0) +++ 
ql/src/test/queries/clientnegative/compare_double_bigint_join.q (revision 0) @@ -0,0 +1,6 @@ +set hive.mapred.bigint.comparison.mode=strict; + +-- This should fail until we fix the issue with precision when casting a bigint to a double + +select * from src a join src b on (cast(a.key as bigint) = cast(b.key as string)) limit 10; + Index: ql/src/test/queries/clientnegative/compare_string_bigint.q =================================================================== --- ql/src/test/queries/clientnegative/compare_string_bigint.q (revision 1355201) +++ ql/src/test/queries/clientnegative/compare_string_bigint.q (working copy) @@ -1,4 +1,4 @@ -set hive.mapred.mode=strict; +set hive.mapred.bigint.comparison.mode=strict; --This should fail until we fix the issue with precision when casting a bigint to a double Index: ql/src/test/queries/clientnegative/compare_string_bigint_union.q =================================================================== --- ql/src/test/queries/clientnegative/compare_string_bigint_union.q (revision 0) +++ ql/src/test/queries/clientnegative/compare_string_bigint_union.q (revision 0) @@ -0,0 +1,6 @@ +set hive.mapred.bigint.comparison.mode=strict; + +--This should fail until we fix the issue with precision when casting a bigint to a double + +select * from (select cast(key as double) from src union all select cast(key as bigint) from src) a limit 10; + Index: ql/src/test/queries/clientpositive/filter_join_breaktask2.q =================================================================== --- ql/src/test/queries/clientpositive/filter_join_breaktask2.q (revision 1355201) +++ ql/src/test/queries/clientpositive/filter_join_breaktask2.q (working copy) @@ -26,6 +26,8 @@ select * from T3; select * from T4; +set hive.mapred.bigint.comparison.mode=nonstrict; + SELECT a.c1 as a_c1, b.c1 b_c1, d.c0 as d_c0 FROM T1 a JOIN T2 b ON (a.c1 = b.c1 AND a.ds='2010-04-17' AND b.ds='2010-04-17') Index: ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java 
=================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java (revision 1355201) +++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java (working copy) @@ -200,12 +200,12 @@ INCOMPATIBLE_SCHEMA(10120, "The existing table is not compatible with the import spec. "), EXIM_FOR_NON_NATIVE(10121, "Export/Import cannot be done for a non-native table. "), INSERT_INTO_BUCKETIZED_TABLE(10122, "Bucketized tables do not support INSERT INTO:"), - NO_COMPARE_BIGINT_STRING(10123, "In strict mode, comparing bigints and strings is not allowed, " - + "it may result in a loss of precision. " - + "If you really want to perform the operation, set hive.mapred.mode=nonstrict"), - NO_COMPARE_BIGINT_DOUBLE(10124, "In strict mode, comparing bigints and doubles is not allowed, " - + "it may result in a loss of precision. " - + "If you really want to perform the operation, set hive.mapred.mode=nonstrict"), + NO_COMPARE_BIGINT_STRING(10123, "In strict mode, comparing, joining on, and unioning bigints " + + "and strings is not allowed asit may result in a loss of precision. If you really want " + + "to perform the operation, set hive.mapred.bigint.comparison.mode=nonstrict"), + NO_COMPARE_BIGINT_DOUBLE(10124, "In strict mode, comparing, joining on, and unioning bigints " + + "and doubles is not allowed as it may result in a loss of precision. If you really want " + + "to perform the operation, set hive.mapred.bigint.comparison.mode=nonstrict"), PARTSPEC_DIFFER_FROM_SCHEMA(10125, "Partition columns in partition specification are " + "not the same as that defined in the table schema. 
" + "The names and orders have to be exactly the same."), Index: ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (revision 1355201) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (working copy) @@ -37,10 +37,14 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; import org.apache.hadoop.hive.ql.udf.UDAFPercentile; import org.apache.hadoop.hive.ql.udf.UDFAbs; import org.apache.hadoop.hive.ql.udf.UDFAcos; @@ -157,7 +161,6 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCoalesce; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcatWS; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFSortArray; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFEWAHBitmapAnd; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFEWAHBitmapEmpty; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFEWAHBitmapOr; @@ -168,8 +171,8 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIf; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFInFile; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIndex; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFInFile; import 
org.apache.hadoop.hive.ql.udf.generic.GenericUDFInstr; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLocate; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFMap; @@ -192,6 +195,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFReflect; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFSentences; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFSize; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFSortArray; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFSplit; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStringToMap; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct; @@ -225,10 +229,8 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.util.ReflectionUtils; - import org.w3c.dom.Document; import org.w3c.dom.Element; -import org.w3c.dom.Node; import org.w3c.dom.NodeList; /** @@ -636,11 +638,17 @@ /** * Find a common class for union-all operator + * @throws UDFArgumentException */ - public static TypeInfo getCommonClassForUnionAll(TypeInfo a, TypeInfo b) { + public static TypeInfo getCommonClassForUnionAll(TypeInfo a, TypeInfo b) + throws UDFArgumentException { + if (a.equals(b)) { return a; } + + validateConversionPrecision(a, b); + if (FunctionRegistry.implicitConvertable(a, b)) { return b; } @@ -664,12 +672,18 @@ * them to double and then compare. * * @return null if no common class could be found. 
+ * @throws UDFArgumentException */ - public static TypeInfo getCommonClassForComparison(TypeInfo a, TypeInfo b) { + public static TypeInfo getCommonClassForComparison(TypeInfo a, TypeInfo b) + throws UDFArgumentException { + // If same return one of them if (a.equals(b)) { return a; } + + validateConversionPrecision(a, b); + for (TypeInfo t : numericTypeList) { if (FunctionRegistry.implicitConvertable(a, t) && FunctionRegistry.implicitConvertable(b, t)) { @@ -680,6 +694,51 @@ } /** + * Checks whether Hive's default method of type conversion for two types could result in a loss + * of precision, which is the case when one type is a bigint and the other is a string or a + * double. If hive.mapred.bigint.comparison.mode is strict an exception is thrown, otherwise a + * warning is printed through SessionState's console. All other type pairs are accepted silently. + * The check is skipped when SessionState.get() returns null, since there is then no session + * configuration to read the mode from. + * + * @param a type of one operand + * @param b type of the other operand + * @throws UDFArgumentException in strict mode, if a bigint meets a string or a double + */ + public static void validateConversionPrecision(TypeInfo a, TypeInfo b) + throws UDFArgumentException { + + SessionState session = SessionState.get(); + if (session == null) { + return; + } + HiveConf conf = session.getConf(); + LogHelper console = SessionState.getConsole(); + + if ((a.equals(TypeInfoFactory.stringTypeInfo) && b.equals(TypeInfoFactory.longTypeInfo)) || + (a.equals(TypeInfoFactory.longTypeInfo) && b.equals(TypeInfoFactory.stringTypeInfo))) { + if (HiveConf.getVar(conf, + HiveConf.ConfVars.HIVEBIGINTCOMPARISIONMODE).equalsIgnoreCase("strict")) { + throw new UDFArgumentException(ErrorMsg.NO_COMPARE_BIGINT_STRING.getMsg()); + } else { + console.printError("WARNING: Comparing, joining on, or unioning a bigint and a string " + + "as part of a join may result in a loss of precision."); + } + } else if ((a.equals(TypeInfoFactory.doubleTypeInfo) && + b.equals(TypeInfoFactory.longTypeInfo)) || + (a.equals(TypeInfoFactory.longTypeInfo) && + b.equals(TypeInfoFactory.doubleTypeInfo))) { + if (HiveConf.getVar(conf, + HiveConf.ConfVars.HIVEBIGINTCOMPARISIONMODE).equalsIgnoreCase("strict")) { + throw new UDFArgumentException(ErrorMsg.NO_COMPARE_BIGINT_DOUBLE.getMsg()); + } else { + console.printError("WARNING: Comparing, joining on, or unioning a bigint and a double " + + "as part of a join may result in a loss of precision."); + } + } + } + + /** + *
Find a common class that objects of both TypeInfo a and TypeInfo b can * convert to. This is used for places other than comparison. * Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseCompare.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseCompare.java (revision 1355201) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseCompare.java (working copy) @@ -136,6 +136,8 @@ if (oiTypeInfo0 != oiTypeInfo1) { compareType = CompareType.NEED_CONVERT; + FunctionRegistry.validateConversionPrecision(oiTypeInfo0, oiTypeInfo1); + // If either argument is a string, we convert to a double because a number // in string form should always be convertible into a double if (oiTypeInfo0.equals(TypeInfoFactory.stringTypeInfo)