Index: conf/hive-default.xml.template
===================================================================
--- conf/hive-default.xml.template (revision 1355201)
+++ conf/hive-default.xml.template (working copy)
@@ -1292,6 +1292,15 @@
+ hive.mapred.bigint.comparison.mode
+ nonstrict
+ If this is set to be strict, whenever a bigint may implicitly be converted to a double, either because of a where condition, a join
+ condition, or two columns being combined as part of a union, where in any of these cases one side is a bigint and the other is a string or a double,
+ an exception will be thrown alerting the user to the possibility of a loss in precision. If this is set to nonstrict, a warning to that effect is
+ given.
+
+
+
hive.transform.escape.input
false
Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
===================================================================
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (revision 1355201)
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (working copy)
@@ -370,6 +370,9 @@
HIVEMAPAGGRHASHMINREDUCTION("hive.map.aggr.hash.min.reduction", (float) 0.5),
HIVEMULTIGROUPBYSINGLEREDUCER("hive.multigroupby.singlereducer", true),
+ // prevents comparisons between bigints and doubles/strings for precision reasons
+ HIVEBIGINTCOMPARISIONMODE("hive.mapred.bigint.comparison.mode", "nonstrict"),
+
// for hive udtf operator
HIVEUDTFAUTOPROGRESS("hive.udtf.auto.progress", false),
Index: ql/src/test/results/clientnegative/compare_string_bigint.q.out
===================================================================
--- ql/src/test/results/clientnegative/compare_string_bigint.q.out (revision 1355201)
+++ ql/src/test/results/clientnegative/compare_string_bigint.q.out (working copy)
@@ -1 +1,2 @@
-FAILED: SemanticException Line 0:-1 Wrong arguments ''1'': In strict mode, comparing bigints and strings is not allowed, it may result in a loss of precision. If you really want to perform the operation, set hive.mapred.mode=nonstrict
+WARNING: Comparing a bigint and a string may result in a loss of precision.
+FAILED: SemanticException Line 0:-1 Wrong arguments ''1'': In strict mode, comparing, joining on, and unioning bigints and strings is not allowed as it may result in a loss of precision. If you really want to perform the operation, set hive.mapred.bigint.comparison.mode=nonstrict
Index: ql/src/test/results/clientnegative/compare_string_bigint_union.q.out
===================================================================
--- ql/src/test/results/clientnegative/compare_string_bigint_union.q.out (revision 0)
+++ ql/src/test/results/clientnegative/compare_string_bigint_union.q.out (revision 0)
@@ -0,0 +1 @@
+FAILED: UDFArgumentException [Error 10124]: In strict mode, comparing, joining on, and unioning bigints and doubles is not allowed as it may result in a loss of precision. If you really want to perform the operation, set hive.mapred.bigint.comparison.mode=nonstrict
Index: ql/src/test/results/clientnegative/compare_string_bigint_join.q.out
===================================================================
--- ql/src/test/results/clientnegative/compare_string_bigint_join.q.out (revision 0)
+++ ql/src/test/results/clientnegative/compare_string_bigint_join.q.out (revision 0)
@@ -0,0 +1 @@
+FAILED: UDFArgumentException [Error 10123]: In strict mode, comparing, joining on, and unioning bigints and strings is not allowed as it may result in a loss of precision. If you really want to perform the operation, set hive.mapred.bigint.comparison.mode=nonstrict
Index: ql/src/test/results/clientnegative/compare_double_bigint_union.q.out
===================================================================
--- ql/src/test/results/clientnegative/compare_double_bigint_union.q.out (revision 0)
+++ ql/src/test/results/clientnegative/compare_double_bigint_union.q.out (revision 0)
@@ -0,0 +1 @@
+FAILED: UDFArgumentException [Error 10124]: In strict mode, comparing, joining on, and unioning bigints and doubles is not allowed as it may result in a loss of precision. If you really want to perform the operation, set hive.mapred.bigint.comparison.mode=nonstrict
Index: ql/src/test/results/clientnegative/compare_double_bigint_join.q.out
===================================================================
--- ql/src/test/results/clientnegative/compare_double_bigint_join.q.out (revision 0)
+++ ql/src/test/results/clientnegative/compare_double_bigint_join.q.out (revision 0)
@@ -0,0 +1 @@
+FAILED: UDFArgumentException [Error 10123]: In strict mode, comparing, joining on, and unioning bigints and strings is not allowed as it may result in a loss of precision. If you really want to perform the operation, set hive.mapred.bigint.comparison.mode=nonstrict
Index: ql/src/test/results/clientnegative/compare_double_bigint.q.out
===================================================================
--- ql/src/test/results/clientnegative/compare_double_bigint.q.out (revision 1355201)
+++ ql/src/test/results/clientnegative/compare_double_bigint.q.out (working copy)
@@ -1 +1,2 @@
-FAILED: SemanticException Line 0:-1 Wrong arguments '1.0': In strict mode, comparing bigints and doubles is not allowed, it may result in a loss of precision. If you really want to perform the operation, set hive.mapred.mode=nonstrict
+WARNING: Comparing a bigint and a double may result in a loss of precision.
+FAILED: SemanticException Line 0:-1 Wrong arguments '1.0': In strict mode, comparing, joining on, and unioning bigints and doubles is not allowed as it may result in a loss of precision. If you really want to perform the operation, set hive.mapred.bigint.comparison.mode=nonstrict
Index: ql/src/test/results/clientpositive/filter_join_breaktask2.q.out
===================================================================
--- ql/src/test/results/clientpositive/filter_join_breaktask2.q.out (revision 1355201)
+++ ql/src/test/results/clientpositive/filter_join_breaktask2.q.out (working copy)
@@ -766,6 +766,7 @@
POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c83 EXPRESSION []
POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c9 SIMPLE []
4 1 1 8 4 5 1 0 9 U 2 2 0 2 1 1 J C A U 2 s 2 NULL NULL NULL NULL NULL NULL 1 j S 6 NULL 1 2 J g 1 e 2 1 2 U P p 3 0 0 0 1 1 1 0 0 0 6 2 j NULL NULL NULL NULL NULL NULL 5 NULL NULL j 2 2 1 2 2 1 1 1 1 1 1 1 1 32 NULL 2010-04-17
+WARNING: Comparing, joining on, or unioning a bigint and a string as part of a join may result in a loss of precision.
PREHOOK: query: SELECT a.c1 as a_c1, b.c1 b_c1, d.c0 as d_c0
FROM T1 a JOIN T2 b
ON (a.c1 = b.c1 AND a.ds='2010-04-17' AND b.ds='2010-04-17')
Index: ql/src/test/queries/clientnegative/compare_double_bigint_union.q
===================================================================
--- ql/src/test/queries/clientnegative/compare_double_bigint_union.q (revision 0)
+++ ql/src/test/queries/clientnegative/compare_double_bigint_union.q (revision 0)
@@ -0,0 +1,6 @@
+set hive.mapred.bigint.comparison.mode=strict;
+
+-- This should fail until we fix the issue with precision when casting a bigint to a double
+
+select * from (select cast(key as double) from src union all select cast(key as bigint) from src) a limit 10;
+
Index: ql/src/test/queries/clientnegative/compare_string_bigint_join.q
===================================================================
--- ql/src/test/queries/clientnegative/compare_string_bigint_join.q (revision 0)
+++ ql/src/test/queries/clientnegative/compare_string_bigint_join.q (revision 0)
@@ -0,0 +1,6 @@
+set hive.mapred.bigint.comparison.mode=strict;
+
+--This should fail until we fix the issue with precision when casting a bigint to a double
+
+select * from src a join src b on (cast(a.key as bigint) = cast(b.key as string)) limit 10;
+
Index: ql/src/test/queries/clientnegative/compare_double_bigint.q
===================================================================
--- ql/src/test/queries/clientnegative/compare_double_bigint.q (revision 1355201)
+++ ql/src/test/queries/clientnegative/compare_double_bigint.q (working copy)
@@ -1,4 +1,4 @@
-set hive.mapred.mode=strict;
+set hive.mapred.bigint.comparison.mode=strict;
-- This should fail until we fix the issue with precision when casting a bigint to a double
Index: ql/src/test/queries/clientnegative/compare_double_bigint_join.q
===================================================================
--- ql/src/test/queries/clientnegative/compare_double_bigint_join.q (revision 0)
+++ ql/src/test/queries/clientnegative/compare_double_bigint_join.q (revision 0)
@@ -0,0 +1,6 @@
+set hive.mapred.bigint.comparison.mode=strict;
+
+-- This should fail until we fix the issue with precision when casting a bigint to a double
+
+select * from src a join src b on (cast(a.key as bigint) = cast(b.key as string)) limit 10;
+
Index: ql/src/test/queries/clientnegative/compare_string_bigint.q
===================================================================
--- ql/src/test/queries/clientnegative/compare_string_bigint.q (revision 1355201)
+++ ql/src/test/queries/clientnegative/compare_string_bigint.q (working copy)
@@ -1,4 +1,4 @@
-set hive.mapred.mode=strict;
+set hive.mapred.bigint.comparison.mode=strict;
--This should fail until we fix the issue with precision when casting a bigint to a double
Index: ql/src/test/queries/clientnegative/compare_string_bigint_union.q
===================================================================
--- ql/src/test/queries/clientnegative/compare_string_bigint_union.q (revision 0)
+++ ql/src/test/queries/clientnegative/compare_string_bigint_union.q (revision 0)
@@ -0,0 +1,6 @@
+set hive.mapred.bigint.comparison.mode=strict;
+
+--This should fail until we fix the issue with precision when casting a bigint to a double
+
+select * from (select cast(key as double) from src union all select cast(key as bigint) from src) a limit 10;
+
Index: ql/src/test/queries/clientpositive/filter_join_breaktask2.q
===================================================================
--- ql/src/test/queries/clientpositive/filter_join_breaktask2.q (revision 1355201)
+++ ql/src/test/queries/clientpositive/filter_join_breaktask2.q (working copy)
@@ -26,6 +26,8 @@
select * from T3;
select * from T4;
+set hive.mapred.bigint.comparison.mode=nonstrict;
+
SELECT a.c1 as a_c1, b.c1 b_c1, d.c0 as d_c0
FROM T1 a JOIN T2 b
ON (a.c1 = b.c1 AND a.ds='2010-04-17' AND b.ds='2010-04-17')
Index: ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java (revision 1355201)
+++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java (working copy)
@@ -200,12 +200,12 @@
INCOMPATIBLE_SCHEMA(10120, "The existing table is not compatible with the import spec. "),
EXIM_FOR_NON_NATIVE(10121, "Export/Import cannot be done for a non-native table. "),
INSERT_INTO_BUCKETIZED_TABLE(10122, "Bucketized tables do not support INSERT INTO:"),
- NO_COMPARE_BIGINT_STRING(10123, "In strict mode, comparing bigints and strings is not allowed, "
- + "it may result in a loss of precision. "
- + "If you really want to perform the operation, set hive.mapred.mode=nonstrict"),
- NO_COMPARE_BIGINT_DOUBLE(10124, "In strict mode, comparing bigints and doubles is not allowed, "
- + "it may result in a loss of precision. "
- + "If you really want to perform the operation, set hive.mapred.mode=nonstrict"),
+ NO_COMPARE_BIGINT_STRING(10123, "In strict mode, comparing, joining on, and unioning bigints "
+ + "and strings is not allowed as it may result in a loss of precision. If you really want "
+ + "to perform the operation, set hive.mapred.bigint.comparison.mode=nonstrict"),
+ NO_COMPARE_BIGINT_DOUBLE(10124, "In strict mode, comparing, joining on, and unioning bigints "
+ + "and doubles is not allowed as it may result in a loss of precision. If you really want "
+ + "to perform the operation, set hive.mapred.bigint.comparison.mode=nonstrict"),
PARTSPEC_DIFFER_FROM_SCHEMA(10125, "Partition columns in partition specification are "
+ "not the same as that defined in the table schema. "
+ "The names and orders have to be exactly the same."),
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (revision 1355201)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (working copy)
@@ -37,10 +37,14 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.hive.ql.udf.UDAFPercentile;
import org.apache.hadoop.hive.ql.udf.UDFAbs;
import org.apache.hadoop.hive.ql.udf.UDFAcos;
@@ -157,7 +161,6 @@
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCoalesce;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcatWS;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFSortArray;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFEWAHBitmapAnd;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFEWAHBitmapEmpty;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFEWAHBitmapOr;
@@ -168,8 +171,8 @@
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIf;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFInFile;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIndex;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFInFile;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFInstr;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLocate;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFMap;
@@ -192,6 +195,7 @@
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFReflect;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFSentences;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFSize;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFSortArray;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFSplit;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStringToMap;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
@@ -225,10 +229,8 @@
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.ReflectionUtils;
-
import org.w3c.dom.Document;
import org.w3c.dom.Element;
-import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
@@ -636,11 +638,17 @@
/**
* Find a common class for union-all operator
+ * @throws UDFArgumentException
*/
- public static TypeInfo getCommonClassForUnionAll(TypeInfo a, TypeInfo b) {
+ public static TypeInfo getCommonClassForUnionAll(TypeInfo a, TypeInfo b)
+ throws UDFArgumentException {
+
if (a.equals(b)) {
return a;
}
+
+ validateConversionPrecision(a, b);
+
if (FunctionRegistry.implicitConvertable(a, b)) {
return b;
}
@@ -664,12 +672,18 @@
* them to double and then compare.
*
* @return null if no common class could be found.
+ * @throws UDFArgumentException
*/
- public static TypeInfo getCommonClassForComparison(TypeInfo a, TypeInfo b) {
+ public static TypeInfo getCommonClassForComparison(TypeInfo a, TypeInfo b)
+ throws UDFArgumentException {
+
// If same return one of them
if (a.equals(b)) {
return a;
}
+
+ validateConversionPrecision(a, b);
+
for (TypeInfo t : numericTypeList) {
if (FunctionRegistry.implicitConvertable(a, t)
&& FunctionRegistry.implicitConvertable(b, t)) {
@@ -680,6 +694,41 @@
}
/**
+ * Checks whether Hive's default method of type conversion for two types could result in a loss
+ * of precision. Either throws an exception or displays a warning.
+ *
+ * @throws UDFArgumentException
+ */
+ public static void validateConversionPrecision(TypeInfo a, TypeInfo b)
+ throws UDFArgumentException {
+
+ HiveConf conf = SessionState.get().getConf();
+ LogHelper console = SessionState.getConsole();
+
+ if ((a.equals(TypeInfoFactory.stringTypeInfo) && b.equals(TypeInfoFactory.longTypeInfo)) ||
+ (a.equals(TypeInfoFactory.longTypeInfo) && b.equals(TypeInfoFactory.stringTypeInfo))) {
+ if (HiveConf.getVar(conf,
+ HiveConf.ConfVars.HIVEBIGINTCOMPARISIONMODE).equalsIgnoreCase("strict")) {
+ throw new UDFArgumentException(ErrorMsg.NO_COMPARE_BIGINT_STRING.getMsg());
+ } else {
+ console.printError("WARNING: Comparing, joining on, or unioning a bigint and a string " +
+ "as part of a join may result in a loss of precision.");
+ }
+ } else if ((a.equals(TypeInfoFactory.doubleTypeInfo) &&
+ b.equals(TypeInfoFactory.longTypeInfo)) ||
+ (a.equals(TypeInfoFactory.longTypeInfo) &&
+ b.equals(TypeInfoFactory.doubleTypeInfo))) {
+ if (HiveConf.getVar(conf,
+ HiveConf.ConfVars.HIVEBIGINTCOMPARISIONMODE).equalsIgnoreCase("strict")) {
+ throw new UDFArgumentException(ErrorMsg.NO_COMPARE_BIGINT_DOUBLE.getMsg());
+ } else {
+ console.printError("WARNING: Comparing, joining on, or unioning a bigint and a double " +
+ "as part of a join may result in a loss of precision.");
+ }
+ }
+ }
+
+ /**
* Find a common class that objects of both TypeInfo a and TypeInfo b can
* convert to. This is used for places other than comparison.
*
Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseCompare.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseCompare.java (revision 1355201)
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseCompare.java (working copy)
@@ -136,6 +136,8 @@
if (oiTypeInfo0 != oiTypeInfo1) {
compareType = CompareType.NEED_CONVERT;
+ FunctionRegistry.validateConversionPrecision(oiTypeInfo0, oiTypeInfo1);
+
// If either argument is a string, we convert to a double because a number
// in string form should always be convertible into a double
if (oiTypeInfo0.equals(TypeInfoFactory.stringTypeInfo)