diff --git ql/src/test/queries/clientpositive/udf_hash2.q ql/src/test/queries/clientpositive/udf_hash2.q new file mode 100644 index 0000000..0311091 --- /dev/null +++ ql/src/test/queries/clientpositive/udf_hash2.q @@ -0,0 +1,12 @@ +select + -- string/char/varchar types should match + hash(value) = hash(cast(value as varchar(10))), + hash(value) = hash(cast(value as varchar(20))), + hash(value) = hash(cast(value as char(10))), + hash(value) = hash(cast(value as char(20))), + hash(cast(value as varchar(10))) = hash(cast(value as char(20))), + -- integral types should also have similar hash values + hash(cast(5 as smallint)) = hash(cast(5 as int)), + hash(cast(key as smallint)) = hash(cast(key as int)), + hash(cast(key as bigint)) = hash(cast(key as int)) +from src limit 1; diff --git ql/src/test/results/clientpositive/udf_hash2.q.out ql/src/test/results/clientpositive/udf_hash2.q.out new file mode 100644 index 0000000..a94ef68 --- /dev/null +++ ql/src/test/results/clientpositive/udf_hash2.q.out @@ -0,0 +1,31 @@ +PREHOOK: query: select + -- string/char/varchar types should match + hash(value) = hash(cast(value as varchar(10))), + hash(value) = hash(cast(value as varchar(20))), + hash(value) = hash(cast(value as char(10))), + hash(value) = hash(cast(value as char(20))), + hash(cast(value as varchar(10))) = hash(cast(value as char(20))), + -- integral types should also have similar hash values + hash(cast(5 as smallint)) = hash(cast(5 as int)), + hash(cast(key as smallint)) = hash(cast(key as int)), + hash(cast(key as bigint)) = hash(cast(key as int)) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select + -- string/char/varchar types should match + hash(value) = hash(cast(value as varchar(10))), + hash(value) = hash(cast(value as varchar(20))), + hash(value) = hash(cast(value as char(10))), + hash(value) = hash(cast(value as char(20))), + hash(cast(value as varchar(10))) = hash(cast(value as 
char(20))), + -- integral types should also have similar hash values + hash(cast(5 as smallint)) = hash(cast(5 as int)), + hash(cast(key as smallint)) = hash(cast(key as int)), + hash(cast(key as bigint)) = hash(cast(key as int)) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +true true true true true true true true diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java index d307b0f..4233dde 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java @@ -523,16 +523,16 @@ public static int hashCode(Object o, ObjectInspector objIns) { // all characters are ASCII, while Text.hashCode() always returns a // different result. Text t = ((StringObjectInspector) poi).getPrimitiveWritableObject(o); - int r = 0; - for (int i = 0; i < t.getLength(); i++) { - r = r * 31 + t.getBytes()[i]; - } - return r; + return hashCodeText(t); } case CHAR: - return ((HiveCharObjectInspector) poi).getPrimitiveWritableObject(o).hashCode(); + // behavior should match string type + return hashCodeText( + ((HiveCharObjectInspector) poi).getPrimitiveWritableObject(o).getStrippedValue()); case VARCHAR: - return ((HiveVarcharObjectInspector)poi).getPrimitiveWritableObject(o).hashCode(); + // behavior should match string type + return hashCodeText( + ((HiveVarcharObjectInspector)poi).getPrimitiveWritableObject(o).getTextValue()); case BINARY: return ((BinaryObjectInspector) poi).getPrimitiveWritableObject(o).hashCode(); @@ -593,6 +593,19 @@ public static int hashCode(Object o, ObjectInspector objIns) { } /** + * Hashes a Text object so that it returns the same value as String.hashCode(). 
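+ * @param t the Text to hash; only its first getLength() bytes are read + * @return 31-based polynomial hash of those bytes; equals String.hashCode() for ASCII-only text + */ + public static int hashCodeText(Text t) { + int r = 0; + for (int i = 0; i < t.getLength(); i++) { + r = r * 31 + t.getBytes()[i]; + } + return r; + } + + /** + * Compare two arrays of objects with their respective arrays of * ObjectInspectors. */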