diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java index cf26fce00f..97aa897740 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java @@ -129,7 +129,11 @@ private void prepareInSet(DeferredObject[] arguments) throws HiveException { } } else { for (int i = 1; i < arguments.length; ++i) { - constantInSet.add(((ConstantObjectInspector) argumentOIs[i]).getWritableConstantValue()); + if (!conversionHelper.constantConversionRequired) { + constantInSet.add(((ConstantObjectInspector) argumentOIs[i]).getWritableConstantValue()); + } else { + constantInSet.add(conversionHelper.convertIfNecessary(arguments[i].get(), argumentOIs[i])); + } } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java index c91865b173..6a15d20856 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java @@ -32,16 +32,12 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.serde2.io.HiveCharWritable; import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.*; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.IdentityConverter; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector; @@ -83,6 +79,7 @@ public static boolean isUtfStartByte(byte b) { boolean allowTypeConversion; ObjectInspector returnObjectInspector; + boolean constantConversionRequired; // We create converters beforehand, so that the converters can reuse the // same object for returning conversion results. @@ -173,6 +170,15 @@ private boolean update(ObjectInspector oi, boolean isUnionAll) throws UDFArgumen commonTypeInfo = updateCommonTypeForDecimal(commonTypeInfo, oiTypeInfo, rTypeInfo); + if (oi instanceof StandardConstantStructObjectInspector) { + if ((oiTypeInfo.getCategory() == Category.STRUCT + && rTypeInfo.getCategory() == Category.STRUCT) + && !isUnionAll + && !oiTypeInfo.equals(commonTypeInfo)) { + constantConversionRequired = true; + } + } + returnObjectInspector = TypeInfoUtils .getStandardWritableObjectInspectorFromTypeInfo(commonTypeInfo); diff --git ql/src/test/queries/clientpositive/struct_in_filter.q ql/src/test/queries/clientpositive/struct_in_filter.q new file mode 100644 index 0000000000..33ba444d0a --- /dev/null +++ ql/src/test/queries/clientpositive/struct_in_filter.q @@ -0,0 +1,35 @@ +set hive.explain.user=false; + +create table table1(col0 int, col1 bigint, col2 string, col3 bigint, col4 bigint); + +insert into table1 values (1, 10000, 'cc4' ,2014, 11); +insert into table1 values (2, 10000, 'cc3' ,2015, 11); +insert into table1 values (3, 
10000, 'cc2' ,2014, 11); +insert into table1 values (4, 10000, 'cc1' ,2013, 11); + +-- HIVE-18999 test case +-- INCORRECT before the HIVE-18999 patch: BIGINT column versus INT constant +SELECT COUNT(t1.col0) from table1 t1 where struct(col3, col4) in (struct(2014, 11)); +-- CORRECT: BIGINT column versus STRING constant +SELECT COUNT(t1.col0) from table1 t1 where struct(col3, col4) in (struct('2014', '11')); +SELECT COUNT(t1.col0) from table1 t1 where struct(col3, col4) in (struct('2014', 11)); + +-- Similar test cases with different data types +-- INT column versus INT constant +create table table2(col2 int); +insert into table2 values (2010); +insert into table2 values (2012); +insert into table2 values (2014); +insert into table2 values (2016); +insert into table2 values (2014); +SELECT COUNT(t2.col2) from table2 t2 where struct(col2) in (struct(2014)); + +-- SMALLINT column versus INT constant +create table table3(col3 smallint); +insert into table3 values (2010); +insert into table3 values (2012); +insert into table3 values (2014); +insert into table3 values (2016); +insert into table3 values (2014); +SELECT COUNT(t3.col3) from table3 t3 where struct(col3) in (struct(2014)); + diff --git ql/src/test/results/clientpositive/struct_in_filter.q.out ql/src/test/results/clientpositive/struct_in_filter.q.out new file mode 100644 index 0000000000..0160a67e7f --- /dev/null +++ ql/src/test/results/clientpositive/struct_in_filter.q.out @@ -0,0 +1,211 @@ +PREHOOK: query: create table table1(col0 int, col1 bigint, col2 string, col3 bigint, col4 bigint) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table1 +POSTHOOK: query: create table table1(col0 int, col1 bigint, col2 string, col3 bigint, col4 bigint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table1 values (1, 10000, 'cc4' ,2014, 11) +PREHOOK: type: QUERY +PREHOOK: Input: 
_dummy_database@_dummy_table +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table1 values (1, 10000, 'cc4' ,2014, 11) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.col0 SCRIPT [] +POSTHOOK: Lineage: table1.col1 SCRIPT [] +POSTHOOK: Lineage: table1.col2 SCRIPT [] +POSTHOOK: Lineage: table1.col3 SCRIPT [] +POSTHOOK: Lineage: table1.col4 SCRIPT [] +PREHOOK: query: insert into table1 values (2, 10000, 'cc3' ,2015, 11) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table1 values (2, 10000, 'cc3' ,2015, 11) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.col0 SCRIPT [] +POSTHOOK: Lineage: table1.col1 SCRIPT [] +POSTHOOK: Lineage: table1.col2 SCRIPT [] +POSTHOOK: Lineage: table1.col3 SCRIPT [] +POSTHOOK: Lineage: table1.col4 SCRIPT [] +PREHOOK: query: insert into table1 values (3, 10000, 'cc2' ,2014, 11) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table1 values (3, 10000, 'cc2' ,2014, 11) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.col0 SCRIPT [] +POSTHOOK: Lineage: table1.col1 SCRIPT [] +POSTHOOK: Lineage: table1.col2 SCRIPT [] +POSTHOOK: Lineage: table1.col3 SCRIPT [] +POSTHOOK: Lineage: table1.col4 SCRIPT [] +PREHOOK: query: insert into table1 values (4, 10000, 'cc1' ,2013, 11) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table1 values (4, 10000, 'cc1' ,2013, 11) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.col0 SCRIPT [] +POSTHOOK: Lineage: table1.col1 SCRIPT [] 
+POSTHOOK: Lineage: table1.col2 SCRIPT [] +POSTHOOK: Lineage: table1.col3 SCRIPT [] +POSTHOOK: Lineage: table1.col4 SCRIPT [] +PREHOOK: query: SELECT COUNT(t1.col0) from table1 t1 where struct(col3, col4) in (struct(2014, 11)) +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(t1.col0) from table1 t1 where struct(col3, col4) in (struct(2014, 11)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +2 +PREHOOK: query: SELECT COUNT(t1.col0) from table1 t1 where struct(col3, col4) in (struct('2014', '11')) +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(t1.col0) from table1 t1 where struct(col3, col4) in (struct('2014', '11')) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +2 +PREHOOK: query: SELECT COUNT(t1.col0) from table1 t1 where struct(col3, col4) in (struct('2014', 11)) +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(t1.col0) from table1 t1 where struct(col3, col4) in (struct('2014', 11)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +2 +PREHOOK: query: create table table2(col2 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table2 +POSTHOOK: query: create table table2(col2 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table2 values (2010) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table2 values (2010) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.col2 SCRIPT [] +PREHOOK: query: insert into table2 values (2012) 
+PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table2 values (2012) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.col2 SCRIPT [] +PREHOOK: query: insert into table2 values (2014) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table2 values (2014) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.col2 SCRIPT [] +PREHOOK: query: insert into table2 values (2016) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table2 values (2016) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.col2 SCRIPT [] +PREHOOK: query: insert into table2 values (2014) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table2 values (2014) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.col2 SCRIPT [] +PREHOOK: query: SELECT COUNT(t2.col2) from table2 t2 where struct(col2) in (struct(2014)) +PREHOOK: type: QUERY +PREHOOK: Input: default@table2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(t2.col2) from table2 t2 where struct(col2) in (struct(2014)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table2 +#### A masked pattern was here #### +2 +PREHOOK: query: create table table3(col3 smallint) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table3 +POSTHOOK: query: create table table3(col3 smallint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@table3 +PREHOOK: query: insert into table3 values (2010) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table3 +POSTHOOK: query: insert into table3 values (2010) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table3 +POSTHOOK: Lineage: table3.col3 SCRIPT [] +PREHOOK: query: insert into table3 values (2012) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table3 +POSTHOOK: query: insert into table3 values (2012) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table3 +POSTHOOK: Lineage: table3.col3 SCRIPT [] +PREHOOK: query: insert into table3 values (2014) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table3 +POSTHOOK: query: insert into table3 values (2014) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table3 +POSTHOOK: Lineage: table3.col3 SCRIPT [] +PREHOOK: query: insert into table3 values (2016) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table3 +POSTHOOK: query: insert into table3 values (2016) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table3 +POSTHOOK: Lineage: table3.col3 SCRIPT [] +PREHOOK: query: insert into table3 values (2014) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table3 +POSTHOOK: query: insert into table3 values (2014) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table3 +POSTHOOK: Lineage: table3.col3 SCRIPT [] +PREHOOK: query: SELECT COUNT(t3.col3) from table3 t3 where struct(col3) in (struct(2014)) +PREHOOK: type: QUERY +PREHOOK: Input: default@table3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(t3.col3) from table3 t3 where 
struct(col3) in (struct(2014)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table3 +#### A masked pattern was here #### +2