diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFInFile.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFInFile.java index ea52537..51d05d6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFInFile.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFInFile.java @@ -26,6 +26,8 @@ import java.io.InputStreamReader; import java.util.HashSet; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; @@ -59,29 +61,38 @@ public ObjectInspector initialize(ObjectInspector[] arguments) "IN_FILE() accepts exactly 2 arguments."); } - for (int i = 0; i < arguments.length; i++) { - if (!String.class.equals( - PrimitiveObjectInspectorUtils. - getJavaPrimitiveClassFromObjectInspector(arguments[i]))) { - throw new UDFArgumentTypeException(i, "The " - + GenericUDFUtils.getOrdinal(i + 1) - + " argument of function IN_FILE must be a string but " - + arguments[i].toString() + " was given."); - } - } - strObjectInspector = arguments[0]; fileObjectInspector = arguments[1]; - if (!ObjectInspectorUtils.isConstantObjectInspector(fileObjectInspector)) { - throw new UDFArgumentTypeException(1, - "The second argument of IN_FILE() must be a constant string but " + - fileObjectInspector.toString() + " was given."); + if (!isTypeCompatible(strObjectInspector)) { + throw new UDFArgumentTypeException(0, "The first " + + "argument of function IN_FILE must be a string, " + + "char or varchar but " + + strObjectInspector.toString() + " was given."); + } + + if (!String.class.equals( + PrimitiveObjectInspectorUtils. + getJavaPrimitiveClassFromObjectInspector(fileObjectInspector)) || + !ObjectInspectorUtils.isConstantObjectInspector(fileObjectInspector)) { + throw new UDFArgumentTypeException(1, "The second " + + "argument of IN_FILE() must be a constant string but " + + fileObjectInspector.toString() + " was given."); } return PrimitiveObjectInspectorFactory.javaBooleanObjectInspector; } + private boolean isTypeCompatible(ObjectInspector argument) { + return + String.class.equals(PrimitiveObjectInspectorUtils. + getJavaPrimitiveClassFromObjectInspector(argument)) || + HiveChar.class.equals(PrimitiveObjectInspectorUtils. + getJavaPrimitiveClassFromObjectInspector(argument)) || + HiveVarchar.class.equals(PrimitiveObjectInspectorUtils. + getJavaPrimitiveClassFromObjectInspector(argument)); + } + @Override public String[] getRequiredFiles() { return new String[] { @@ -96,8 +107,8 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { return null; } - String str = (String)ObjectInspectorUtils.copyToStandardJavaObject( - arguments[0].get(), strObjectInspector); + String str = ObjectInspectorUtils.copyToStandardJavaObject( + arguments[0].get(), strObjectInspector).toString(); if (set == null) { String fileName = (String)ObjectInspectorUtils.copyToStandardJavaObject( diff --git ql/src/test/queries/clientpositive/udf_in_file.q ql/src/test/queries/clientpositive/udf_in_file.q index 9d9efe8..a045166 100644 --- ql/src/test/queries/clientpositive/udf_in_file.q +++ ql/src/test/queries/clientpositive/udf_in_file.q @@ -1,12 +1,30 @@ DESCRIBE FUNCTION in_file; +CREATE TABLE value_src (str_val char(3), ch_val STRING, vch_val varchar(10), + str_val_neg char(3), ch_val_neg STRING, vch_val_neg varchar(10)) + ROW FORMAT DELIMITED FIELDS TERMINATED BY ','; + +LOAD DATA LOCAL INPATH '../../data/files/in_file.dat' INTO TABLE value_src; + EXPLAIN -SELECT in_file("303", "../../data/files/test2.dat"), +SELECT in_file(str_val, "../../data/files/test2.dat"), + in_file(ch_val, "../../data/files/test2.dat"), + in_file(vch_val, "../../data/files/test2.dat"), + in_file(str_val_neg, "../../data/files/test2.dat"), + in_file(ch_val_neg, "../../data/files/test2.dat"), + in_file(vch_val_neg, "../../data/files/test2.dat"), + in_file("303", "../../data/files/test2.dat"), in_file("304", "../../data/files/test2.dat"), in_file(CAST(NULL AS STRING), "../../data/files/test2.dat") -FROM src LIMIT 1; +FROM value_src LIMIT 1; -SELECT in_file("303", "../../data/files/test2.dat"), +SELECT in_file(str_val, "../../data/files/test2.dat"), + in_file(ch_val, "../../data/files/test2.dat"), + in_file(vch_val, "../../data/files/test2.dat"), + in_file(str_val_neg, "../../data/files/test2.dat"), + in_file(ch_val_neg, "../../data/files/test2.dat"), + in_file(vch_val_neg, "../../data/files/test2.dat"), + in_file("303", "../../data/files/test2.dat"), in_file("304", "../../data/files/test2.dat"), in_file(CAST(NULL AS STRING), "../../data/files/test2.dat") -FROM src LIMIT 1; +FROM value_src LIMIT 1; \ No newline at end of file diff --git ql/src/test/results/clientpositive/udf_in_file.q.out ql/src/test/results/clientpositive/udf_in_file.q.out index b631437..8871b4e 100644 --- ql/src/test/results/clientpositive/udf_in_file.q.out +++ ql/src/test/results/clientpositive/udf_in_file.q.out @@ -3,17 +3,48 @@ PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION in_file POSTHOOK: type: DESCFUNCTION in_file(str, filename) - Returns true if str appears in the file +PREHOOK: query: CREATE TABLE value_src (str_val char(3), ch_val STRING, vch_val varchar(10), + str_val_neg char(3), ch_val_neg STRING, vch_val_neg varchar(10)) + ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE value_src (str_val char(3), ch_val STRING, vch_val varchar(10), + str_val_neg char(3), ch_val_neg STRING, vch_val_neg varchar(10)) + ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@value_src +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in_file.dat' INTO TABLE value_src +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@value_src +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in_file.dat' INTO TABLE value_src +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@value_src PREHOOK: query: EXPLAIN -SELECT in_file("303", "../../data/files/test2.dat"), +SELECT in_file(str_val, "../../data/files/test2.dat"), + in_file(ch_val, "../../data/files/test2.dat"), + in_file(vch_val, "../../data/files/test2.dat"), + in_file(str_val_neg, "../../data/files/test2.dat"), + in_file(ch_val_neg, "../../data/files/test2.dat"), + in_file(vch_val_neg, "../../data/files/test2.dat"), + in_file("303", "../../data/files/test2.dat"), in_file("304", "../../data/files/test2.dat"), in_file(CAST(NULL AS STRING), "../../data/files/test2.dat") -FROM src LIMIT 1 +FROM value_src LIMIT 1 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN -SELECT in_file("303", "../../data/files/test2.dat"), +SELECT in_file(str_val, "../../data/files/test2.dat"), + in_file(ch_val, "../../data/files/test2.dat"), + in_file(vch_val, "../../data/files/test2.dat"), + in_file(str_val_neg, "../../data/files/test2.dat"), + in_file(ch_val_neg, "../../data/files/test2.dat"), + in_file(vch_val_neg, "../../data/files/test2.dat"), + in_file("303", "../../data/files/test2.dat"), in_file("304", "../../data/files/test2.dat"), in_file(CAST(NULL AS STRING), "../../data/files/test2.dat") -FROM src LIMIT 1 +FROM value_src LIMIT 1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -24,18 +55,18 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src - Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE + alias: value_src + Statistics: Num rows: 0 Data size: 24 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: in_file('303', '../../data/files/test2.dat') (type: boolean), in_file('304', '../../data/files/test2.dat') (type: boolean), in_file(UDFToString(null), '../../data/files/test2.dat') (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE + expressions: in_file(str_val, '../../data/files/test2.dat') (type: boolean), in_file(ch_val, '../../data/files/test2.dat') (type: boolean), in_file(vch_val, '../../data/files/test2.dat') (type: boolean), in_file(str_val_neg, '../../data/files/test2.dat') (type: boolean), in_file(ch_val_neg, '../../data/files/test2.dat') (type: boolean), in_file(vch_val_neg, '../../data/files/test2.dat') (type: boolean), in_file('303', '../../data/files/test2.dat') (type: boolean), in_file('304', '../../data/files/test2.dat') (type: boolean), in_file(UDFToString(null), '../../data/files/test2.dat') (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 0 Data size: 24 Basic stats: PARTIAL Column stats: NONE Limit Number of rows: 1 - Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 24 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 0 Data size: 24 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -47,18 +78,30 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: SELECT in_file("303", "../../data/files/test2.dat"), +PREHOOK: query: SELECT in_file(str_val, "../../data/files/test2.dat"), + in_file(ch_val, "../../data/files/test2.dat"), + in_file(vch_val, "../../data/files/test2.dat"), + in_file(str_val_neg, "../../data/files/test2.dat"), + in_file(ch_val_neg, "../../data/files/test2.dat"), + in_file(vch_val_neg, "../../data/files/test2.dat"), + in_file("303", "../../data/files/test2.dat"), in_file("304", "../../data/files/test2.dat"), in_file(CAST(NULL AS STRING), "../../data/files/test2.dat") -FROM src LIMIT 1 +FROM value_src LIMIT 1 PREHOOK: type: QUERY -PREHOOK: Input: default@src +PREHOOK: Input: default@value_src #### A masked pattern was here #### -POSTHOOK: query: SELECT in_file("303", "../../data/files/test2.dat"), +POSTHOOK: query: SELECT in_file(str_val, "../../data/files/test2.dat"), + in_file(ch_val, "../../data/files/test2.dat"), + in_file(vch_val, "../../data/files/test2.dat"), + in_file(str_val_neg, "../../data/files/test2.dat"), + in_file(ch_val_neg, "../../data/files/test2.dat"), + in_file(vch_val_neg, "../../data/files/test2.dat"), + in_file("303", "../../data/files/test2.dat"), in_file("304", "../../data/files/test2.dat"), in_file(CAST(NULL AS STRING), "../../data/files/test2.dat") -FROM src LIMIT 1 +FROM value_src LIMIT 1 POSTHOOK: type: QUERY -POSTHOOK: Input: default@src +POSTHOOK: Input: default@value_src #### A masked pattern was here #### -true false NULL +true true true false false false true false NULL