Index: ql/src/test/results/clientpositive/udtf_json_tuple.q.out =================================================================== --- ql/src/test/results/clientpositive/udtf_json_tuple.q.out (revision 0) +++ ql/src/test/results/clientpositive/udtf_json_tuple.q.out (revision 0) @@ -0,0 +1,461 @@ +PREHOOK: query: create table json_t (key string, jstring string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table json_t (key string, jstring string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@json_t +PREHOOK: query: insert overwrite table json_t +select * from ( + select '1', '{"f1": "value1", "f2": "value2", "f3": 3, "f5": 5.23}' from src limit 1 + union all + select '2', '{"f1": "value12", "f3": "value3", "f2": 2, "f4": 4.01}' from src limit 1 + union all + select '3', '{"f1": "value13", "f4": "value44", "f3": "value33", "f2": 2, "f5": 5.01}' from src limit 1 + union all + select '4', cast(null as string) from src limit 1 + union all + select '5', '{"f1": "", "f5": null}' from src limit 1 + union all + select '6', '[invalid JSON string]' from src limit 1 +) s +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@json_t +POSTHOOK: query: insert overwrite table json_t +select * from ( + select '1', '{"f1": "value1", "f2": "value2", "f3": 3, "f5": 5.23}' from src limit 1 + union all + select '2', '{"f1": "value12", "f3": "value3", "f2": 2, "f4": 4.01}' from src limit 1 + union all + select '3', '{"f1": "value13", "f4": "value44", "f3": "value33", "f2": 2, "f5": 5.01}' from src limit 1 + union all + select '4', cast(null as string) from src limit 1 + union all + select '5', '{"f1": "", "f5": null}' from src limit 1 + union all + select '6', '[invalid JSON string]' from src limit 1 +) s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@json_t +POSTHOOK: Lineage: json_t.jstring EXPRESSION [] +POSTHOOK: Lineage: json_t.key EXPRESSION [] +PREHOOK: query: explain +select a.key, b.* from json_t a lateral 
view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, b.* from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: json_t.jstring EXPRESSION [] +POSTHOOK: Lineage: json_t.key EXPRESSION [] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION json_tuple (. (TOK_TABLE_OR_COL a) jstring) 'f1' 'f2' 'f3' 'f4' 'f5') f1 f2 f3 f4 f5 (TOK_TABALIAS b))) (TOK_TABREF json_t a))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_ALLCOLREF b))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + Lateral View Forward + Select Operator + SELECT * : (no compute) + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string + expr: _col6 + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Select Operator + expressions: + expr: jstring + type: string + expr: 'f1' + type: string + expr: 'f2' + type: string + expr: 'f3' + type: string + expr: 'f4' + type: string + expr: 'f5' + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + UDTF Operator + function name: json_tuple + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Operator + expressions: 
+ expr: _col0 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string + expr: _col6 + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select a.key, b.* from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 +PREHOOK: type: QUERY +PREHOOK: Input: default@json_t +PREHOOK: Output: file:/tmp/nzhang/hive_2010-08-12_17-58-09_840_8200365876234855592/-mr-10000 +POSTHOOK: query: select a.key, b.* from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@json_t +POSTHOOK: Output: file:/tmp/nzhang/hive_2010-08-12_17-58-09_840_8200365876234855592/-mr-10000 +POSTHOOK: Lineage: json_t.jstring EXPRESSION [] +POSTHOOK: Lineage: json_t.key EXPRESSION [] +4 NULL NULL NULL NULL NULL +3 value13 2 value33 value44 5.01 +2 value12 2 value3 4.01 NULL +1 value1 value2 3 NULL 5.23 +5 NULL NULL NULL NULL +PREHOOK: query: explain +select json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') as (f1, f2, f3, f4, f5) from json_t a +PREHOOK: type: QUERY +POSTHOOK: query: explain +select json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') as (f1, f2, f3, f4, f5) from json_t a +POSTHOOK: type: QUERY +POSTHOOK: Lineage: json_t.jstring EXPRESSION [] +POSTHOOK: Lineage: json_t.key EXPRESSION [] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF json_t a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION json_tuple (. 
(TOK_TABLE_OR_COL a) jstring) 'f1' 'f2' 'f3' 'f4' 'f5') f1 f2 f3 f4 f5)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + Select Operator + expressions: + expr: jstring + type: string + expr: 'f1' + type: string + expr: 'f2' + type: string + expr: 'f3' + type: string + expr: 'f4' + type: string + expr: 'f5' + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + UDTF Operator + function name: json_tuple + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') as (f1, f2, f3, f4, f5) from json_t a +PREHOOK: type: QUERY +PREHOOK: Input: default@json_t +PREHOOK: Output: file:/tmp/nzhang/hive_2010-08-12_17-58-13_794_8613454555471840841/-mr-10000 +POSTHOOK: query: select json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') as (f1, f2, f3, f4, f5) from json_t a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@json_t +POSTHOOK: Output: file:/tmp/nzhang/hive_2010-08-12_17-58-13_794_8613454555471840841/-mr-10000 +POSTHOOK: Lineage: json_t.jstring EXPRESSION [] +POSTHOOK: Lineage: json_t.key EXPRESSION [] +NULL NULL NULL NULL NULL +value13 2 value33 value44 5.01 +value12 2 value3 4.01 NULL +value1 value2 3 NULL 5.23 + NULL NULL NULL NULL +PREHOOK: query: explain +select a.key, b.f2, b.f5 from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, b.f2, b.f5 from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: json_t.jstring EXPRESSION [] +POSTHOOK: Lineage: 
json_t.key EXPRESSION [] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION json_tuple (. (TOK_TABLE_OR_COL a) jstring) 'f1' 'f2' 'f3' 'f4' 'f5') f1 f2 f3 f4 f5 (TOK_TABALIAS b))) (TOK_TABREF json_t a))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) f2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) f5))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + Lateral View Forward + Select Operator + SELECT * : (no compute) + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col3 + type: string + expr: _col6 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Select Operator + expressions: + expr: jstring + type: string + expr: 'f1' + type: string + expr: 'f2' + type: string + expr: 'f3' + type: string + expr: 'f4' + type: string + expr: 'f5' + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + UDTF Operator + function name: json_tuple + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col3 + type: string + expr: _col6 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: 
query: select a.key, b.f2, b.f5 from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 +PREHOOK: type: QUERY +PREHOOK: Input: default@json_t +PREHOOK: Output: file:/tmp/nzhang/hive_2010-08-12_17-58-17_260_2655690577880014370/-mr-10000 +POSTHOOK: query: select a.key, b.f2, b.f5 from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@json_t +POSTHOOK: Output: file:/tmp/nzhang/hive_2010-08-12_17-58-17_260_2655690577880014370/-mr-10000 +POSTHOOK: Lineage: json_t.jstring EXPRESSION [] +POSTHOOK: Lineage: json_t.key EXPRESSION [] +4 NULL NULL +3 2 5.01 +2 2 NULL +1 value2 5.23 +5 NULL NULL +PREHOOK: query: explain +select f2, count(*) from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 where f1 is not null group by f2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select f2, count(*) from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 where f1 is not null group by f2 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: json_t.jstring EXPRESSION [] +POSTHOOK: Lineage: json_t.key EXPRESSION [] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION json_tuple (. 
(TOK_TABLE_OR_COL a) jstring) 'f1' 'f2' 'f3' 'f4' 'f5') f1 f2 f3 f4 f5 (TOK_TABALIAS b))) (TOK_TABREF json_t a))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL f2)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL f1))) (TOK_GROUPBY (TOK_TABLE_OR_COL f2)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + Lateral View Forward + Select Operator + SELECT * : (no compute) + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Filter Operator + predicate: + expr: _col2 is not null + type: boolean + Select Operator + expressions: + expr: _col3 + type: string + outputColumnNames: _col3 + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: _col3 + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Select Operator + expressions: + expr: jstring + type: string + expr: 'f1' + type: string + expr: 'f2' + type: string + expr: 'f3' + type: string + expr: 'f4' + type: string + expr: 'f5' + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + UDTF Operator + function name: json_tuple + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Filter Operator + predicate: + expr: _col2 is not null + type: boolean + Select Operator + expressions: + expr: _col3 + type: string + outputColumnNames: _col3 + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: _col3 + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: 
+ expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select f2, count(*) from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 where f1 is not null group by f2 +PREHOOK: type: QUERY +PREHOOK: Input: default@json_t +PREHOOK: Output: file:/tmp/nzhang/hive_2010-08-12_17-58-20_835_55486591128179740/-mr-10000 +POSTHOOK: query: select f2, count(*) from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 where f1 is not null group by f2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@json_t +POSTHOOK: Output: file:/tmp/nzhang/hive_2010-08-12_17-58-20_835_55486591128179740/-mr-10000 +POSTHOOK: Lineage: json_t.jstring EXPRESSION [] +POSTHOOK: Lineage: json_t.key EXPRESSION [] +NULL 1 +2 2 +value2 1 Index: ql/src/test/results/clientpositive/show_functions.q.out =================================================================== --- ql/src/test/results/clientpositive/show_functions.q.out (revision 984470) +++ ql/src/test/results/clientpositive/show_functions.q.out (working copy) @@ -69,6 +69,7 @@ int isnotnull isnull +json_tuple lcase length like @@ -175,6 +176,7 @@ e explode from_unixtime +json_tuple lcase like locate Index: ql/src/test/queries/clientpositive/udtf_json_tuple.q 
=================================================================== --- ql/src/test/queries/clientpositive/udtf_json_tuple.q (revision 0) +++ ql/src/test/queries/clientpositive/udtf_json_tuple.q (revision 0) @@ -0,0 +1,36 @@ +create table json_t (key string, jstring string); + +insert overwrite table json_t +select * from ( + select '1', '{"f1": "value1", "f2": "value2", "f3": 3, "f5": 5.23}' from src limit 1 + union all + select '2', '{"f1": "value12", "f3": "value3", "f2": 2, "f4": 4.01}' from src limit 1 + union all + select '3', '{"f1": "value13", "f4": "value44", "f3": "value33", "f2": 2, "f5": 5.01}' from src limit 1 + union all + select '4', cast(null as string) from src limit 1 + union all + select '5', '{"f1": "", "f5": null}' from src limit 1 + union all + select '6', '[invalid JSON string]' from src limit 1 +) s; + +explain +select a.key, b.* from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5; + +select a.key, b.* from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5; + +explain +select json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') as (f1, f2, f3, f4, f5) from json_t a; + +select json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') as (f1, f2, f3, f4, f5) from json_t a; + +explain +select a.key, b.f2, b.f5 from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5; + +select a.key, b.f2, b.f5 from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5; + +explain +select f2, count(*) from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 where f1 is not null group by f2; + +select f2, count(*) from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 where f1 is not null group by f2; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java 
=================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (revision 984470) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (working copy) @@ -130,13 +130,12 @@ import org.apache.hadoop.hive.ql.udf.UDFUpper; import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear; import org.apache.hadoop.hive.ql.udf.UDFYear; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFnGrams; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFBridge; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCollectSet; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFHistogramNumeric; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCollectSet; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMax; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMin; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFParameterInfo; @@ -148,6 +147,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFVariance; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFVarianceSample; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFnGrams; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFArray; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFArrayContains; @@ -173,6 +173,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen; import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFExplode; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFJSONTuple; import org.apache.hadoop.hive.ql.udf.generic.SimpleGenericUDAFParameterInfo; import 
org.apache.hadoop.hive.ql.udf.xml.GenericUDFXPath; import org.apache.hadoop.hive.ql.udf.xml.UDFXPathBoolean; @@ -394,6 +395,7 @@ // Generic UDTF's registerGenericUDTF("explode", GenericUDTFExplode.class); + registerGenericUDTF("json_tuple", GenericUDTFJSONTuple.class); } public static void registerTemporaryUDF(String functionName, @@ -734,8 +736,9 @@ } public static GenericUDAFResolver getGenericUDAFResolver(String functionName) { - if (LOG.isDebugEnabled()) + if (LOG.isDebugEnabled()) { LOG.debug("Looking up GenericUDAF: " + functionName); + } FunctionInfo finfo = mFunctions.get(functionName.toLowerCase()); if (finfo == null) { return null; @@ -873,10 +876,11 @@ conversionCost += cost; } } - if (LOG.isDebugEnabled()) + if (LOG.isDebugEnabled()) { LOG.debug("Method " + (match ? "did" : "didn't") + " match: passed = " + argumentsPassed + " accepted = " + argumentsAccepted + " method = " + m); + } if (match) { // Always choose the function with least implicit conversions. if (conversionCost < leastConversionCost) { Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java (revision 0) @@ -0,0 +1,153 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import java.util.ArrayList; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde.Constants; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; +import org.apache.hadoop.io.Text; +import org.json.JSONException; +import org.json.JSONObject; +/** + * GenericUDTFJSONTuple: takes a JSON string plus a list of field names, and emits + * one row with one string column per field; missing fields come out as NULL. + */ +@Description(name = "json_tuple", + value = "_FUNC_(jsonStr, p1, p2, ..., pn) - like get_json_object, but it takes multiple names and return a tuple. 
" + + "All the input parameters and output column types are string.") + +public class GenericUDTFJSONTuple extends GenericUDTF { + + private static Log LOG = LogFactory.getLog(GenericUDTFJSONTuple.class.getName()); + + int numCols; // number of output columns + String[] paths; // array of path expressions, each of which corresponds to a column + Text[] retCols; // array of returned column values + Text[] cols; // object pool of non-null Text, avoid creating objects all the time + Object[] nullCols; // array of null column values + ObjectInspector[] inputOIs; // input ObjectInspectors + boolean pathParsed = false; + boolean seenErrors = false; + + @Override + public void close() throws HiveException { + } + + @Override + public StructObjectInspector initialize(ObjectInspector[] args) + throws UDFArgumentException { + + inputOIs = args; + numCols = args.length - 1; + + if (numCols < 1) { + throw new UDFArgumentException("json_tuple() takes at least two arguments: " + + "the json string and a path expression"); + } + + for (int i = 0; i < args.length; ++i) { + if (args[i].getCategory() != ObjectInspector.Category.PRIMITIVE || + !args[i].getTypeName().equals(Constants.STRING_TYPE_NAME)) { + throw new UDFArgumentException("json_tuple()'s arguments have to be string type"); + } + } + + seenErrors = false; + pathParsed = false; + paths = new String[numCols]; + cols = new Text[numCols]; + retCols = new Text[numCols]; + nullCols = new Object[numCols]; + + for (int i = 0; i < numCols; ++i) { + cols[i] = new Text(); + retCols[i] = cols[i]; + nullCols[i] = null; + } + + // construct output object inspector + ArrayList fieldNames = new ArrayList(numCols); + ArrayList fieldOIs = new ArrayList(numCols); + for (int i = 0; i < numCols; ++i) { + // column name can be anything since it will be named by UDTF as clause + fieldNames.add("c" + i); + // all returned type will be Text + fieldOIs.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); + } + return 
ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs); + } + + @Override + public void process(Object[] o) throws HiveException { + + if (o[0] == null) { + forward(nullCols); + return; + } + // get the path expression for the 1st row only + if (!pathParsed) { + for (int i = 0;i < numCols; ++i) { + paths[i] = ((StringObjectInspector) inputOIs[i+1]).getPrimitiveJavaObject(o[i+1]); + } + pathParsed = true; + } + + String jsonStr = ((StringObjectInspector) inputOIs[0]).getPrimitiveJavaObject(o[0]); + if (jsonStr == null) { + forward(nullCols); + return; + } + try { + JSONObject jsonObj = new JSONObject(jsonStr); + + for (int i = 0; i < numCols; ++i) { + if (jsonObj.isNull(paths[i])) { + retCols[i] = null; + } else { + if (retCols[i] == null) { + retCols[i] = cols[i]; // use the object pool rather than creating a new object + } + retCols[i].set(jsonObj.getString(paths[i])); + } + } + forward(retCols); + } catch (JSONException e) { + // parsing error, invalid JSON string + if (!seenErrors) { + LOG.error("The input is not a valid JSON string: " + jsonStr + ". Skipping such error messages in the future."); + seenErrors = true; + } + } + } + + @Override + public String toString() { + return "json_tuple"; + } +}