Index: ql/src/test/results/clientpositive/str_to_map.q.out =================================================================== --- ql/src/test/results/clientpositive/str_to_map.q.out (revision 0) +++ ql/src/test/results/clientpositive/str_to_map.q.out (revision 0) @@ -0,0 +1,236 @@ +PREHOOK: query: explain select str_to_map('a=1,b=2,c=3',',','=')['a'] from src limit 3 +PREHOOK: type: QUERY +POSTHOOK: query: explain select str_to_map('a=1,b=2,c=3',',','=')['a'] from src limit 3 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR ([ (TOK_FUNCTION str_to_map 'a=1,b=2,c=3' ',' '=') 'a'))) (TOK_LIMIT 3))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: str_to_map('a=1,b=2,c=3',',','=')['a'] + type: string + outputColumnNames: _col0 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: 3 + + +PREHOOK: query: select str_to_map('a=1,b=2,c=3',',','=')['a'] from src limit 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/tmp/sdong/hive_2010-11-09_17-21-30_219_5751453763970111229/-mr-10000 +POSTHOOK: query: select str_to_map('a=1,b=2,c=3',',','=')['a'] from src limit 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/tmp/sdong/hive_2010-11-09_17-21-30_219_5751453763970111229/-mr-10000 +1 +1 +1 +PREHOOK: query: explain select str_to_map('a:1,b:2,c:3') from src limit 3 +PREHOOK: type: QUERY +POSTHOOK: query: explain select str_to_map('a:1,b:2,c:3') from src limit 3 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM 
(TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION str_to_map 'a:1,b:2,c:3'))) (TOK_LIMIT 3))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: str_to_map('a:1,b:2,c:3') + type: map + outputColumnNames: _col0 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: 3 + + +PREHOOK: query: select str_to_map('a:1,b:2,c:3') from src limit 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/tmp/sdong/hive_2010-11-09_17-21-32_971_8443403956514928563/-mr-10000 +POSTHOOK: query: select str_to_map('a:1,b:2,c:3') from src limit 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/tmp/sdong/hive_2010-11-09_17-21-32_971_8443403956514928563/-mr-10000 +{"b":"2","c":"3","a":"1"} +{"b":"2","c":"3","a":"1"} +{"b":"2","c":"3","a":"1"} +PREHOOK: query: explain select str_to_map('a:1,b:2,c:3',',',':') from src limit 3 +PREHOOK: type: QUERY +POSTHOOK: query: explain select str_to_map('a:1,b:2,c:3',',',':') from src limit 3 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION str_to_map 'a:1,b:2,c:3' ',' ':'))) (TOK_LIMIT 3))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: str_to_map('a:1,b:2,c:3',',',':') + type: map + outputColumnNames: _col0 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + 
table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: 3 + + +PREHOOK: query: select str_to_map('a:1,b:2,c:3',',',':') from src limit 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/tmp/sdong/hive_2010-11-09_17-21-35_564_3851202109281175385/-mr-10000 +POSTHOOK: query: select str_to_map('a:1,b:2,c:3',',',':') from src limit 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/tmp/sdong/hive_2010-11-09_17-21-35_564_3851202109281175385/-mr-10000 +{"b":"2","c":"3","a":"1"} +{"b":"2","c":"3","a":"1"} +{"b":"2","c":"3","a":"1"} +PREHOOK: query: explain select str_to_map(t.ss,',',':')['a'] +from (select transform('a:1,b:2,c:3') using 'cat' as (ss) from src) t +limit 3 +PREHOOK: type: QUERY +POSTHOOK: query: explain select str_to_map(t.ss,',',':')['a'] +from (select transform('a:1,b:2,c:3') using 'cat' as (ss) from src) t +limit 3 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST 'a:1,b:2,c:3') TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER (TOK_ALIASLIST ss)))))) t)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR ([ (TOK_FUNCTION str_to_map (. 
(TOK_TABLE_OR_COL t) ss) ',' ':') 'a'))) (TOK_LIMIT 3))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + t:src + TableScan + alias: src + Select Operator + expressions: + expr: 'a:1,b:2,c:3' + type: string + outputColumnNames: _col0 + Transform Operator + command: cat + output info: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Select Operator + expressions: + expr: str_to_map(_col0,',',':')['a'] + type: string + outputColumnNames: _col0 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: 3 + + +PREHOOK: query: select str_to_map(t.ss,',',':')['a'] +from (select transform('a:1,b:2,c:3') using 'cat' as (ss) from src) t +limit 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/tmp/sdong/hive_2010-11-09_17-21-38_115_1215470559882057571/-mr-10000 +POSTHOOK: query: select str_to_map(t.ss,',',':')['a'] +from (select transform('a:1,b:2,c:3') using 'cat' as (ss) from src) t +limit 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/tmp/sdong/hive_2010-11-09_17-21-38_115_1215470559882057571/-mr-10000 +1 +1 +1 +PREHOOK: query: drop table tbl_s2m +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table tbl_s2m +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table tbl_s2m as select 'ABC=CC_333=444' as t from src limit 3 +PREHOOK: type: CREATETABLE +PREHOOK: Input: default@src +POSTHOOK: query: create table tbl_s2m as select 'ABC=CC_333=444' as t from src limit 3 +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tbl_s2m +PREHOOK: query: select str_to_map(t,'_','=')['333'] from tbl_s2m +PREHOOK: 
type: QUERY +PREHOOK: Input: default@tbl_s2m +PREHOOK: Output: file:/tmp/sdong/hive_2010-11-09_17-21-43_265_2928247388668574992/-mr-10000 +POSTHOOK: query: select str_to_map(t,'_','=')['333'] from tbl_s2m +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl_s2m +POSTHOOK: Output: file:/tmp/sdong/hive_2010-11-09_17-21-43_265_2928247388668574992/-mr-10000 +444 +444 +444 +PREHOOK: query: drop table tbl_s2m +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tbl_s2m +PREHOOK: Output: default@tbl_s2m +POSTHOOK: query: drop table tbl_s2m +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tbl_s2m +POSTHOOK: Output: default@tbl_s2m Index: ql/src/test/queries/clientpositive/str_to_map.q =================================================================== --- ql/src/test/queries/clientpositive/str_to_map.q (revision 0) +++ ql/src/test/queries/clientpositive/str_to_map.q (revision 0) @@ -0,0 +1,23 @@ +explain select str_to_map('a=1,b=2,c=3',',','=')['a'] from src limit 3; +select str_to_map('a=1,b=2,c=3',',','=')['a'] from src limit 3; + +explain select str_to_map('a:1,b:2,c:3') from src limit 3; +select str_to_map('a:1,b:2,c:3') from src limit 3; + +explain select str_to_map('a:1,b:2,c:3',',',':') from src limit 3; +select str_to_map('a:1,b:2,c:3',',',':') from src limit 3; + +explain select str_to_map(t.ss,',',':')['a'] +from (select transform('a:1,b:2,c:3') using 'cat' as (ss) from src) t +limit 3; +select str_to_map(t.ss,',',':')['a'] +from (select transform('a:1,b:2,c:3') using 'cat' as (ss) from src) t +limit 3; + + +drop table tbl_s2m; +create table tbl_s2m as select 'ABC=CC_333=444' as t from src limit 3; + +select str_to_map(t,'_','=')['333'] from tbl_s2m; + +drop table tbl_s2m; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (revision 10041) +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java	(working copy)
@@ -174,6 +174,7 @@
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFSentences;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFSize;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFSplit;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStringToMap;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUnion;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen;
@@ -275,6 +276,7 @@
     registerUDF("regexp_extract", UDFRegExpExtract.class, false);
     registerUDF("parse_url", UDFParseUrl.class, false);
     registerGenericUDF("split", GenericUDFSplit.class);
+    registerGenericUDF("str_to_map", GenericUDFStringToMap.class);
     registerUDF("positive", UDFOPPositive.class, true, "+");
     registerUDF("negative", UDFOPNegative.class, true, "-");
 
Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStringToMap.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStringToMap.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStringToMap.java	(revision 0)
@@ -0,0 +1,120 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.util.HashMap;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+
+/**
+ * GenericUDFStringToMap: str_to_map(text[, delimiter1[, delimiter2]]).
+ *
+ * Splits text into key-value pairs using delimiter1 (default ",") between
+ * pairs and delimiter2 (default ":") between a key and its value.  Both
+ * delimiters are interpreted as Java regular expressions, as with
+ * String.split().
+ */
+@Description(name = "str_to_map", value = "_FUNC_(text, delimiter1, delimiter2) - "
+    + "Creates a map by parsing text ")
+public class GenericUDFStringToMap extends GenericUDF {
+  /** Default delimiter between key-value pairs. */
+  private static final String DEFAULT_PAIR_DELIM = ",";
+  /** Default delimiter between a key and its value. */
+  private static final String DEFAULT_KEY_VALUE_DELIM = ":";
+
+  /** Reused per-row result map; cleared at the start of each evaluate() call. */
+  private final HashMap<String, String> ret = new HashMap<String, String>();
+  private StringObjectInspector soiText;
+  private StringObjectInspector soiPairDelim;
+  private StringObjectInspector soiKeyValueDelim;
+
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+    if (arguments.length < 1 || arguments.length > 3) {
+      throw new UDFArgumentException("str_to_map takes between one and three arguments");
+    }
+    // Every supplied argument must be a string.
+    for (int i = 0; i < arguments.length; i++) {
+      if (!TypeInfoUtils.getTypeInfoFromObjectInspector(arguments[i]).equals(
+          TypeInfoFactory.stringTypeInfo)) {
+        throw new UDFArgumentException("All arguments must be strings");
+      }
+    }
+
+    soiText = (StringObjectInspector) arguments[0];
+    if (arguments.length > 1) {
+      soiPairDelim = (StringObjectInspector) arguments[1];
+    }
+    if (arguments.length > 2) {
+      soiKeyValueDelim = (StringObjectInspector) arguments[2];
+    }
+
+    return ObjectInspectorFactory.getStandardMapObjectInspector(
+        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
+        PrimitiveObjectInspectorFactory.javaStringObjectInspector);
+  }
+
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+    ret.clear();
+    String text = soiText.getPrimitiveJavaObject(arguments[0].get());
+    if (text == null) {
+      return null; // NULL text yields NULL instead of an NPE.
+    }
+    String pairDelim = (soiPairDelim == null)
+        ? DEFAULT_PAIR_DELIM : soiPairDelim.getPrimitiveJavaObject(arguments[1].get());
+    String keyValueDelim = (soiKeyValueDelim == null)
+        ? DEFAULT_KEY_VALUE_DELIM : soiKeyValueDelim.getPrimitiveJavaObject(arguments[2].get());
+
+    for (String keyValuePair : text.split(pairDelim)) {
+      // Limit of 2 lets the value itself contain the key-value delimiter.
+      String[] keyValue = keyValuePair.split(keyValueDelim, 2);
+      if (keyValue.length < 2) {
+        // A token with no key-value delimiter maps to a NULL value.
+        ret.put(keyValuePair, null);
+      } else {
+        ret.put(keyValue[0], keyValue[1]);
+      }
+    }
+
+    return ret;
+  }
+
+  @Override
+  public String getDisplayString(String[] children) {
+    assert (children.length <= 3);
+    StringBuilder sb = new StringBuilder("str_to_map(");
+    for (int i = 0; i < children.length; i++) {
+      if (i > 0) {
+        sb.append(",");
+      }
+      sb.append(children[i]);
+    }
+    sb.append(")");
+    return sb.toString();
+  }
+}