Index: ql/src/test/results/clientpositive/udf_parse_url.q.out =================================================================== --- ql/src/test/results/clientpositive/udf_parse_url.q.out (revision 0) +++ ql/src/test/results/clientpositive/udf_parse_url.q.out (revision 0) @@ -0,0 +1,56 @@ +query: EXPLAIN +SELECT parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'HOST'), +parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'PATH'), +parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'QUERY') , +parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'REF') , +parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'QUERY:k2') + FROM src WHERE key = 86 +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION parse_url 'http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1' 'HOST')) (TOK_SELEXPR (TOK_FUNCTION parse_url 'http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1' 'PATH')) (TOK_SELEXPR (TOK_FUNCTION parse_url 'http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1' 'QUERY')) (TOK_SELEXPR (TOK_FUNCTION parse_url 'http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1' 'REF')) (TOK_SELEXPR (TOK_FUNCTION parse_url 'http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1' 'QUERY:k2'))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 86)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + Filter Operator + predicate: + expr: (UDFToDouble(key) = UDFToDouble(86)) + type: boolean + Select Operator + expressions: + expr: parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'HOST') + type: string + expr: parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'PATH') + type: string + expr: parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'QUERY') + type: string + expr: parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 
'REF') + type: string + expr: parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'QUERY:k2') + type: string + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +query: SELECT parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'HOST'), +parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'PATH'), +parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'QUERY') , +parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'REF') , +parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'QUERY:k2') + FROM src WHERE key = 86 +Input: default/src +Output: file:/mnt/vol/devrs008.snc1/suresh/hive_external/build/ql/tmp/1759372767/10000 +facebook.com /path1/p.php k1=v1&k2=v2 Ref1 v2 Index: ql/src/test/queries/clientpositive/udf_parse_url.q =================================================================== --- ql/src/test/queries/clientpositive/udf_parse_url.q (revision 0) +++ ql/src/test/queries/clientpositive/udf_parse_url.q (revision 0) @@ -0,0 +1,14 @@ +EXPLAIN +SELECT parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'HOST'), +parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'PATH'), +parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'QUERY') , +parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'REF') , +parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'QUERY:k2') + FROM src WHERE key = 86; + +SELECT parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'HOST'), +parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'PATH'), +parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'QUERY') , +parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'REF') , +parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'QUERY:k2') + 
FROM src WHERE key = 86; \ No newline at end of file Index: ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (revision 785001) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (working copy) @@ -83,6 +83,7 @@ registerUDF("regexp", UDFRegExp.class, OperatorType.INFIX, true); registerUDF("regexp_replace", UDFRegExpReplace.class, OperatorType.PREFIX, false); registerUDF("regexp_extract", UDFRegExpExtract.class, OperatorType.PREFIX, false); + registerUDF("parse_url", UDFParseUrl.class, OperatorType.PREFIX, false); registerUDF("positive", UDFOPPositive.class, OperatorType.PREFIX, true, "+"); registerUDF("negative", UDFOPNegative.class, OperatorType.PREFIX, true, "-"); Index: ql/src/java/org/apache/hadoop/hive/ql/history/HiveHistory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/history/HiveHistory.java (revision 785001) +++ ql/src/java/org/apache/hadoop/hive/ql/history/HiveHistory.java (working copy) @@ -359,7 +359,7 @@ public void printRowCount(String queryId){ QueryInfo ji = queryInfoMap.get(queryId); for (String tab: ji.rowCountMap.keySet()){ - console.printInfo(ji.rowCountMap.get(tab)+" Rows loaded to "+ tab); + console.printInfo(ji.rowCountMap.get(tab)+" Rows loaded to "+ tab); } } /** Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFParseUrl.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFParseUrl.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFParseUrl.java (revision 0) @@ -0,0 +1,92 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.udf; + + +import java.net.URL; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.UDF; + +/** + * UDF to extract specific parts from a URL. + * For example, parse_url('http://facebook.com/path/p1.php?query=1', 'HOST') will return 'facebook.com' + * For example, parse_url('http://facebook.com/path/p1.php?query=1', 'PATH') will return '/path/p1.php' + * Possible values are HOST,PATH,QUERY,REF,PROTOCOL,AUTHORITY,FILE,USERINFO + * Also you can get the value of a particular key in QUERY, using the syntax QUERY:KEY_NAME, e.g. QUERY:k1. 
+ */ +public class UDFParseUrl extends UDF { + private static Log LOG = LogFactory.getLog(UDFParseUrl.class.getName()); + + private String lastUrlStr = null; + private URL url = null; + public UDFParseUrl() { + } + + public String evaluate(String urlStr, String partToExtarct) { + if (urlStr == null || partToExtarct == null) { + return null; + } + + if (lastUrlStr == null || !urlStr.equals(lastUrlStr)){ + try { + url = new URL(urlStr); + } + catch(Exception e){ + return null; + } + } + lastUrlStr = urlStr; + + if (partToExtarct.equals("HOST")) + return url.getHost(); + if (partToExtarct.equals("PATH")) + return url.getPath(); + if (partToExtarct.equals("QUERY")) + return url.getQuery(); + if (partToExtarct.equals("REF")) + return url.getRef(); + if (partToExtarct.equals("PROTOCOL")) + return url.getProtocol(); + if (partToExtarct.equals("FILE")) + return url.getFile(); + if (partToExtarct.equals("AUTHORITY")) + return url.getAuthority(); + if (partToExtarct.equals("USERINFO")) + return url.getUserInfo(); + + String arr[] = partToExtarct.split(":"); + if (arr.length != 2) + return null; + if (arr[0].equals("QUERY")) + { + String kk = arr[1]; + String qArr[] = url.getQuery().split("&"); + for(int i=0; i