Index: ql/src/test/results/clientpositive/udf_unhex.q.out =================================================================== --- ql/src/test/results/clientpositive/udf_unhex.q.out (revision 0) +++ ql/src/test/results/clientpositive/udf_unhex.q.out (revision 0) @@ -0,0 +1,21 @@ +query: -- Good inputs + +SELECT + unhex('4D7953514C'), + unhex('31323637'), + unhex('61'), + unhex('2D34'), + unhex('') +FROM src limit 1 +Input: default/src +Output: file:/mnt/vol/devrs002.snc1/mkedia/hive_trunk/build/ql/tmp/1289795595/10000 +MySQL 1267 a -4 +query: -- Bad inputs +SELECT + unhex('MySQL'), + unhex('G123'), + unhex('\0') +FROM src limit 1 +Input: default/src +Output: file:/mnt/vol/devrs002.snc1/mkedia/hive_trunk/build/ql/tmp/328234582/10000 +NULL NULL NULL Index: ql/src/test/results/clientpositive/show_functions.q.out =================================================================== --- ql/src/test/results/clientpositive/show_functions.q.out (revision 811565) +++ ql/src/test/results/clientpositive/show_functions.q.out (working copy) @@ -103,6 +103,7 @@ to_date trim ucase +unhex unix_timestamp upper var_pop Index: ql/src/test/results/clientpositive/describe_function.q.out =================================================================== --- ql/src/test/results/clientpositive/describe_function.q.out (revision 811565) +++ ql/src/test/results/clientpositive/describe_function.q.out (working copy) @@ -60,6 +60,8 @@ bin(n) - returns n in binary query: DESCRIBE FUNCTION hex hex(n or str) - Convert the argument to hexadecimal +query: DESCRIBE FUNCTION unhex +unhex(str) - Converts hexadecimal argument to string query: DESCRIBE FUNCTION upper upper(str) - Returns str with all characters changed to uppercase query: DESCRIBE FUNCTION lower @@ -343,6 +345,25 @@ 'H1' > SELECT hex('Facebook') FROM src LIMIT 1; '46616365626F6F6B' +query: DESCRIBE FUNCTION EXTENDED unhex +unhex(str) - Converts hexadecimal argument to string +Performs the inverse operation of HEX(str). 
That is, it interprets +each pair of hexadecimal digits in the argument as a number and +converts it to the character represented by the number. The +resulting characters are returned as a binary string. + +Example: +> SELECT UNHEX('4D7953514C') from src limit 1; +'MySQL' +> SELECT UNHEX(HEX('string')) from src limit 1; +'string' +> SELECT HEX(UNHEX('1267')) from src limit 1; +'1267' + +The characters in the argument string must be legal hexadecimal +digits: '0' .. '9', 'A' .. 'F', 'a' .. 'f'. If UNHEX() encounters +any nonhexadecimal digits in the argument, it returns NULL. Also, +if there are an odd number of characters a leading 0 is appended. query: DESCRIBE FUNCTION EXTENDED upper upper(str) - Returns str with all characters changed to uppercase Example: Index: ql/src/test/queries/clientpositive/udf_unhex.q =================================================================== --- ql/src/test/queries/clientpositive/udf_unhex.q (revision 0) +++ ql/src/test/queries/clientpositive/udf_unhex.q (revision 0) @@ -0,0 +1,16 @@ +-- Good inputs + +SELECT + unhex('4D7953514C'), + unhex('31323637'), + unhex('61'), + unhex('2D34'), + unhex('') +FROM src limit 1; + +-- Bad inputs +SELECT + unhex('MySQL'), + unhex('G123'), + unhex('\0') +FROM src limit 1; Index: ql/src/test/queries/clientpositive/describe_function.q =================================================================== --- ql/src/test/queries/clientpositive/describe_function.q (revision 811565) +++ ql/src/test/queries/clientpositive/describe_function.q (working copy) @@ -29,6 +29,7 @@ DESCRIBE FUNCTION conv; DESCRIBE FUNCTION bin; DESCRIBE FUNCTION hex; +DESCRIBE FUNCTION unhex; DESCRIBE FUNCTION upper; DESCRIBE FUNCTION lower; DESCRIBE FUNCTION ucase; @@ -112,6 +113,7 @@ DESCRIBE FUNCTION EXTENDED conv; DESCRIBE FUNCTION EXTENDED bin; DESCRIBE FUNCTION EXTENDED hex; +DESCRIBE FUNCTION EXTENDED unhex; DESCRIBE FUNCTION EXTENDED upper; DESCRIBE FUNCTION EXTENDED lower; DESCRIBE FUNCTION EXTENDED ucase; Index: 
ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (revision 811565) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (working copy) @@ -94,6 +94,7 @@ registerUDF("conv", UDFConv.class, false); registerUDF("bin", UDFBin.class, false); registerUDF("hex", UDFHex.class, false); + registerUDF("unhex", UDFUnhex.class, false); registerUDF("upper", UDFUpper.class, false); registerUDF("lower", UDFLower.class, false); Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFUnhex.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFUnhex.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFUnhex.java (revision 0) @@ -0,0 +1,75 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.hive.ql.udf;
+
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.hive.ql.exec.description;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+@description(
+    name = "unhex",
+    value = "_FUNC_(str) - Converts hexadecimal argument to string",
+    extended = "Performs the inverse operation of HEX(str). That is, it interprets\n" +
+    "each pair of hexadecimal digits in the argument as a number and\n" +
+    "converts it to the character represented by the number. The\n" +
+    "resulting characters are returned as a binary string.\n\n" +
+    "Example:\n" +
+    "> SELECT UNHEX('4D7953514C') from src limit 1;\n" +
+    "'MySQL'\n" +
+    "> SELECT UNHEX(HEX('string')) from src limit 1;\n" +
+    "'string'\n" +
+    "> SELECT HEX(UNHEX('1267')) from src limit 1;\n" +
+    "'1267'\n\n" +
+    "The characters in the argument string must be legal hexadecimal\n" +
+    "digits: '0' .. '9', 'A' .. 'F', 'a' .. 'f'. If UNHEX() encounters\n" +
+    "any nonhexadecimal digits in the argument, it returns NULL. Also,\n" +
+    "if there are an odd number of characters a leading 0 is appended."
+    )
+public class UDFUnhex extends UDF {
+
+  /**
+   * Decodes each pair of hex digits in s into one byte; odd-length input gets a leading "0".
+   * @param s hexadecimal text, may be null
+   * @return the decoded bytes wrapped in a Text, or null if s is null or has a non-hex character
+   */
+  public Text evaluate(Text s) {
+    if (s == null) {
+      return null;
+    }
+
+    // Pad on the char count; Text.getLength() is a byte count and can disagree with it.
+    String str = s.toString();
+    if (str.length() % 2 == 1) {
+      str = "0" + str;
+    }
+
+    byte[] result = new byte[str.length() / 2];
+    for (int i = 0; i < str.length(); i += 2) {
+      // Character.digit gives -1 for '+'/'-', which Integer.parseInt would take as a sign.
+      int hi = Character.digit(str.charAt(i), 16);
+      int lo = Character.digit(str.charAt(i + 1), 16);
+      if (hi < 0 || lo < 0) {
+        return null;
+      }
+      result[i / 2] = (byte) ((hi << 4) | lo);
+    }
+    return new Text(result);
+  }
+}