Index: ql/src/test/results/clientpositive/udf_conv.q.out =================================================================== --- ql/src/test/results/clientpositive/udf_conv.q.out (revision 0) +++ ql/src/test/results/clientpositive/udf_conv.q.out (revision 0) @@ -0,0 +1,83 @@ +query: -- conv must work on both strings and integers up to 64-bit precision + +-- Some simple conversions to test different bases +SELECT + conv('4521', 10, 36), + conv('22', 10, 10), + conv('110011', 2, 16), + conv('facebook', 36, 16) +FROM src LIMIT 1 +Input: default/src +Output: file:/data/users/emil/hive1/hive1/build/ql/tmp/623411735/10000 +3HL 22 33 116ED2B2FB4 +query: -- Test negative numbers. If to_base is positive, the number should be handled +-- as a two's complement (64-bit) +SELECT + conv('-641', 10, -10), + conv('1011', 2, -16), + conv('-1', 10, 16), + conv('-15', 10, 16) +FROM src LIMIT 1 +Input: default/src +Output: file:/data/users/emil/hive1/hive1/build/ql/tmp/1206714471/10000 +-641 B FFFFFFFFFFFFFFFF FFFFFFFFFFFFFFF1 +query: -- Test overflow. If a number is two large, the result should be -1 (if signed) +-- or MAX_LONG (if unsigned) +SELECT + conv('9223372036854775807', 36, 16), + conv('9223372036854775807', 36, -16), + conv('-9223372036854775807', 36, 16), + conv('-9223372036854775807', 36, -16) +FROM src LIMIT 1 +Input: default/src +Output: file:/data/users/emil/hive1/hive1/build/ql/tmp/1160875939/10000 +FFFFFFFFFFFFFFFF -1 FFFFFFFFFFFFFFFF -1 +query: -- Test with invalid input. If one of the bases is invalid, the result should +-- be NULL. If there is an invalid digit in the number, the longest valid +-- prefix should be converted. +SELECT + conv('123455', 3, 10), + conv('131', 1, 5), + conv('515', 5, 100), + conv('10', -2, 2) +FROM src LIMIT 1 +Input: default/src +Output: file:/data/users/emil/hive1/hive1/build/ql/tmp/21887943/10000 +5 NULL NULL NULL +query: -- Perform the same tests with number arguments. 
+ +SELECT + conv(4521, 10, 36), + conv(22, 10, 10), + conv(110011, 2, 16) +FROM src LIMIT 1 +Input: default/src +Output: file:/data/users/emil/hive1/hive1/build/ql/tmp/860089866/10000 +3HL 22 33 +query: SELECT + conv(-641, 10, -10), + conv(1011, 2, -16), + conv(-1, 10, 16), + conv(-15, 10, 16) +FROM src LIMIT 1 +Input: default/src +Output: file:/data/users/emil/hive1/hive1/build/ql/tmp/2001834317/10000 +-641 B FFFFFFFFFFFFFFFF FFFFFFFFFFFFFFF1 +query: SELECT + conv(9223372036854775807, 36, 16), + conv(9223372036854775807, 36, -16), + conv(-9223372036854775807, 36, 16), + conv(-9223372036854775807, 36, -16) +FROM src LIMIT 1 +Input: default/src +Output: file:/data/users/emil/hive1/hive1/build/ql/tmp/1609692602/10000 +FFFFFFFFFFFFFFFF -1 FFFFFFFFFFFFFFFF -1 +query: SELECT + conv(123455, 3, 10), + conv(131, 1, 5), + conv(515, 5, 100), + conv('10', -2, 2) +FROM src LIMIT 1 +Input: default/src +Output: file:/data/users/emil/hive1/hive1/build/ql/tmp/1197566112/10000 +5 NULL NULL NULL Index: ql/src/test/results/clientpositive/udf_bin.q.out =================================================================== --- ql/src/test/results/clientpositive/udf_bin.q.out (revision 0) +++ ql/src/test/results/clientpositive/udf_bin.q.out (revision 0) @@ -0,0 +1,13 @@ +query: SELECT + bin(1), + bin(0), + bin(99992421) +FROM src LIMIT 1 +Input: default/src +Output: file:/data/users/emil/hive1/hive1/build/ql/tmp/1248344323/10000 +1 0 101111101011100001101100101 +query: -- Negative numbers should be treated as two's complement (64 bit). 
+SELECT bin(-5) FROM src LIMIT 1 +Input: default/src +Output: file:/data/users/emil/hive1/hive1/build/ql/tmp/1854602523/10000 +1111111111111111111111111111111111111111111111111111111111111011 Index: ql/src/test/results/clientpositive/udf_hex.q.out =================================================================== --- ql/src/test/results/clientpositive/udf_hex.q.out (revision 0) +++ ql/src/test/results/clientpositive/udf_hex.q.out (revision 0) @@ -0,0 +1,24 @@ +query: -- If the argument is a string, hex should return a string containing two hex +-- digits for every character in the input. +SELECT + hex('Facebook'), + hex('\0'), + hex('qwertyuiopasdfghjkl') +FROM src LIMIT 1 +Input: default/src +Output: file:/data/users/emil/hive1/hive1/build/ql/tmp/375294624/10000 +46616365626F6F6B 00 71776572747975696F706173646667686A6B6C +query: -- If the argument is a number, hex should convert it to hexadecimal. +SELECT + hex(1), + hex(0), + hex(4207849477) +FROM src LIMIT 1 +Input: default/src +Output: file:/data/users/emil/hive1/hive1/build/ql/tmp/1068245471/10000 +1 0 FACEB005 +query: -- Negative numbers should be treated as two's complement (64 bit). +SELECT hex(-5) FROM src LIMIT 1 +Input: default/src +Output: file:/data/users/emil/hive1/hive1/build/ql/tmp/1738127711/10000 +FFFFFFFFFFFFFFFB Index: ql/src/test/queries/clientpositive/udf_bin.q =================================================================== --- ql/src/test/queries/clientpositive/udf_bin.q (revision 0) +++ ql/src/test/queries/clientpositive/udf_bin.q (revision 0) @@ -0,0 +1,8 @@ +SELECT + bin(1), + bin(0), + bin(99992421) +FROM src LIMIT 1; + +-- Negative numbers should be treated as two's complement (64 bit). 
+SELECT bin(-5) FROM src LIMIT 1; Index: ql/src/test/queries/clientpositive/udf_hex.q =================================================================== --- ql/src/test/queries/clientpositive/udf_hex.q (revision 0) +++ ql/src/test/queries/clientpositive/udf_hex.q (revision 0) @@ -0,0 +1,17 @@ +-- If the argument is a string, hex should return a string containing two hex +-- digits for every character in the input. +SELECT + hex('Facebook'), + hex('\0'), + hex('qwertyuiopasdfghjkl') +FROM src LIMIT 1; + +-- If the argument is a number, hex should convert it to hexadecimal. +SELECT + hex(1), + hex(0), + hex(4207849477) +FROM src LIMIT 1; + +-- Negative numbers should be treated as two's complement (64 bit). +SELECT hex(-5) FROM src LIMIT 1; Index: ql/src/test/queries/clientpositive/udf_conv.q =================================================================== --- ql/src/test/queries/clientpositive/udf_conv.q (revision 0) +++ ql/src/test/queries/clientpositive/udf_conv.q (revision 0) @@ -0,0 +1,66 @@ +-- conv must work on both strings and integers up to 64-bit precision + +-- Some simple conversions to test different bases +SELECT + conv('4521', 10, 36), + conv('22', 10, 10), + conv('110011', 2, 16), + conv('facebook', 36, 16) +FROM src LIMIT 1; + +-- Test negative numbers. If to_base is positive, the number should be handled +-- as a two's complement (64-bit) +SELECT + conv('-641', 10, -10), + conv('1011', 2, -16), + conv('-1', 10, 16), + conv('-15', 10, 16) +FROM src LIMIT 1; + +-- Test overflow. If a number is two large, the result should be -1 (if signed) +-- or MAX_LONG (if unsigned) +SELECT + conv('9223372036854775807', 36, 16), + conv('9223372036854775807', 36, -16), + conv('-9223372036854775807', 36, 16), + conv('-9223372036854775807', 36, -16) +FROM src LIMIT 1; + +-- Test with invalid input. If one of the bases is invalid, the result should +-- be NULL. If there is an invalid digit in the number, the longest valid +-- prefix should be converted. 
+SELECT + conv('123455', 3, 10), + conv('131', 1, 5), + conv('515', 5, 100), + conv('10', -2, 2) +FROM src LIMIT 1; + +-- Perform the same tests with number arguments. + +SELECT + conv(4521, 10, 36), + conv(22, 10, 10), + conv(110011, 2, 16) +FROM src LIMIT 1; + +SELECT + conv(-641, 10, -10), + conv(1011, 2, -16), + conv(-1, 10, 16), + conv(-15, 10, 16) +FROM src LIMIT 1; + +SELECT + conv(9223372036854775807, 36, 16), + conv(9223372036854775807, 36, -16), + conv(-9223372036854775807, 36, 16), + conv(-9223372036854775807, 36, -16) +FROM src LIMIT 1; + +SELECT + conv(123455, 3, 10), + conv(131, 1, 5), + conv(515, 5, 100), + conv('10', -2, 2) +FROM src LIMIT 1; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (revision 794756) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (working copy) @@ -71,6 +71,10 @@ registerUDF("power", UDFPower.class, OperatorType.PREFIX, false); registerUDF("pow", UDFPower.class, OperatorType.PREFIX, false); + registerUDF("conv", UDFConv.class, OperatorType.PREFIX, false); + registerUDF("bin", UDFBin.class, OperatorType.PREFIX, false); + registerUDF("hex", UDFHex.class, OperatorType.PREFIX, false); + registerUDF("upper", UDFUpper.class, OperatorType.PREFIX, false); registerUDF("lower", UDFLower.class, OperatorType.PREFIX, false); registerUDF("ucase", UDFUpper.class, OperatorType.PREFIX, false); Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFConv.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFConv.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFConv.java (revision 0) @@ -0,0 +1,179 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf; + +import java.util.Arrays; + +import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; + +public class UDFConv extends UDF { + private Text result = new Text(); + private byte[] value = new byte[64]; + + /** + * Divide x by m as if x is an unsigned 64-bit integer. 
+   * Examples:
+   *   unsignedLongDiv(-1, 2) == Long.MAX_VALUE
+   *   unsignedLongDiv(6, 3) == 2
+   *   unsignedLongDiv(0, 5) == 0
+   *
+   * @param x is treated as unsigned
+   * @param m is treated as signed; it must be positive whenever x has its top
+   *          bit set (every caller in this class passes a radix)
+   * @return the unsigned quotient x / m
+   */
+  private long unsignedLongDiv(long x, int m) {
+    if (x >= 0) {
+      // Signed and unsigned interpretation agree, plain division is exact
+      return x / m;
+    }
+
+    // x has its top bit set, so a signed division would be wrong. Halve x with
+    // a logical shift, divide, and double the quotient again: the result is
+    // either exact or one too small, which leaves a remainder in [0, 2*m).
+    // The former closed form x/m + 2*(MAX/m) + 2/m + (x%m + ...)/m truncated a
+    // negative remainder sum towards zero and was therefore one too large in
+    // some cases, e.g. m == 7 (Long.MAX_VALUE is a multiple of 7) and x == -6.
+    long q = ((x >>> 1) / m) << 1;
+    long r = x - q * m;
+    return (r >= m) ? q + 1 : q;
+  }
+
+  /**
+   * Decode val into value[]: the digits are written right-aligned, one digit
+   * value per byte, and everything to their left is set to zero.
+   *
+   * @param val is treated as an unsigned 64-bit integer
+   * @param radix must be between MIN_RADIX and MAX_RADIX
+   */
+  private void decode(long val, int radix) {
+    Arrays.fill(value, (byte) 0);
+    for (int i = value.length - 1; val != 0; i--) {
+      long q = unsignedLongDiv(val, radix);
+      // val - q*radix is the unsigned remainder, i.e. the next digit
+      value[i] = (byte) (val - q * radix);
+      val = q;
+    }
+  }
+
+  /**
+   * Convert value[] into a long. On overflow, return -1 (as MySQL does). If a
+   * negative digit is found, ignore the suffix starting there.
+   *
+   * @param radix must be between MIN_RADIX and MAX_RADIX
+   * @return the result should be treated as an unsigned 64-bit integer.
+   */
+  private long encode(int radix) {
+    long val = 0;
+    // While val stays below this bound, val*radix + digit cannot exceed
+    // 2^64 - 1, so the exact per-digit check below can be skipped. For every
+    // radix >= 2 the bound is non-negative when read as a signed long.
+    long bound = unsignedLongDiv(-1 - radix, radix);
+    for (int i = 0; i < value.length && value[i] >= 0; i++) {
+      // val is unsigned: a negative signed value means val >= 2^63, which is
+      // above bound, so it must be tested next to the signed comparison.
+      if (val < 0 || val >= bound) {
+        // Exact check: overflow iff val > (2^64 - 1 - digit) / radix. That
+        // quotient is at most 2^63 - 1, hence any val >= 2^63 overflows.
+        if (val < 0 || unsignedLongDiv(-1 - value[i], radix) < val) {
+          return -1;
+        }
+      }
+      val = val * radix + value[i];
+    }
+    return val;
+  }
+
+  /**
+   * Convert the bytes in value[] to the corresponding chars.
+   *
+   * @param radix must be between MIN_RADIX and MAX_RADIX
+   * @param fromPos is the first nonzero element
+   */
+  private void byte2char(int radix, int fromPos) {
+    for (int i = fromPos; i < value.length; i++) {
+      value[i] = (byte) Character.toUpperCase(
+          Character.forDigit(value[i], radix));
+    }
+  }
+
+  /**
+   * Convert the chars in value[] to the corresponding integers. Convert invalid
+   * characters to -1.
+   *
+   * @param radix must be between MIN_RADIX and MAX_RADIX
+   * @param fromPos is the first nonzero element
+   */
+  private void char2byte(int radix, int fromPos) {
+    for (int i = fromPos; i < value.length; i++) {
+      // Character.digit yields -1 for anything that is not a digit in radix
+      value[i] = (byte) Character.digit(value[i], radix);
+    }
+  }
+
+  /**
+   * Convert the number n from base fromBase to base toBase. If toBase > 0 the
+   * result is unsigned, otherwise it is signed.
+   *
+   * @param n the number to convert, optionally starting with '-'
+   * @param fromBase base of n, between MIN_RADIX and MAX_RADIX
+   * @param toBase base of the result; its absolute value must be between
+   *               MIN_RADIX and MAX_RADIX
+   * @return NULL if an argument is NULL, a base is out of range or n is empty
+   */
+  public Text evaluate(Text n, IntWritable fromBase, IntWritable toBase) {
+    if (n == null || fromBase == null || toBase == null) {
+      return null;
+    }
+
+    int fromBs = fromBase.get();
+    int toBs = toBase.get();
+    if (fromBs < Character.MIN_RADIX || fromBs > Character.MAX_RADIX
+        || Math.abs(toBs) < Character.MIN_RADIX
+        || Math.abs(toBs) > Character.MAX_RADIX) {
+      return null;
+    }
+
+    // An empty string has neither a sign byte nor digits; reading num[0] would
+    // run past the valid bytes of n, so report it as NULL instead.
+    if (n.getLength() == 0) {
+      return null;
+    }
+
+    byte[] num = n.getBytes();
+    boolean negative = (num[0] == '-');
+    int first = 0;
+    if (negative) {
+      first = 1;
+    }
+
+    // Make room for inputs with more digits than the buffer holds; encode()
+    // reports the overflow if such a number does not fit into 64 bits.
+    int digits = n.getLength() - first;
+    if (value.length < digits) {
+      value = new byte[digits];
+    }
+    // encode() scans value[] from index 0, so wipe the characters a previous
+    // call left behind before writing the new digits.
+    Arrays.fill(value, (byte) 0);
+
+    // Copy the digits in the right side of the array
+    for (int i = 1; i <= digits; i++) {
+      value[value.length - i] = num[n.getLength() - i];
+    }
+    char2byte(fromBs, value.length - digits);
+
+    // Do the conversion by going through a 64 bit integer
+    long val = encode(fromBs);
+    if (negative && toBs > 0) {
+      // Unsigned result: a negative input is shown as its two's complement,
+      // and an input that already overflowed stays at -1 (all bits set).
+      if (val < 0) {
+        val = -1;
+      } else {
+        val = -val;
+      }
+    }
+    if (toBs < 0 && val < 0) {
+      // Signed result: print the magnitude and remember the sign
+      val = -val;
+      negative = true;
+    }
+    decode(val, Math.abs(toBs));
+
+    // Find the first non-zero digit or the last digits if all are zero.
+ for(first=0; first>>= 1; + } while(num != 0); + + result.set(value, value.length-len, len); + return result; + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHex.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHex.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHex.java (revision 0) @@ -0,0 +1,86 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.udf; + +import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; + +public class UDFHex extends UDF { + private Text result = new Text(); + byte[] value = new byte[16]; + + /** + * Convert num to hex. 
+   *
+   * @param num number whose 64-bit pattern is rendered; a negative value
+   *            therefore appears in two's complement form
+   * @return the shared result object holding the upper-case hex digits
+   */
+  private Text evaluate(long num) {
+    // Fill value[] from its right end; pos always points at the leftmost digit
+    int pos = value.length;
+    do {
+      int nibble = (int) (num & 0xF);
+      pos--;
+      value[pos] = (byte) Character.toUpperCase(Character.forDigit(nibble, 16));
+      num >>>= 4;
+    } while (num != 0);
+
+    result.set(value, pos, value.length - pos);
+    return result;
+  }
+
+  /**
+   * Hex representation of a 64-bit integer, NULL for a NULL argument.
+   */
+  public Text evaluate(LongWritable n) {
+    return (n == null) ? null : evaluate(n.get());
+  }
+
+  /**
+   * Hex representation of a 32-bit integer, NULL for a NULL argument. The
+   * value is widened to long first, so negative numbers yield 16 digits.
+   */
+  public Text evaluate(IntWritable n) {
+    if (n == null) {
+      return null;
+    }
+    long widened = n.get();
+    return evaluate(widened);
+  }
+
+  /**
+   * Convert every byte of s to two upper-case hex digits.
+   *
+   * @param s the text to dump; NULL yields NULL
+   * @return the shared result object holding two digits per input byte
+   */
+  public Text evaluate(Text s) {
+    if (s == null) {
+      return null;
+    }
+
+    int byteCount = s.getLength();
+    int hexLength = byteCount * 2;
+    // Grow the scratch buffer when the output does not fit
+    if (value.length < hexLength) {
+      value = new byte[hexLength];
+    }
+
+    byte[] str = s.getBytes();
+    int out = 0;
+    for (int in = 0; in < byteCount; in++) {
+      int high = (str[in] & 0xF0) >>> 4;
+      int low = str[in] & 0x0F;
+      value[out++] = (byte) Character.toUpperCase(Character.forDigit(high, 16));
+      value[out++] = (byte) Character.toUpperCase(Character.forDigit(low, 16));
+    }
+
+    result.set(value, 0, hexLength);
+    return result;
+  }
+}