diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index d1e1441..b516925 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -47,6 +47,7 @@ import org.apache.hadoop.hive.ql.udf.UDFAtan; import org.apache.hadoop.hive.ql.udf.UDFBase64; import org.apache.hadoop.hive.ql.udf.UDFBin; +import org.apache.hadoop.hive.ql.udf.UDFChr; import org.apache.hadoop.hive.ql.udf.UDFConv; import org.apache.hadoop.hive.ql.udf.UDFCos; import org.apache.hadoop.hive.ql.udf.UDFCrc32; @@ -83,6 +84,7 @@ import org.apache.hadoop.hive.ql.udf.UDFRegExpExtract; import org.apache.hadoop.hive.ql.udf.UDFRegExpReplace; import org.apache.hadoop.hive.ql.udf.UDFRepeat; +import org.apache.hadoop.hive.ql.udf.UDFReplace; import org.apache.hadoop.hive.ql.udf.UDFReverse; import org.apache.hadoop.hive.ql.udf.UDFSecond; import org.apache.hadoop.hive.ql.udf.UDFSha1; @@ -225,6 +227,7 @@ system.registerUDF("conv", UDFConv.class, false); system.registerUDF("bin", UDFBin.class, false); + system.registerUDF("chr", UDFChr.class, false); system.registerUDF("hex", UDFHex.class, false); system.registerUDF("unhex", UDFUnhex.class, false); system.registerUDF("base64", UDFBase64.class, false); @@ -256,6 +259,7 @@ system.registerGenericUDF("rlike", GenericUDFRegExp.class); system.registerGenericUDF("regexp", GenericUDFRegExp.class); system.registerUDF("regexp_replace", UDFRegExpReplace.class, false); + system.registerUDF("replace", UDFReplace.class, false); system.registerUDF("regexp_extract", UDFRegExpExtract.class, false); system.registerUDF("parse_url", UDFParseUrl.class, false); system.registerGenericUDF("nvl", GenericUDFNvl.class); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFChr.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFChr.java new file mode 100644 index 0000000..4de3e3c --- /dev/null +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFChr.java
@@ -0,0 +1,120 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+
+/**
+ * UDFChr converts a number into the one-character string for that character code.
+ *
+ * <p>Null input yields null, zero yields the NUL character and negative input
+ * yields the empty string. Any other value is narrowed to {@code short} and, when
+ * above 255, reduced modulo 256; a value whose narrowed form is negative (for
+ * example {@code Long.MAX_VALUE}) therefore also yields the empty string.
+ */
+@Description(name = "chr", value = "_FUNC_(str) - convert n where n : [0, 256) into the ascii equivalent as a varchar." +
+    "If n is less than 0 return the empty string. If n > 256, return _FUNC_(n % 256).",
+    extended = "Example:\n"
+    + " > SELECT _FUNC_('48') FROM src LIMIT 1;\n" + " '0'\n"
+    + " > SELECT _FUNC_('65') FROM src LIMIT 1;\n" + " 'A'"
+)
+public class UDFChr extends UDF {
+  // Reused output holder; every non-null evaluation overwrites and returns it.
+  private final Text result = new Text();
+
+  // One-character string holding U+0000, returned for an input of zero.
+  final String nulString = String.valueOf('\u0000');
+
+  /**
+   * Evaluates chr for an integral argument.
+   *
+   * @param n the character code, may be null
+   * @return null when n is null, otherwise the shared result holder
+   */
+  public Text evaluate(LongWritable n) {
+    if (n == null) {
+      return null;
+    }
+    return evaluateInternal(n.get());
+  }
+
+  /**
+   * Evaluates chr for a floating-point argument; a non-negative fraction is truncated.
+   *
+   * @param n the character code, may be null
+   * @return null when n is null, otherwise the shared result holder
+   */
+  public Text evaluate(DoubleWritable n) {
+    if (n == null) {
+      return null;
+    }
+    return evaluateInternal(n.get());
+  }
+
+  private Text evaluateInternal(long n) {
+    if (n == 0L) {
+      result.set(nulString);
+      return result;
+    }
+    if (n < 0L) {
+      result.set("");
+      return result;
+    }
+
+    // Should only down-cast if within valid range: larger values wrap here.
+    return evaluateInternal((short) n);
+  }
+
+  private Text evaluateInternal(double n) {
+    if (n == 0.0d) {
+      result.set(nulString);
+      return result;
+    }
+    if (n < 0.0d) {
+      result.set("");
+      return result;
+    }
+
+    // Should only down-cast and eliminate precision if within valid range.
+    return evaluateInternal((short) n);
+  }
+
+  // Maps the narrowed code to its character after reducing it modulo 256.
+  private Text evaluateInternal(short n) {
+    if (n > 255) {
+      n = (short) (n % 256);
+    }
+    if (n == 0) {
+      result.set(nulString);
+      return result;
+    }
+    if (n < 0) {
+      result.set("");
+      return result;
+    }
+
+    result.set(String.valueOf((char) n));
+
+    return result;
+  }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFReplace.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFReplace.java
new file mode 100644
index 0000000..5cf1f48
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFReplace.java
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. 
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.io.Text;
+
+
+/**
+ * UDFReplace swaps every literal occurrence of a search string for a replacement.
+ */
+@Description(name = "replace",
+    value = "_FUNC_(str, search, rep) - replace all substrings of 'str' that "
+    + "match 'search' with 'rep'", extended = "Example:\n"
+    + " > SELECT _FUNC_('Hack and Hue', 'H', 'BL') FROM src LIMIT 1;\n"
+    + " 'BLack and BLue'")
+public class UDFReplace extends UDF {
+
+  // Output holder that each non-null evaluation overwrites and returns.
+  private Text result = new Text();
+
+  public UDFReplace() {
+  }
+
+  /** Returns null if any argument is null, else s with each search match replaced. */
+  public Text evaluate(Text s, Text search, Text replacement) {
+    if (s != null && search != null && replacement != null) {
+      result.set(s.toString().replace(search.toString(), replacement.toString()));
+      return result;
+    }
+    return null;
+  }
+}
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFChr.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFChr.java
new file mode 100644
index 0000000..bb4d012
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFChr.java
@@ -0,0 +1,156 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. 
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.UDFChr;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestGenericUDFChr {
+  @Test
+  public void testChr() throws HiveException {
+    UDFChr udf = new UDFChr();
+
+    // Test string "0"
+    double d = 48.0d;
+    float f = 48.0f;
+    long l = 48L;
+    int i = 48;
+    short s = 48;
+    runAndVerify(d, udf, "0");
+    runAndVerify(f, udf, "0");
+    runAndVerify(l, udf, "0");
+    runAndVerify(i, udf, "0");
+    runAndVerify(s, udf, "0");
+
+    // Test string "A"
+    d = 65.123d;
+    f = 65.123f;
+    l = 65L;
+    i = 65;
+    s = 65;
+    runAndVerify(d, udf, "A");
+    runAndVerify(f, udf, "A");
+    runAndVerify(l, udf, "A");
+    runAndVerify(i, udf, "A");
+    runAndVerify(s, udf, "A");
+
+    // Test negative integers result in ""
+    d = -65.123d;
+    f = -65.123f;
+    l = -65L;
+    i = -65;
+    s = -65;
+    runAndVerify(d, udf, "");
+    runAndVerify(f, udf, "");
+    runAndVerify(l, udf, "");
+    runAndVerify(i, udf, "");
+    runAndVerify(s, udf, "");
+
+    // Test 0 is nul character
+    d = 0.9d;
+    f = 0.9f;
+    l = 0L;
+    i = 0;
+    s = 0;
+    String nulString = String.valueOf('\u0000');
+    runAndVerify(d, udf, nulString);
+    runAndVerify(f, udf, nulString);
+    runAndVerify(l, udf, nulString);
+    runAndVerify(i, udf, nulString);
+    runAndVerify(s, udf, nulString);
+
+    // Test 256 or greater is n % 256
+    d = 256.9d;
+    f = 256.9f;
+    l = 256L;
+    i = 256;
+    s = 256;
+    runAndVerify(d, udf, nulString);
+    runAndVerify(f, udf, nulString);
+    runAndVerify(l, udf, nulString);
+    runAndVerify(i, udf, nulString);
+    runAndVerify(s, udf, nulString);
+
+    d = 321.9d;
+    f = 321.9f;
+    l = 321L;
+    i = 321;
+    s = 321;
+    runAndVerify(d, udf, "A");
+    runAndVerify(f, udf, "A");
+    runAndVerify(l, udf, "A");
+    runAndVerify(i, udf, "A");
+    runAndVerify(s, udf, "A");
+
+    // Test down-casting when greater than 256.
+    d = Double.MAX_VALUE;
+    f = Float.MAX_VALUE;
+    l = Long.MAX_VALUE;
+    i = Integer.MAX_VALUE;
+    s = Short.MAX_VALUE; // 32767 % 256 = 255
+    runAndVerify(d, udf, "");
+    runAndVerify(f, udf, "");
+    runAndVerify(l, udf, "");
+    runAndVerify(i, udf, "");
+    runAndVerify(s, udf, "ÿ");
+
+    // Test null input yields null for both evaluate overloads
+    verifyOutput(udf.evaluate((LongWritable) null), null);
+    verifyOutput(udf.evaluate((DoubleWritable) null), null);
+  }
+
+  private void runAndVerify(long v, UDFChr udf, String expV) throws HiveException {
+    Text output = udf.evaluate(new LongWritable(v));
+    verifyOutput(output, expV);
+  }
+
+  private void runAndVerify(int v, UDFChr udf, String expV) throws HiveException {
+    Text output = udf.evaluate(new LongWritable(v));
+    verifyOutput(output, expV);
+  }
+
+  private void runAndVerify(short v, UDFChr udf, String expV) throws HiveException {
+    Text output = udf.evaluate(new LongWritable(v));
+    verifyOutput(output, expV);
+  }
+
+  private void runAndVerify(double v, UDFChr udf, String expV) throws HiveException {
+    Text output = udf.evaluate(new DoubleWritable(v));
+    verifyOutput(output, expV);
+  }
+
+  private void runAndVerify(float v, UDFChr udf, String expV) throws HiveException {
+    Text output = udf.evaluate(new DoubleWritable(v));
+    verifyOutput(output, expV);
+  }
+
+  private void verifyOutput(Text output, String expV) {
+    if (expV == null) {
+      Assert.assertNull(output);
+    } else {
+      Assert.assertNotNull(output);
+      Assert.assertEquals("chr() test ", expV, output.toString());
+    }
+  }
+}
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFReplace.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFReplace.java
new file mode 100644
index 0000000..59f2812
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFReplace.java
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import junit.framework.TestCase;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.UDFReplace;
+import org.apache.hadoop.io.Text;
+
+public class TestGenericUDFReplace extends TestCase {
+
+  public void testReplace() throws HiveException {
+    UDFReplace udf = new UDFReplace();
+
+    // One of the params is null, then expected is null.
+    verify(udf, null, new Text(), new Text(), null);
+    verify(udf, new Text(), null, new Text(), null);
+    verify(udf, new Text(), new Text(), null, null);
+
+    // Empty string
+    verify(udf, new Text(), new Text(), new Text(), "");
+
+    // No match
+    verify(udf, new Text("ABCDEF"), new Text("X"), new Text("Z"), "ABCDEF");
+
+    // Case-sensitive string found
+    verify(udf, new Text("Hack and Hue"), new Text("H"), new Text("BL"), "BLack and BLue");
+    verify(udf, new Text("ABABrdvABrk"), new Text("AB"), new Text("a"), "aardvark");
+  }
+
+
+  private void verify(UDFReplace udf, Text str, Text search, Text replacement, String expResult) throws HiveException {
+    Text output = udf.evaluate(str, search, replacement);
+    if (expResult == null) {
+      assertNull(output);
+    } else {
+      assertNotNull(output);
+      assertEquals("replace() test ", expResult, output.toString());
+    }
+  }
+}
diff --git a/ql/src/test/queries/clientpositive/udf_chr.q b/ql/src/test/queries/clientpositive/udf_chr.q
new file mode 100644
index 0000000..6516a92
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udf_chr.q
@@ -0,0 +1,25 @@
+DESCRIBE FUNCTION chr;
+DESC FUNCTION EXTENDED chr;
+
+select chr(-1),
+chr(0Y),
+chr(1Y),
+chr(48Y),
+chr(65Y),
+
+chr(0S),
+chr(1S),
+chr(48S),
+chr(65S),
+chr(321S),
+
+chr(0L),
+chr(1L),
+chr(48L),
+chr(65L),
+chr(321L),
+
+chr(cast(68.12 as float)),
+chr(cast(68.12 as double)),
+chr(cast(321.12 as double)),
+chr(32457964L);
\ No newline at end of file
diff --git a/ql/src/test/queries/clientpositive/udf_replace.q b/ql/src/test/queries/clientpositive/udf_replace.q
new file mode 100644
index 0000000..feab79c
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udf_replace.q
@@ -0,0 +1,9 @@
+DESCRIBE FUNCTION replace;
+DESC FUNCTION EXTENDED replace;
+
+select replace('', '', ''),
+replace(null, '', ''),
+replace('', null, ''),
+replace('', '', null),
+replace('Hack and Hue', 'H', 'BL'),
+replace('ABABrdvABrk', 'AB', 'a');
\ No newline at end of file
diff --git 
a/ql/src/test/results/clientpositive/show_functions.q.out b/ql/src/test/results/clientpositive/show_functions.q.out index 789bedf..3cddcce 100644 --- a/ql/src/test/results/clientpositive/show_functions.q.out +++ b/ql/src/test/results/clientpositive/show_functions.q.out @@ -41,6 +41,7 @@ case cbrt ceil ceiling +chr coalesce collect_list collect_set @@ -167,6 +168,7 @@ regexp regexp_extract regexp_replace repeat +replace reverse rlike round @@ -238,6 +240,7 @@ case cbrt ceil ceiling +chr coalesce collect_list collect_set @@ -288,6 +291,7 @@ percentile posexplode positive regexp_replace +replace reverse rlike size diff --git a/ql/src/test/results/clientpositive/udf_chr.q.out b/ql/src/test/results/clientpositive/udf_chr.q.out new file mode 100644 index 0000000..a662e70 --- /dev/null +++ b/ql/src/test/results/clientpositive/udf_chr.q.out @@ -0,0 +1,66 @@ +PREHOOK: query: DESCRIBE FUNCTION chr +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION chr +POSTHOOK: type: DESCFUNCTION +chr(str) - convert n where n : [0, 256) into the ascii equivalent as a varchar.If n is less than 0 return the empty string. If n > 256, return chr(n % 256). +PREHOOK: query: DESC FUNCTION EXTENDED chr +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESC FUNCTION EXTENDED chr +POSTHOOK: type: DESCFUNCTION +chr(str) - convert n where n : [0, 256) into the ascii equivalent as a varchar.If n is less than 0 return the empty string. If n > 256, return chr(n % 256). 
+Example: + > SELECT chr('48') FROM src LIMIT 1; + '0' + > SELECT chr('65') FROM src LIMIT 1; + 'A' +PREHOOK: query: select chr(-1), +chr(0Y), +chr(1Y), +chr(48Y), +chr(65Y), + +chr(0S), +chr(1S), +chr(48S), +chr(65S), +chr(321S), + +chr(0L), +chr(1L), +chr(48L), +chr(65L), +chr(321L), + +chr(cast(68.12 as float)), +chr(cast(68.12 as double)), +chr(cast(321.12 as double)), +chr(32457964L) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select chr(-1), +chr(0Y), +chr(1Y), +chr(48Y), +chr(65Y), + +chr(0S), +chr(1S), +chr(48S), +chr(65S), +chr(321S), + +chr(0L), +chr(1L), +chr(48L), +chr(65L), +chr(321L), + +chr(cast(68.12 as float)), +chr(cast(68.12 as double)), +chr(cast(321.12 as double)), +chr(32457964L) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +  0 A  0 A A  0 A A D D A ì diff --git a/ql/src/test/results/clientpositive/udf_replace.q.out b/ql/src/test/results/clientpositive/udf_replace.q.out new file mode 100644 index 0000000..10bce89 --- /dev/null +++ b/ql/src/test/results/clientpositive/udf_replace.q.out @@ -0,0 +1,32 @@ +PREHOOK: query: DESCRIBE FUNCTION replace +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION replace +POSTHOOK: type: DESCFUNCTION +replace(str, search, rep) - replace all substrings of 'str' that match 'search' with 'rep' +PREHOOK: query: DESC FUNCTION EXTENDED replace +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESC FUNCTION EXTENDED replace +POSTHOOK: type: DESCFUNCTION +replace(str, search, rep) - replace all substrings of 'str' that match 'search' with 'rep' +Example: + > SELECT replace('Hack and Hue', 'H', 'BL') FROM src LIMIT 1; + 'BLack and BLue' +PREHOOK: query: select replace('', '', ''), +replace(null, '', ''), +replace('', null, ''), +replace('', '', null), +replace('Hack and Hue', 'H', 'BL'), +replace('ABABrdvABrk', 'AB', 'a') +PREHOOK: type: QUERY +PREHOOK: Input: 
_dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select replace('', '', ''), +replace(null, '', ''), +replace('', null, ''), +replace('', '', null), +replace('Hack and Hue', 'H', 'BL'), +replace('ABABrdvABrk', 'AB', 'a') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### + NULL NULL NULL BLack and BLue aardvark