diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 92fee9b..ac10b03 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -70,6 +70,7 @@ import org.apache.hadoop.hive.ql.udf.UDFLog; import org.apache.hadoop.hive.ql.udf.UDFLog10; import org.apache.hadoop.hive.ql.udf.UDFLog2; +import org.apache.hadoop.hive.ql.udf.UDFMd5; import org.apache.hadoop.hive.ql.udf.UDFMinute; import org.apache.hadoop.hive.ql.udf.UDFMonth; import org.apache.hadoop.hive.ql.udf.UDFOPBitAnd; @@ -87,6 +88,7 @@ import org.apache.hadoop.hive.ql.udf.UDFRepeat; import org.apache.hadoop.hive.ql.udf.UDFReverse; import org.apache.hadoop.hive.ql.udf.UDFSecond; +import org.apache.hadoop.hive.ql.udf.UDFSha256; import org.apache.hadoop.hive.ql.udf.UDFSign; import org.apache.hadoop.hive.ql.udf.UDFSin; import org.apache.hadoop.hive.ql.udf.UDFSpace; @@ -214,6 +216,8 @@ registerUDF("unhex", UDFUnhex.class, false); registerUDF("base64", UDFBase64.class, false); registerUDF("unbase64", UDFUnbase64.class, false); + registerUDF("md5", UDFMd5.class, false); + registerUDF("sha256", UDFSha256.class, false); registerGenericUDF("encode", GenericUDFEncode.class); registerGenericUDF("decode", GenericUDFDecode.class); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/HashUDF.java ql/src/java/org/apache/hadoop/hive/ql/udf/HashUDF.java new file mode 100644 index 0000000..e051570 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/HashUDF.java @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf;
+
+import org.apache.hadoop.hive.ql.exec.UDF;
+
+import java.nio.charset.Charset;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+
+/** Parent class for hashing UDFs: digests a string and renders it as lowercase hex. */
+public abstract class HashUDF extends UDF {
+  /** Fixed charset so the digest does not depend on the platform default. */
+  private static final Charset UTF_8 = Charset.forName("UTF-8");
+  private final MessageDigest md; // reused for every call; reset before each digest
+
+  /** @param algorithm JCA digest name, e.g. "MD5"; IllegalArgumentException if unknown. */
+  public HashUDF(String algorithm) {
+    try {
+      md = MessageDigest.getInstance(algorithm);
+    } catch (NoSuchAlgorithmException e) {
+      // Report the failure to the caller; System.exit would kill the host JVM.
+      throw new IllegalArgumentException("Cannot find digest algorithm " + algorithm, e);
+    }
+  }
+
+  /** Returns the lowercase hex digest of the UTF-8 bytes of {@code str}. */
+  protected String calculate(String str) {
+    md.reset();
+    byte[] digest = md.digest(str.getBytes(UTF_8));
+    StringBuilder hex = new StringBuilder(digest.length * 2);
+    for (byte b : digest) {
+      hex.append(Character.forDigit((b >> 4) & 0xf, 16));
+      hex.append(Character.forDigit(b & 0xf, 16));
+    }
+    return hex.toString();
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMd5.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMd5.java
new file mode 100644
index 0000000..b2f53d3
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMd5.java
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.io.Text;
+
+/**
+ * UDF returning the MD5 digest, as computed by {@link HashUDF#calculate}, of a
+ * string or of a string with a salt appended. A null argument yields null.
+ */
+@Description(name = "md5",
+    value = "_FUNC_(str, [salt]) - return MD5(str) hash code ",
+    extended = "Example:\n"
+    + " > SELECT _FUNC_('secret') FROM src LIMIT 1;\n"
+    + " '5ebe2294ecd0e0f08eab7690d2a6ee69'")
+public class UDFMd5 extends HashUDF {
+  public UDFMd5() {
+    super("MD5");
+  }
+
+  /** Hashes {@code str}; returns null when {@code str} is null. */
+  public Text evaluate(Text str) {
+    return str == null ? null : digest(str.toString());
+  }
+
+  /** Hashes {@code str} immediately followed by {@code salt}; null if either is null. */
+  public Text evaluate(Text str, Text salt) {
+    if (str != null && salt != null) {
+      return digest(str.toString().concat(salt.toString()));
+    }
+    return null;
+  }
+
+  private Text digest(String input) {
+    return new Text(calculate(input));
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSha256.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSha256.java
new file mode 100644
index 0000000..3edc7c6
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSha256.java
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.io.Text;
+
+/**
+ * UDF returning the SHA-256 digest, as computed by {@link HashUDF#calculate},
+ * of a string or of a string with a salt appended. A null argument yields
+ * null.
+ */
+@Description(name = "sha256",
+    value = "_FUNC_(str, [salt]) - return SHA-256(str) hash code ",
+    extended = "Example:\n"
+    + " > SELECT _FUNC_('123456') FROM src LIMIT 1;\n"
+    + " '8d969eef6ecad3c29a3a629280e686cf0c3f5d5a86aff3ca12020c923adc6c92'")
+public class UDFSha256 extends HashUDF {
+
+  public UDFSha256() {
+    super("SHA-256");
+  }
+
+  /** Hashes {@code str}; returns null when {@code str} is null. */
+  public Text evaluate(Text str) {
+    return str == null ? null : digest(str.toString());
+  }
+
+  /** Hashes {@code str} immediately followed by {@code salt}; null if either is null. */
+  public Text evaluate(Text str, Text salt) {
+    if (str != null && salt != null) {
+      return digest(str.toString().concat(salt.toString()));
+    }
+    return null;
+  }
+
+  private Text digest(String input) {
+    return new Text(calculate(input));
+  }
+}
diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFMd5.java ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFMd5.java
new file mode 100644
index 0000000..128f107
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFMd5.java
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf;
+
+import org.apache.hadoop.io.Text;
+import org.junit.Assert;
+import org.junit.Test;
+
+/** Tests {@link UDFMd5}; a salt is appended, so "sec" + "ret" hashes like "secret". */
+public class TestUDFMd5 {
+  private static final String EXPECTED = "5ebe2294ecd0e0f08eab7690d2a6ee69";
+  private final UDFMd5 udf = new UDFMd5();
+
+  @Test
+  public void testEvaluate() throws Exception {
+    Assert.assertEquals(EXPECTED, udf.evaluate(new Text("secret")).toString());
+  }
+
+  @Test
+  public void testEvaluate1() throws Exception {
+    Assert.assertEquals(EXPECTED, udf.evaluate(new Text("sec"), new Text("ret")).toString());
+  }
+
+  @Test
+  public void testEvaluateNull() {
+    Assert.assertNull(udf.evaluate(null));
+    Assert.assertNull(udf.evaluate(null, new Text("ret")));
+    Assert.assertNull(udf.evaluate(new Text("sec"), null));
+  }
+}
diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFSha256.java ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFSha256.java
new file mode 100644
index 0000000..a0bc31b
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFSha256.java
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf;
+
+import org.apache.hadoop.io.Text;
+import org.junit.Assert;
+import org.junit.Test;
+
+/** Tests {@link UDFSha256}; a salt is appended, so "1234" + "56" hashes like "123456". */
+public class TestUDFSha256 {
+  private static final String EXPECTED = "8d969eef6ecad3c29a3a629280e686cf0c3f5d5a86aff3ca12020c923adc6c92";
+  private final UDFSha256 udf = new UDFSha256();
+
+  @Test
+  public void testEvaluate() throws Exception {
+    Assert.assertEquals(EXPECTED, udf.evaluate(new Text("123456")).toString());
+  }
+
+  @Test
+  public void testEvaluate1() throws Exception {
+    Assert.assertEquals(EXPECTED, udf.evaluate(new Text("1234"), new Text("56")).toString());
+  }
+
+  @Test
+  public void testEvaluateNull() {
+    Assert.assertNull(udf.evaluate(null));
+    Assert.assertNull(udf.evaluate(null, new Text("56")));
+    Assert.assertNull(udf.evaluate(new Text("1234"), null));
+  }
+}