diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index f2dc874..a19ea15 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -79,6 +79,7 @@ import org.apache.hadoop.hive.ql.udf.UDFLpad; import org.apache.hadoop.hive.ql.udf.UDFMinute; import org.apache.hadoop.hive.ql.udf.UDFMonth; +import org.apache.hadoop.hive.ql.udf.UDFMakeSet; import org.apache.hadoop.hive.ql.udf.UDFOPBitAnd; import org.apache.hadoop.hive.ql.udf.UDFOPBitNot; import org.apache.hadoop.hive.ql.udf.UDFOPBitOr; @@ -295,6 +296,7 @@ registerUDF("upper", UDFUpper.class, false); registerUDF("lower", UDFLower.class, false); + registerUDF("make_set", UDFMakeSet.class, false); registerUDF("ucase", UDFUpper.class, false); registerUDF("lcase", UDFLower.class, false); registerUDF("trim", UDFTrim.class, false); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMakeSet.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMakeSet.java new file mode 100644 index 0000000..9f9819e --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMakeSet.java @@ -0,0 +1,129 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.udf; + +import java.util.StringTokenizer; +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.io.Text; + +/** + * UDFMakeSet. + * + */ +@Description(name = "make_set", value = "_FUNC_(str) - function converts the decimal number in bits to binary and returns a comma-separated list of values for all the bits that are set in that number, using string1 for the low-order bit, string2 for the next lowest bit, etc.\n" + + + "Example:\n " + + " > SELECT make_set(val) FROM somedata;\n") +public class UDFMakeSet extends UDF { + + + + public UDFMakeSet() { + } + + /** + * Get the set from a makeSetString. + * + * @param makeSetString + * the makeSetString in the format "bits, str1, str2, ..."; the leading bits field may hold several '|'-separated decimal numbers. + * + * @return a comma-separated list of values for all the bits that are set in that number, using string1 for + * the low-order bit, string2 for the next lowest bit, etc. 
+ */ + public Text evaluate(Text makeSetString) { + + String[] stringArray = new String[100]; + StringTokenizer st = new StringTokenizer(makeSetString.toString(), ","); + int i = 0; + while (st.hasMoreTokens()) { + stringArray[i] = st.nextToken(); + i++; + } + + String bit = stringArray[0]; + String[] stringArrayBit = new String[100]; + StringTokenizer stbit = new StringTokenizer(bit, "|"); + int count = 0; + i = 0; + while (stbit.hasMoreTokens()) { + stringArrayBit[i] = stbit.nextToken(); + i++; + count++; + } + + int stringArrayBits; + + Text makeSetText = new Text(""); + String[] makeSetArray = new String[50]; + int k=0; + String makeSetstr = ""; + + + for (int j = 0; j < count; j++) { + + stringArrayBits = Integer.parseInt(stringArrayBit[j]); + String bits = Integer.toBinaryString(stringArrayBits); + bits = new StringBuffer(bits).reverse().toString(); + int length = bits.length(); + int flag =0; + for (i = 0; i < length; i++) { + if (bits.charAt(i) == '1') { + if(j==0){ + makeSetArray[k] = stringArray[i + 1]; + k++; + } + else{ + for(int p = 0;p SELECT make_set(val) FROM somedata; + +PREHOOK: query: DESCRIBE FUNCTION EXTENDED make_set +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED make_set +POSTHOOK: type: DESCFUNCTION +make_set(str) - function converts the decimal number in bits to binary and returns a comma-separated list of values for all the bits that are set in that number, using string1 for the low-order bit, string2 for the next lowest bit, etc. +Example: + > SELECT make_set(val) FROM somedata; + +PREHOOK: query: SELECT make_set(column1) FROM testmakeset +PREHOOK: type: QUERY +PREHOOK: Input: default@testmakeset +#### A masked pattern was here #### +POSTHOOK: query: SELECT make_set(column1) FROM testmakeset +POSTHOOK: type: QUERY +POSTHOOK: Input: default@testmakeset +#### A masked pattern was here #### +b +d,e +i +j,l +m