Index: ql/src/test/results/clientpositive/udf_find_in_set.q.out =================================================================== --- ql/src/test/results/clientpositive/udf_find_in_set.q.out (revision 0) +++ ql/src/test/results/clientpositive/udf_find_in_set.q.out (revision 0) @@ -0,0 +1,213 @@ +PREHOOK: query: DESCRIBE FUNCTION find_in_set +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION find_in_set +POSTHOOK: type: DESCFUNCTION +find_in_set(str,str_array) - Returns the first occurrence of str in str_array where str_array is a comma-delimited string. Returns null if either argument is null. Returns 0 if the first argument has any commas. +PREHOOK: query: DESCRIBE FUNCTION EXTENDED find_in_set +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED find_in_set +POSTHOOK: type: DESCFUNCTION +find_in_set(str,str_array) - Returns the first occurrence of str in str_array where str_array is a comma-delimited string. Returns null if either argument is null. Returns 0 if the first argument has any commas. +Example: + > SELECT find_in_set('ab','abc,b,ab,c,def') FROM src LIMIT 1; + 3 + > SELECT * FROM src1 WHERE NOT find_in_set(key,'311,128,345,956')=0; + 311 val_311 + 128 +PREHOOK: query: EXPLAIN +FROM src1 SELECT find_in_set(src1.key,concat(src1.key,',',src1.value)) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM src1 SELECT find_in_set(src1.key,concat(src1.key,',',src1.value)) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF src1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION find_in_set (. (TOK_TABLE_OR_COL src1) key) (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL src1) key) ',' (. 
(TOK_TABLE_OR_COL src1) value))))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src1 + TableScan + alias: src1 + Select Operator + expressions: + expr: find_in_set(key, concat(key, ',', value)) + type: int + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: FROM src1 SELECT find_in_set(src1.key,concat(src1.key,',',src1.value)) +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: file:/mnt/vol/devrs002.snc1/hsurreal/hive-trunk/build/ql/tmp/1586937286/10000 +POSTHOOK: query: FROM src1 SELECT find_in_set(src1.key,concat(src1.key,',',src1.value)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: file:/mnt/vol/devrs002.snc1/hsurreal/hive-trunk/build/ql/tmp/1586937286/10000 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +PREHOOK: query: SELECT find_in_set('ab','ab,abc,abcde') FROM src1 LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: file:/mnt/vol/devrs002.snc1/hsurreal/hive-trunk/build/ql/tmp/1499102713/10000 +POSTHOOK: query: SELECT find_in_set('ab','ab,abc,abcde') FROM src1 LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: file:/mnt/vol/devrs002.snc1/hsurreal/hive-trunk/build/ql/tmp/1499102713/10000 +1 +PREHOOK: query: SELECT find_in_set('ab','abc,ab,bbb') FROM src1 LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: file:/mnt/vol/devrs002.snc1/hsurreal/hive-trunk/build/ql/tmp/2025487244/10000 +POSTHOOK: query: SELECT find_in_set('ab','abc,ab,bbb') FROM src1 LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: 
file:/mnt/vol/devrs002.snc1/hsurreal/hive-trunk/build/ql/tmp/2025487244/10000 +2 +PREHOOK: query: SELECT find_in_set('ab','def,abc,ab') FROM src1 LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: file:/mnt/vol/devrs002.snc1/hsurreal/hive-trunk/build/ql/tmp/1197303570/10000 +POSTHOOK: query: SELECT find_in_set('ab','def,abc,ab') FROM src1 LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: file:/mnt/vol/devrs002.snc1/hsurreal/hive-trunk/build/ql/tmp/1197303570/10000 +3 +PREHOOK: query: SELECT find_in_set('ab','abc,abd,abf') FROM src1 LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: file:/mnt/vol/devrs002.snc1/hsurreal/hive-trunk/build/ql/tmp/180430857/10000 +POSTHOOK: query: SELECT find_in_set('ab','abc,abd,abf') FROM src1 LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: file:/mnt/vol/devrs002.snc1/hsurreal/hive-trunk/build/ql/tmp/180430857/10000 +0 +PREHOOK: query: SELECT find_in_set(null,'a,b,c') FROM src1 LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: file:/mnt/vol/devrs002.snc1/hsurreal/hive-trunk/build/ql/tmp/582632662/10000 +POSTHOOK: query: SELECT find_in_set(null,'a,b,c') FROM src1 LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: file:/mnt/vol/devrs002.snc1/hsurreal/hive-trunk/build/ql/tmp/582632662/10000 +NULL +PREHOOK: query: SELECT find_in_set('a',null) FROM src1 LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: file:/mnt/vol/devrs002.snc1/hsurreal/hive-trunk/build/ql/tmp/122073254/10000 +POSTHOOK: query: SELECT find_in_set('a',null) FROM src1 LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: file:/mnt/vol/devrs002.snc1/hsurreal/hive-trunk/build/ql/tmp/122073254/10000 +NULL +PREHOOK: query: SELECT find_in_set('', '') FROM src1 LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: 
file:/mnt/vol/devrs002.snc1/hsurreal/hive-trunk/build/ql/tmp/1837381738/10000 +POSTHOOK: query: SELECT find_in_set('', '') FROM src1 LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: file:/mnt/vol/devrs002.snc1/hsurreal/hive-trunk/build/ql/tmp/1837381738/10000 +1 +PREHOOK: query: SELECT find_in_set('',',') FROM src1 LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: file:/mnt/vol/devrs002.snc1/hsurreal/hive-trunk/build/ql/tmp/984994381/10000 +POSTHOOK: query: SELECT find_in_set('',',') FROM src1 LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: file:/mnt/vol/devrs002.snc1/hsurreal/hive-trunk/build/ql/tmp/984994381/10000 +1 +PREHOOK: query: SELECT find_in_set('','a,,b') FROM src1 LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: file:/mnt/vol/devrs002.snc1/hsurreal/hive-trunk/build/ql/tmp/853818173/10000 +POSTHOOK: query: SELECT find_in_set('','a,,b') FROM src1 LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: file:/mnt/vol/devrs002.snc1/hsurreal/hive-trunk/build/ql/tmp/853818173/10000 +2 +PREHOOK: query: SELECT find_in_set('','a,b,') FROM src1 LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: file:/mnt/vol/devrs002.snc1/hsurreal/hive-trunk/build/ql/tmp/1835713486/10000 +POSTHOOK: query: SELECT find_in_set('','a,b,') FROM src1 LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: file:/mnt/vol/devrs002.snc1/hsurreal/hive-trunk/build/ql/tmp/1835713486/10000 +3 +PREHOOK: query: SELECT find_in_set(',','a,b,d,') FROM src1 LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: file:/mnt/vol/devrs002.snc1/hsurreal/hive-trunk/build/ql/tmp/268862937/10000 +POSTHOOK: query: SELECT find_in_set(',','a,b,d,') FROM src1 LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: 
file:/mnt/vol/devrs002.snc1/hsurreal/hive-trunk/build/ql/tmp/268862937/10000 +0 +PREHOOK: query: SELECT find_in_set('a','') FROM src1 LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: file:/mnt/vol/devrs002.snc1/hsurreal/hive-trunk/build/ql/tmp/2079852974/10000 +POSTHOOK: query: SELECT find_in_set('a','') FROM src1 LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: file:/mnt/vol/devrs002.snc1/hsurreal/hive-trunk/build/ql/tmp/2079852974/10000 +0 +PREHOOK: query: SELECT find_in_set('a,','a,b,c,d') FROM src1 LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: file:/mnt/vol/devrs002.snc1/hsurreal/hive-trunk/build/ql/tmp/982350293/10000 +POSTHOOK: query: SELECT find_in_set('a,','a,b,c,d') FROM src1 LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: file:/mnt/vol/devrs002.snc1/hsurreal/hive-trunk/build/ql/tmp/982350293/10000 +0 +PREHOOK: query: SELECT * FROM src1 WHERE NOT find_in_set(key,'311,128,345,2,956')=0 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: file:/mnt/vol/devrs002.snc1/hsurreal/hive-trunk/build/ql/tmp/40028878/10000 +POSTHOOK: query: SELECT * FROM src1 WHERE NOT find_in_set(key,'311,128,345,2,956')=0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: file:/mnt/vol/devrs002.snc1/hsurreal/hive-trunk/build/ql/tmp/40028878/10000 +311 val_311 +128 Index: ql/src/test/results/clientpositive/show_functions.q.out =================================================================== --- ql/src/test/results/clientpositive/show_functions.q.out (revision 822338) +++ ql/src/test/results/clientpositive/show_functions.q.out (working copy) @@ -44,6 +44,7 @@ double elt exp +find_in_set float floor from_unixtime Index: ql/src/test/results/clientpositive/describe_function.q.out =================================================================== --- ql/src/test/results/clientpositive/describe_function.q.out (revision 
822338) +++ ql/src/test/results/clientpositive/describe_function.q.out (working copy) @@ -13,6 +13,11 @@ POSTHOOK: query: DESCRIBE FUNCTION substring POSTHOOK: type: DESCFUNCTION substring(str, pos[, len]) - returns the substring of str that starts at pos and is of length len +PREHOOK: query: DESCRIBE FUNCTION find_in_set +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION find_in_set +POSTHOOK: type: DESCFUNCTION +find_in_set(str,str_array) - Returns the first occurrence of str in str_array where str_array is a comma-delimited string. Returns null if either argument is null. Returns 0 if the first argument has any commas. PREHOOK: query: DESCRIBE FUNCTION space PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION space @@ -443,6 +448,17 @@ 'ebook' > SELECT substring('Facebook', 5, 1) FROM src LIMIT 1; 'b' +PREHOOK: query: DESCRIBE FUNCTION EXTENDED find_in_set +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED find_in_set +POSTHOOK: type: DESCFUNCTION +find_in_set(str,str_array) - Returns the first occurrence of str in str_array where str_array is a comma-delimited string. Returns null if either argument is null. Returns 0 if the first argument has any commas. 
+Example: + > SELECT find_in_set('ab','abc,b,ab,c,def') FROM src LIMIT 1; + 3 + > SELECT * FROM src1 WHERE NOT find_in_set(key,'311,128,345,956')=0; + 311 val_311 + 128 PREHOOK: query: DESCRIBE FUNCTION EXTENDED space PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION EXTENDED space Index: ql/src/test/queries/clientpositive/udf_find_in_set.q =================================================================== --- ql/src/test/queries/clientpositive/udf_find_in_set.q (revision 0) +++ ql/src/test/queries/clientpositive/udf_find_in_set.q (revision 0) @@ -0,0 +1,23 @@ +DESCRIBE FUNCTION find_in_set; +DESCRIBE FUNCTION EXTENDED find_in_set; + +EXPLAIN +FROM src1 SELECT find_in_set(src1.key,concat(src1.key,',',src1.value)); + +FROM src1 SELECT find_in_set(src1.key,concat(src1.key,',',src1.value)); + +SELECT find_in_set('ab','ab,abc,abcde') FROM src1 LIMIT 1; +SELECT find_in_set('ab','abc,ab,bbb') FROM src1 LIMIT 1; +SELECT find_in_set('ab','def,abc,ab') FROM src1 LIMIT 1; +SELECT find_in_set('ab','abc,abd,abf') FROM src1 LIMIT 1; +SELECT find_in_set(null,'a,b,c') FROM src1 LIMIT 1; +SELECT find_in_set('a',null) FROM src1 LIMIT 1; +SELECT find_in_set('', '') FROM src1 LIMIT 1; +SELECT find_in_set('',',') FROM src1 LIMIT 1; +SELECT find_in_set('','a,,b') FROM src1 LIMIT 1; +SELECT find_in_set('','a,b,') FROM src1 LIMIT 1; +SELECT find_in_set(',','a,b,d,') FROM src1 LIMIT 1; +SELECT find_in_set('a','') FROM src1 LIMIT 1; +SELECT find_in_set('a,','a,b,c,d') FROM src1 LIMIT 1; + +SELECT * FROM src1 WHERE NOT find_in_set(key,'311,128,345,2,956')=0; Index: ql/src/test/queries/clientpositive/describe_function.q =================================================================== --- ql/src/test/queries/clientpositive/describe_function.q (revision 822338) +++ ql/src/test/queries/clientpositive/describe_function.q (working copy) @@ -1,6 +1,7 @@ DESCRIBE FUNCTION concat; DESCRIBE FUNCTION substr; DESCRIBE FUNCTION substring; +DESCRIBE FUNCTION find_in_set; DESCRIBE 
FUNCTION space; DESCRIBE FUNCTION repeat; DESCRIBE FUNCTION ascii; @@ -85,6 +86,7 @@ DESCRIBE FUNCTION EXTENDED concat; DESCRIBE FUNCTION EXTENDED substr; DESCRIBE FUNCTION EXTENDED substring; +DESCRIBE FUNCTION EXTENDED find_in_set; DESCRIBE FUNCTION EXTENDED space; DESCRIBE FUNCTION EXTENDED repeat; DESCRIBE FUNCTION EXTENDED ascii; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (revision 822338) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (working copy) @@ -105,7 +105,8 @@ registerUDF("rtrim", UDFRTrim.class, false); registerUDF("length", UDFLength.class, false); registerUDF("reverse", UDFReverse.class, false); - + registerUDF("find_in_set", UDFFindInSet.class, false); + registerUDF("like", UDFLike.class, true); registerUDF("rlike", UDFRegExp.class, true); registerUDF("regexp", UDFRegExp.class, true); Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFindInSet.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFindInSet.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFindInSet.java (revision 0) @@ -0,0 +1,104 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf;
+
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.hive.ql.exec.description;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.IntWritable;
+
+/**
+ * Hive UDF behind find_in_set(str, str_array): looks up str among the
+ * comma-separated fields of str_array and reports its 1-based position.
+ */
+@description(
+    name = "find_in_set",
+    value = "_FUNC_(str,str_array) - Returns the first occurrence " +
+    " of str in str_array where str_array is a comma-delimited string." +
+    " Returns null if either argument is null." +
+    " Returns 0 if the first argument has any commas.",
+    extended = "Example:\n" +
+    " > SELECT _FUNC_('ab','abc,b,ab,c,def') FROM src LIMIT 1;\n" +
+    " 3\n" +
+    " > SELECT * FROM src1 WHERE NOT _FUNC_(key,'311,128,345,956')=0;\n" +
+    " 311 val_311\n" +
+    " 128"
+    )
+public class UDFFindInSet extends UDF {
+  // Single writable handed back by every call that returns non-null.
+  private IntWritable result = new IntWritable();
+
+  // Ignores both arguments and yields 0.
+  // NOTE(review): not named "evaluate"; looks like leftover scaffolding - confirm.
+  public IntWritable evaluate2(Text s1, Text s2) {
+    return answer(0);
+  }
+
+  /**
+   * Finds s among the comma-separated fields of txtarray, comparing raw bytes.
+   *
+   * @param s the field to look for
+   * @param txtarray the comma-delimited list to search
+   * @return null if either argument is null; 0 if s contains a comma or no
+   *         field equals s; otherwise the 1-based index of the first equal field
+   */
+  public IntWritable evaluate(Text s, Text txtarray) {
+    if (s == null || txtarray == null) {
+      return null;
+    }
+
+    final byte[] needle = s.getBytes();
+    final int needleLen = s.getLength();
+    // A needle containing a comma can never equal a single field.
+    for (int k = 0; k < needleLen; k++) {
+      if (needle[k] == ',') {
+        return answer(0);
+      }
+    }
+
+    final byte[] list = txtarray.getBytes();
+    final int listLen = txtarray.getLength();
+    int ordinal = 0;           // fields closed by a comma so far
+    int fieldLen = 0;          // bytes seen in the current field
+    boolean stillEqual = true; // current field matches needle so far
+    for (int k = 0; k < listLen; k++) {
+      final byte b = list[k];
+      if (b != ',') {
+        // Short-circuit keeps needle[fieldLen] in bounds for over-long fields.
+        stillEqual = stillEqual && fieldLen < needleLen && needle[fieldLen] == b;
+        fieldLen++;
+        continue;
+      }
+      ordinal++;
+      if (stillEqual && fieldLen == needleLen) {
+        return answer(ordinal);
+      }
+      stillEqual = true;
+      fieldLen = 0;
+    }
+    // The last field has no trailing comma, so it is judged here.
+    return answer(stillEqual && fieldLen == needleLen ? ordinal + 1 : 0);
+  }
+
+  // Stores value in the shared writable and returns it.
+  private IntWritable answer(int value) {
+    result.set(value);
+    return result;
+  }
+}

Property changes on: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFindInSet.java
___________________________________________________________________
Added: svn:executable
   + *