Index: ql/src/test/results/clientpositive/udf1.q.out =================================================================== --- ql/src/test/results/clientpositive/udf1.q.out (revision 1851) +++ ql/src/test/results/clientpositive/udf1.q.out (working copy) @@ -110,10 +110,10 @@ Move Operator files: hdfs directory: true - destination: file:/data/users/njain/hive5/hive5/build/ql/tmp/509385687/10000 + destination: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/594380230/10000 Map Reduce Alias -> Map Operator Tree: - file:/data/users/njain/hive5/hive5/build/ql/tmp/1277759891/10002 + file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/501326158/10002 Reduce Output Operator sort order: Map-reduce partition columns: @@ -206,9 +206,9 @@ PREHOOK: query: SELECT dest1.* FROM dest1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 -PREHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/1812153467/10000 +PREHOOK: Output: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/1599463850/10000 POSTHOOK: query: SELECT dest1.* FROM dest1 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 -POSTHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/1812153467/10000 -true false true true true false false false true true false true false acc abc abb hive hadoop AaAbAcA false +POSTHOOK: Output: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/1599463850/10000 +true false true true true false false false true true false true true acc abc abb hive hadoop AaAbAcA false Index: ql/src/test/results/clientpositive/udf_regexp.q.out =================================================================== --- ql/src/test/results/clientpositive/udf_regexp.q.out (revision 0) +++ ql/src/test/results/clientpositive/udf_regexp.q.out (revision 0) @@ -0,0 +1,85 @@ +PREHOOK: query: DESCRIBE FUNCTION regexp +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION regexp +POSTHOOK: type: DESCFUNCTION +str regexp regexp - Returns true if str matches regexp and false otherwise +PREHOOK: query: DESCRIBE FUNCTION EXTENDED regexp +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED regexp +POSTHOOK: type: DESCFUNCTION +str regexp regexp - Returns true if str matches regexp and false otherwise +Example: + > SELECT 'fb' regexp '.*' FROM src LIMIT 1; + true +PREHOOK: query: SELECT 'fofo' REGEXP '^fo' FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/1723674809/10000 +POSTHOOK: query: SELECT 'fofo' REGEXP '^fo' FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/1723674809/10000 +true +PREHOOK: query: SELECT 'fo\no' REGEXP '^fo\no$' FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/970395682/10000 +POSTHOOK: query: SELECT 'fo\no' REGEXP '^fo\no$' FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/970395682/10000 +true +PREHOOK: query: SELECT 'Bn' REGEXP '^Ba*n' FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/697316244/10000 +POSTHOOK: query: SELECT 'Bn' REGEXP '^Ba*n' FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/697316244/10000 +true +PREHOOK: query: SELECT 'afofo' REGEXP 'fo' FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/1270510740/10000 +POSTHOOK: query: SELECT 'afofo' REGEXP 'fo' FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/1270510740/10000 +true +PREHOOK: query: SELECT 'afofo' REGEXP '^fo' FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/1135254515/10000 +POSTHOOK: query: SELECT 'afofo' REGEXP '^fo' FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/1135254515/10000 +false +PREHOOK: query: SELECT 'Baan' REGEXP '^Ba?n' FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/979129688/10000 +POSTHOOK: query: SELECT 'Baan' REGEXP '^Ba?n' FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/979129688/10000 +false +PREHOOK: query: SELECT 'axe' REGEXP 'pi|apa' FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/502935779/10000 +POSTHOOK: query: SELECT 'axe' REGEXP 'pi|apa' FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/502935779/10000 +false +PREHOOK: query: SELECT 'pip' REGEXP '^(pi)*$' FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/1935414894/10000 +POSTHOOK: query: SELECT 'pip' REGEXP '^(pi)*$' FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/1935414894/10000 +false Index: ql/src/test/queries/clientpositive/udf_regexp.q =================================================================== --- ql/src/test/queries/clientpositive/udf_regexp.q (revision 0) +++ ql/src/test/queries/clientpositive/udf_regexp.q (revision 0) @@ -0,0 +1,13 @@ +DESCRIBE FUNCTION regexp; + +DESCRIBE FUNCTION EXTENDED regexp; + +SELECT 'fofo' REGEXP '^fo' FROM src LIMIT 1; +SELECT 'fo\no' REGEXP '^fo\no$' FROM src LIMIT 1; +SELECT 'Bn' REGEXP '^Ba*n' FROM src LIMIT 1; +SELECT 'afofo' REGEXP 'fo' FROM src LIMIT 1; + +SELECT 'afofo' REGEXP '^fo' FROM src LIMIT 1; +SELECT 'Baan' REGEXP '^Ba?n' FROM src LIMIT 1; +SELECT 'axe' REGEXP 'pi|apa' FROM src LIMIT 1; +SELECT 'pip' REGEXP '^(pi)*$' FROM src LIMIT 1; Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRegExp.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRegExp.java (revision 1851) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRegExp.java (working copy) @@ -18,8 +18,11 @@ package org.apache.hadoop.hive.ql.udf; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.description; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.Text; @@ -36,6 +39,8 @@ ) public class UDFRegExp extends UDF { + static final Log LOG = LogFactory.getLog(UDFRegExp.class.getName()); + private Text lastRegex = new Text(); private Pattern p = null; @@ -47,12 +52,17 @@ if (s == null || regex == null) { return null; } + if(regex.getLength()==0) { + LOG.warn(getClass().getSimpleName() + " regex is empty"); + result.set(false); + return result; + } if (!regex.equals(lastRegex) || p == null) { lastRegex.set(regex); p = Pattern.compile(regex.toString()); } Matcher m = p.matcher(s.toString()); - result.set(m.matches()); + result.set(m.find(0)); return result; }