Index: ql/src/test/results/clientpositive/udf1.q.out =================================================================== --- ql/src/test/results/clientpositive/udf1.q.out (revision 1851) +++ ql/src/test/results/clientpositive/udf1.q.out (working copy) @@ -110,10 +110,10 @@ Move Operator files: hdfs directory: true - destination: file:/data/users/njain/hive5/hive5/build/ql/tmp/509385687/10000 + destination: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/594380230/10000 Map Reduce Alias -> Map Operator Tree: - file:/data/users/njain/hive5/hive5/build/ql/tmp/1277759891/10002 + file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/501326158/10002 Reduce Output Operator sort order: Map-reduce partition columns: @@ -206,9 +206,9 @@ PREHOOK: query: SELECT dest1.* FROM dest1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 -PREHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/1812153467/10000 +PREHOOK: Output: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/1599463850/10000 POSTHOOK: query: SELECT dest1.* FROM dest1 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 -POSTHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/1812153467/10000 -true false true true true false false false true true false true false acc abc abb hive hadoop AaAbAcA false +POSTHOOK: Output: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/1599463850/10000 +true false true true true false false false true true false true true acc abc abb hive hadoop AaAbAcA false Index: ql/src/test/results/clientpositive/udf_regexp.q.out =================================================================== --- ql/src/test/results/clientpositive/udf_regexp.q.out (revision 0) +++ ql/src/test/results/clientpositive/udf_regexp.q.out (revision 0) @@ -0,0 +1,26 @@ +PREHOOK: query: DESCRIBE FUNCTION regexp +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION regexp +POSTHOOK: type: DESCFUNCTION +str regexp regexp - Returns true if str matches regexp and false otherwise +PREHOOK: query: DESCRIBE FUNCTION EXTENDED regexp +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED regexp +POSTHOOK: type: DESCFUNCTION +str regexp regexp - Returns true if str matches regexp and false otherwise +Example: + > SELECT 'fb' regexp '.*' FROM src LIMIT 1; + true +PREHOOK: query: SELECT 'fofo' REGEXP '^fo', 'fo\no' REGEXP '^fo\no$', 'Bn' REGEXP '^Ba*n', 'afofo' REGEXP 'fo', +'afofo' REGEXP '^fo', 'Baan' REGEXP '^Ba?n', 'axe' REGEXP 'pi|apa', 'pip' REGEXP '^(pi)*$' +FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/852993560/10000 +POSTHOOK: query: SELECT 'fofo' REGEXP '^fo', 'fo\no' REGEXP '^fo\no$', 'Bn' REGEXP '^Ba*n', 'afofo' REGEXP 'fo', +'afofo' REGEXP '^fo', 'Baan' REGEXP '^Ba?n', 'axe' REGEXP 'pi|apa', 'pip' REGEXP '^(pi)*$' +FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/852993560/10000 +true true true true false false false false Index: ql/src/test/queries/clientpositive/udf_regexp.q =================================================================== --- ql/src/test/queries/clientpositive/udf_regexp.q (revision 0) +++ ql/src/test/queries/clientpositive/udf_regexp.q (revision 0) @@ -0,0 +1,7 @@ +DESCRIBE FUNCTION regexp; + +DESCRIBE FUNCTION EXTENDED regexp; + +SELECT 'fofo' REGEXP '^fo', 'fo\no' REGEXP '^fo\no$', 'Bn' REGEXP '^Ba*n', 'afofo' REGEXP 'fo', +'afofo' REGEXP '^fo', 'Baan' REGEXP '^Ba?n', 'axe' REGEXP 'pi|apa', 'pip' REGEXP '^(pi)*$' +FROM src LIMIT 1; Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRegExp.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRegExp.java (revision 1851) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRegExp.java (working copy) @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.udf; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.description; import org.apache.hadoop.io.BooleanWritable; @@ -36,8 +38,11 @@ ) public class UDFRegExp extends UDF { + static final Log LOG = LogFactory.getLog(UDFRegExp.class.getName()); + private Text lastRegex = new Text(); private Pattern p = null; + boolean warned = false; BooleanWritable result = new BooleanWritable(); public UDFRegExp() { @@ -47,12 +52,21 @@ if (s == null || regex == null) { return null; } + if(regex.getLength()==0) { + if(!warned) { + warned = true; + LOG.warn(getClass().getSimpleName() + " regex is empty. Additional " + + "warnings for an empty regex will be suppressed."); + } + result.set(false); + return result; + } if (!regex.equals(lastRegex) || p == null) { lastRegex.set(regex); p = Pattern.compile(regex.toString()); } Matcher m = p.matcher(s.toString()); - result.set(m.matches()); + result.set(m.find(0)); return result; }