Index: ql/src/test/results/clientpositive/udf_like.q.out =================================================================== --- ql/src/test/results/clientpositive/udf_like.q.out (revision 0) +++ ql/src/test/results/clientpositive/udf_like.q.out (revision 0) @@ -0,0 +1,66 @@ +query: EXPLAIN +SELECT '_%_' LIKE '%\_\%\_%', '__' LIKE '%\_\%\_%', '%%_%_' LIKE '%\_\%\_%', '%_%_%' LIKE '%\%\_\%', + '_%_' LIKE '\%\_%', '%__' LIKE '__\%%', '_%' LIKE '\_\%\_\%%', '_%' LIKE '\_\%_%', + '%_' LIKE '\%\_', 'ab' LIKE '\%\_', 'ab' LIKE '_a%', 'ab' LIKE 'a' +FROM src WHERE src.key = 86 +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (LIKE '_%_' '%\_\%\_%')) (TOK_SELEXPR (LIKE '__' '%\_\%\_%')) (TOK_SELEXPR (LIKE '%%_%_' '%\_\%\_%')) (TOK_SELEXPR (LIKE '%_%_%' '%\%\_\%')) (TOK_SELEXPR (LIKE '_%_' '\%\_%')) (TOK_SELEXPR (LIKE '%__' '__\%%')) (TOK_SELEXPR (LIKE '_%' '\_\%\_\%%')) (TOK_SELEXPR (LIKE '_%' '\_\%_%')) (TOK_SELEXPR (LIKE '%_' '\%\_')) (TOK_SELEXPR (LIKE 'ab' '\%\_')) (TOK_SELEXPR (LIKE 'ab' '_a%')) (TOK_SELEXPR (LIKE 'ab' 'a'))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL src) key) 86)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + Filter Operator + predicate: + expr: (UDFToDouble(key) = UDFToDouble(86)) + type: boolean + Select Operator + expressions: + expr: ('_%_' like '%\_\%\_%') + type: boolean + expr: ('__' like '%\_\%\_%') + type: boolean + expr: ('%%_%_' like '%\_\%\_%') + type: boolean + expr: ('%_%_%' like '%\%\_\%') + type: boolean + expr: ('_%_' like '\%\_%') + type: boolean + expr: ('%__' like '__\%%') + type: boolean + expr: ('_%' like '\_\%\_\%%') + type: boolean + expr: ('_%' like '\_\%_%') + type: boolean + expr: ('%_' like '\%\_') + type: boolean + expr: ('ab' like '\%\_') + type: boolean + expr: ('ab' like '_a%') + type: boolean + expr: ('ab' like 'a') + type: boolean + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +query: SELECT '_%_' LIKE '%\_\%\_%', '__' LIKE '%\_\%\_%', '%%_%_' LIKE '%\_\%\_%', '%_%_%' LIKE '%\%\_\%', + '_%_' LIKE '\%\_%', '%__' LIKE '__\%%', '_%' LIKE '\_\%\_\%%', '_%' LIKE '\_\%_%', + '%_' LIKE '\%\_', 'ab' LIKE '\%\_', 'ab' LIKE '_a%', 'ab' LIKE 'a' +FROM src WHERE src.key = 86 +Input: default/src +Output: file:/home/yjia/hive/build/ql/tmp/839410699/10000 +true false true true false false false false true false false false Index: ql/src/test/queries/clientpositive/udf_like.q =================================================================== --- ql/src/test/queries/clientpositive/udf_like.q (revision 0) +++ ql/src/test/queries/clientpositive/udf_like.q (revision 0) @@ -0,0 +1,10 @@ +EXPLAIN +SELECT '_%_' LIKE '%\_\%\_%', '__' LIKE '%\_\%\_%', '%%_%_' LIKE '%\_\%\_%', '%_%_%' LIKE '%\%\_\%', + '_%_' LIKE '\%\_%', '%__' LIKE '__\%%', '_%' LIKE '\_\%\_\%%', '_%' LIKE '\_\%_%', + '%_' LIKE '\%\_', 'ab' LIKE '\%\_', 'ab' LIKE '_a%', 'ab' LIKE 'a' +FROM src WHERE src.key = 86; + +SELECT '_%_' LIKE '%\_\%\_%', '__' LIKE '%\_\%\_%', '%%_%_' LIKE '%\_\%\_%', '%_%_%' LIKE '%\%\_\%', + '_%_' LIKE '\%\_%', '%__' LIKE '__\%%', '_%' LIKE '\_\%\_\%%', '_%' LIKE '\_\%_%', + '%_' LIKE '\%\_', 'ab' LIKE '\%\_', 'ab' LIKE '_a%', 'ab' LIKE 'a' +FROM src WHERE src.key = 86; Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLike.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLike.java (revision 785848) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLike.java (working copy) @@ -32,7 +32,19 @@ private static Log LOG = LogFactory.getLog(UDFLike.class.getName()); private Text lastLikePattern = new Text(); private Pattern p = null; - + + // Doing characters comparison directly instead of regular expression + // matching for simple patterns like "%abc%". + private enum PatternType { + NONE, // "abc" + BEGIN, // "abc%" + END, // "%abc" + MIDDLE, // "%abc%" + COMPLEX, // all other cases, such as "ab%c_de" + } + private PatternType type = PatternType.COMPLEX; + private Text simplePattern = new Text(); + private BooleanWritable result = new BooleanWritable(); public UDFLike() { } @@ -63,16 +75,114 @@ return sb.toString(); } + private boolean checkSimplePattern(String likePattern) { + int length = likePattern.length(); + int beginIndex = 0; + int endIndex = length; + char lastChar = 'a'; + String strPattern = new String(); + type = PatternType.NONE; + + for (int i=0; i