diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java index 7f4a807..33d0822 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java @@ -447,8 +447,7 @@ public static String getOrdinal(int i) { /** * Finds any occurence of subtext from text in the - * backing buffer, for avoiding string encoding and decoding. Shamelessly copy - * from {@link org.apache.hadoop.io.Text#find(String, int)}. + * backing buffer. */ public static int findText(Text text, Text subtext, int start) { // src.position(start) can't accept negative numbers. @@ -463,38 +462,9 @@ public static int findText(Text text, Text subtext, int start) { return -1; } - ByteBuffer src = ByteBuffer.wrap(text.getBytes(), 0, text.getLength()); - ByteBuffer tgt = ByteBuffer - .wrap(subtext.getBytes(), 0, subtext.getLength()); - byte b = tgt.get(); - src.position(start); - - while (src.hasRemaining()) { - if (b == src.get()) { // matching first byte - src.mark(); // save position in loop - tgt.mark(); // save position in target - boolean found = true; - int pos = src.position() - 1; - while (tgt.hasRemaining()) { - if (!src.hasRemaining()) { // src expired first - tgt.reset(); - src.reset(); - found = false; - break; - } - if (!(tgt.get() == src.get())) { - tgt.reset(); - src.reset(); - found = false; - break; // no match - } - } - if (found) { - return pos; - } - } - } - return -1; // not found + String textString = text.toString(); + String subtextString = subtext.toString(); + return textString.indexOf(subtextString, start); } private GenericUDFUtils() { diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/TestGenericUDFUtils.java ql/src/test/org/apache/hadoop/hive/ql/udf/TestGenericUDFUtils.java index d9338a5..e2bbbda 100644 --- ql/src/test/org/apache/hadoop/hive/ql/udf/TestGenericUDFUtils.java +++ ql/src/test/org/apache/hadoop/hive/ql/udf/TestGenericUDFUtils.java @@ -20,6 +20,7 @@ import junit.framework.Assert; import junit.framework.TestCase; + import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils; import org.apache.hadoop.io.Text; import org.junit.Test; @@ -44,5 +45,9 @@ public void testFindText() throws Exception { Assert.assertEquals(0, GenericUDFUtils.findText(new Text("foobar"), new Text(""), 0)); Assert.assertEquals(0, GenericUDFUtils.findText(new Text("foobar"), new Text(""), 6)); Assert.assertEquals(-1, GenericUDFUtils.findText(new Text("foobar"), new Text(""), 7)); + + //Unicode case. + Assert.assertEquals(4, GenericUDFUtils.findText(new Text("НАСТРОЕние"), new Text("Р"), 0)); + Assert.assertEquals(15, GenericUDFUtils.findText(new Text("НАСТРОЕние НАСТРОЕние"), new Text("Р"), 11)); } }