diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java
index 7f4a807..33d0822 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java
@@ -447,8 +447,7 @@ public static String getOrdinal(int i) {
/**
* Finds any occurence of subtext from text in the
- * backing buffer, for avoiding string encoding and decoding. Shamelessly copy
- * from {@link org.apache.hadoop.io.Text#find(String, int)}.
+ * decoded String values; {@code start} and the result are char (not byte) offsets.
*/
public static int findText(Text text, Text subtext, int start) {
// src.position(start) can't accept negative numbers.
@@ -463,38 +462,9 @@ public static int findText(Text text, Text subtext, int start) {
return -1;
}
- ByteBuffer src = ByteBuffer.wrap(text.getBytes(), 0, text.getLength());
- ByteBuffer tgt = ByteBuffer
- .wrap(subtext.getBytes(), 0, subtext.getLength());
- byte b = tgt.get();
- src.position(start);
-
- while (src.hasRemaining()) {
- if (b == src.get()) { // matching first byte
- src.mark(); // save position in loop
- tgt.mark(); // save position in target
- boolean found = true;
- int pos = src.position() - 1;
- while (tgt.hasRemaining()) {
- if (!src.hasRemaining()) { // src expired first
- tgt.reset();
- src.reset();
- found = false;
- break;
- }
- if (!(tgt.get() == src.get())) {
- tgt.reset();
- src.reset();
- found = false;
- break; // no match
- }
- }
- if (found) {
- return pos;
- }
- }
- }
- return -1; // not found
+ String textString = text.toString();
+ String subtextString = subtext.toString();
+ return textString.indexOf(subtextString, start);
}
private GenericUDFUtils() {
diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/TestGenericUDFUtils.java ql/src/test/org/apache/hadoop/hive/ql/udf/TestGenericUDFUtils.java
index d9338a5..e2bbbda 100644
--- ql/src/test/org/apache/hadoop/hive/ql/udf/TestGenericUDFUtils.java
+++ ql/src/test/org/apache/hadoop/hive/ql/udf/TestGenericUDFUtils.java
@@ -20,6 +20,7 @@
import junit.framework.Assert;
import junit.framework.TestCase;
+
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils;
import org.apache.hadoop.io.Text;
import org.junit.Test;
@@ -44,5 +45,9 @@ public void testFindText() throws Exception {
Assert.assertEquals(0, GenericUDFUtils.findText(new Text("foobar"), new Text(""), 0));
Assert.assertEquals(0, GenericUDFUtils.findText(new Text("foobar"), new Text(""), 6));
Assert.assertEquals(-1, GenericUDFUtils.findText(new Text("foobar"), new Text(""), 7));
+
+ // Unicode case: expected indices are character offsets, not UTF-8 byte offsets.
+ Assert.assertEquals(4, GenericUDFUtils.findText(new Text("НАСТРОЕние"), new Text("Р"), 0));
+ Assert.assertEquals(15, GenericUDFUtils.findText(new Text("НАСТРОЕние НАСТРОЕние"), new Text("Р"), 11));
}
}