Compare strings by Unicode code point in StringHelper.stringDifference, so that
a mismatch inside a surrogate pair is reported at the index where the pair
starts, and add tests for paired and unpaired surrogates.
NOTE(review): indentation of the context lines below was reconstructed assuming
2-space indentation; confirm against revision 996900 before applying.

Index: src/java/org/apache/lucene/util/StringHelper.java
===================================================================
--- src/java/org/apache/lucene/util/StringHelper.java	(revision 996900)
+++ src/java/org/apache/lucene/util/StringHelper.java	(working copy)
@@ -45,7 +45,7 @@
    * @return The number of common elements.
    */
   public static final int bytesDifference(byte[] bytes1, int len1, byte[] bytes2, int len2) {
-    int len = len1 < len2 ? len1 : len2;
+    final int len = len1 < len2 ? len1 : len2;
     for (int i = 0; i < len; i++)
       if (bytes1[i] != bytes2[i])
         return i;
@@ -61,13 +61,16 @@
    * @return The first position where the two strings differ.
    */
   public static final int stringDifference(String s1, String s2) {
-    int len1 = s1.length();
-    int len2 = s2.length();
-    int len = len1 < len2 ? len1 : len2;
-    for (int i = 0; i < len; i++) {
-      if (s1.charAt(i) != s2.charAt(i)) {
+    final int len = Math.min(s1.length(), s2.length());
+    // Step by code point, not by char, so that a mismatch inside a surrogate
+    // pair is reported at the index where the pair starts.
+    for (int i = 0; i < len; ) {
+      final int cp = s1.codePointAt(i);
+      if (cp != s2.codePointAt(i)) {
         return i;
       }
+      // Equal code points occupy the same number of chars in both strings.
+      i += Character.charCount(cp);
     }
     return len;
   }
Index: src/test/org/apache/lucene/util/TestStringHelper.java
===================================================================
--- src/test/org/apache/lucene/util/TestStringHelper.java	(revision 996900)
+++ src/test/org/apache/lucene/util/TestStringHelper.java	(working copy)
@@ -36,5 +36,14 @@
     test2 = "test";
     result = StringHelper.stringDifference(test1, test2);
     assertTrue(result == 4);
+
+    // U+1041C encoded as the surrogate pair D801 DC1C, once and twice.
+    String surrogateString = "Test\ud801\udc1cing";
+    String twoSurrogateString = "Test\ud801\udc1c\ud801\udc1cing";
+
+    // High surrogate D801 followed by the plain chars "1c": an unpaired surrogate.
+    String brokenSurrogateString = "Test\ud8011cing";
+    assertEquals(4, StringHelper.stringDifference(surrogateString, brokenSurrogateString));
+    assertEquals(6, StringHelper.stringDifference(surrogateString, twoSurrogateString));
   }
 }