Index: src/test/java/org/apache/harmony/luni/tests/java/lang/CharacterTest.java =================================================================== --- src/test/java/org/apache/harmony/luni/tests/java/lang/CharacterTest.java (revision 490914) +++ src/test/java/org/apache/harmony/luni/tests/java/lang/CharacterTest.java (working copy) @@ -671,6 +671,10 @@ public void test_digitCI() { assertEquals("Returned incorrect digit", 1, Character.digit('1', 10)); assertEquals("Returned incorrect digit", 15, Character.digit('F', 16)); + assertEquals(-1, Character.digit('\u0000', 37)); + assertEquals(-1, Character.digit('\u0045', 10)); + assertEquals(10, Character.digit('\u0041', 20)); + assertEquals(10, Character.digit('\u0061', 20)); } /** @@ -737,6 +741,20 @@ .getNumericValue('\u2182')); assertEquals("Returned incorrect numeric value 6", 2, Character .getNumericValue('\uff12')); + assertEquals(50, Character.getNumericValue('\u216C')); + + assertEquals(10, Character.getNumericValue('\u0041')); + assertEquals(35, Character.getNumericValue('\u005A')); + assertEquals(10, Character.getNumericValue('\u0061')); + assertEquals(35, Character.getNumericValue('\u007A')); + assertEquals(10, Character.getNumericValue('\uFF21')); + + // FIXME depends on ICU4J + // assertEquals(35, Character.getNumericValue('\uFF3A')); + + assertEquals(10, Character.getNumericValue('\uFF41')); + assertEquals(35, Character.getNumericValue('\uFF5A')); + } /** @@ -751,7 +769,6 @@ assertEquals(2, Character.getNumericValue((int)'\uff12')); assertEquals(-1, Character.getNumericValue(0xFFFF)); - assertEquals(-1, Character.getNumericValue(0xFFFF)); assertEquals(0, Character.getNumericValue(0x1D7CE)); assertEquals(0, Character.getNumericValue(0x1D7D8)); assertEquals(-1, Character.getNumericValue(0x2F800)); @@ -801,6 +818,80 @@ assertEquals("Wrong constant for FORMAT", 16, Character.FORMAT); assertEquals("Wrong constant for PRIVATE_USE", 18, Character.PRIVATE_USE); + + assertEquals(Character.UNASSIGNED, 
Character.getType('\u9FFF')); + + assertEquals(Character.UPPERCASE_LETTER, Character.getType('\u0041')); + + assertEquals(Character.LOWERCASE_LETTER, Character.getType('\u0061')); + + assertEquals(Character.TITLECASE_LETTER, Character.getType('\u01C5')); + assertEquals(Character.TITLECASE_LETTER, Character.getType('\u1FFC')); + + assertEquals(Character.MODIFIER_LETTER, Character.getType('\u02B0')); + assertEquals(Character.MODIFIER_LETTER, Character.getType('\uFF9F')); + + assertEquals(Character.OTHER_LETTER, Character.getType('\u01BB')); + + assertEquals(Character.NON_SPACING_MARK, Character.getType('\u0F82')); + + assertEquals(Character.ENCLOSING_MARK, Character.getType('\u0488')); + assertEquals(Character.ENCLOSING_MARK, Character.getType('\u20DE')); + + assertEquals(Character.COMBINING_SPACING_MARK, Character.getType('\u1938')); + + assertEquals(Character.DECIMAL_DIGIT_NUMBER, Character.getType('\u194D')); + + assertEquals(Character.LETTER_NUMBER, Character.getType('\u2160')); + + assertEquals(Character.OTHER_NUMBER, Character.getType('\u00B2')); + + assertEquals(Character.SPACE_SEPARATOR, Character.getType('\u0020')); + assertEquals(Character.SPACE_SEPARATOR, Character.getType('\u3000')); + + assertEquals(Character.LINE_SEPARATOR, Character.getType('\u2028')); + + assertEquals(Character.PARAGRAPH_SEPARATOR, Character.getType('\u2029')); + + assertEquals(Character.CONTROL, Character.getType('\u0000')); + assertEquals(Character.CONTROL, Character.getType('\u009F')); + + assertEquals(Character.FORMAT, Character.getType('\u00AD')); + + assertEquals(Character.PRIVATE_USE, Character.getType('\uE000')); + + assertEquals(Character.SURROGATE, Character.getType('\uD800')); + assertEquals(Character.SURROGATE, Character.getType('\uDFFF')); + + assertEquals(Character.DASH_PUNCTUATION, Character.getType('\uFE31')); + assertEquals(Character.DASH_PUNCTUATION, Character.getType('\uFF0D')); + + assertEquals(Character.START_PUNCTUATION, Character.getType('\u0028')); + 
assertEquals(Character.START_PUNCTUATION, Character.getType('\uFF62')); + + assertEquals(Character.END_PUNCTUATION, Character.getType('\u0029')); + assertEquals(Character.END_PUNCTUATION, Character.getType('\uFF63')); + + assertEquals(Character.CONNECTOR_PUNCTUATION, Character.getType('\u005F')); + assertEquals(Character.CONNECTOR_PUNCTUATION, Character.getType('\uFF3F')); + + assertEquals(Character.OTHER_PUNCTUATION, Character.getType('\u2034')); + + assertEquals(Character.MATH_SYMBOL, Character.getType('\u002B')); + + assertEquals(Character.CURRENCY_SYMBOL, Character.getType('\u0024')); + assertEquals(Character.CURRENCY_SYMBOL, Character.getType('\uFFE6')); + + assertEquals(Character.MODIFIER_SYMBOL, Character.getType('\u005E')); + assertEquals(Character.MODIFIER_SYMBOL, Character.getType('\uFFE3')); + + assertEquals(Character.OTHER_SYMBOL, Character.getType('\u00A6')); + + assertEquals(Character.INITIAL_QUOTE_PUNCTUATION, Character.getType('\u00AB')); + assertEquals(Character.INITIAL_QUOTE_PUNCTUATION, Character.getType('\u2039')); + + assertEquals(Character.FINAL_QUOTE_PUNCTUATION, Character.getType('\u00BB')); + assertEquals(Character.FINAL_QUOTE_PUNCTUATION, Character.getType('\u203A')); } /** @@ -1038,6 +1129,7 @@ public void test_isMirrored_C() { assertTrue(Character.isMirrored('\u0028')); assertFalse(Character.isMirrored('\uFFFF')); + assertFalse(Character.isMirrored('\u0000')); } /** @@ -1093,7 +1185,8 @@ assertTrue("digit returned false", Character.isJavaIdentifierPart('9')); assertTrue("connecting char returned false", Character .isJavaIdentifierPart('_')); - assertTrue("ignorable control returned true", !Character +// RI fails because 0x200B changes category in Unicode 4.1 + assertTrue("ignorable control returned false", Character .isJavaIdentifierPart('\u200b')); assertTrue("semi returned true", !Character.isJavaIdentifierPart(';')); } @@ -1288,6 +1381,9 @@ public void test_isLowerCaseC() { assertTrue("lower returned false", 
Character.isLowerCase('a')); assertTrue("upper returned true", !Character.isLowerCase('T')); + + assertTrue(Character.isLowerCase('z')); + assertFalse(Character.isLowerCase('Z')); } /** @@ -1321,6 +1417,7 @@ public void test_isSpaceCharC() { assertTrue("space returned false", Character.isSpaceChar('\u0020')); assertTrue("non-space returned true", !Character.isSpaceChar('\n')); + assertTrue(Character.isSpaceChar('\u2000')); } /** @@ -1392,6 +1489,10 @@ assertTrue("'a' returned false", Character.isUnicodeIdentifierPart('a')); assertTrue("'2' returned false", Character.isUnicodeIdentifierPart('2')); assertTrue("'+' returned true", !Character.isUnicodeIdentifierPart('+')); + assertTrue(Character.isUnicodeIdentifierPart('\u1FA9')); + assertTrue(Character.isUnicodeIdentifierPart('\u0030')); + assertTrue(Character.isUnicodeIdentifierPart('\uFF10')); + } /** @@ -1451,6 +1552,10 @@ .isUnicodeIdentifierStart('2')); assertTrue("'+' returned true", !Character .isUnicodeIdentifierStart('+')); + assertTrue(Character.isUnicodeIdentifierStart('\u1FA9')); + assertTrue(Character.isUnicodeIdentifierStart('\u16EE')); + + assertFalse(Character.isUnicodeIdentifierStart('\u06F9')); } /** @@ -1487,6 +1592,12 @@ public void test_isUpperCaseC() { assertTrue("Incorrect case value", !Character.isUpperCase('t')); assertTrue("Incorrect case value", Character.isUpperCase('T')); + assertTrue(Character.isUpperCase('A')); + assertTrue(Character.isUpperCase('Z')); + + assertFalse(Character.isUpperCase('a')); + assertFalse(Character.isUpperCase('z')); + } /** @@ -1510,6 +1621,18 @@ public void test_isWhitespaceC() { assertTrue("space returned false", Character.isWhitespace('\n')); assertTrue("non-space returned true", !Character.isWhitespace('T')); + assertTrue(Character.isWhitespace('\u0009')); + + assertTrue(Character.isWhitespace('\u2000')); + assertTrue(Character.isWhitespace('\u200A')); + + assertTrue(Character.isWhitespace('\u2028')); + assertTrue(Character.isWhitespace('\u2029')); + + 
assertFalse(Character.isWhitespace('\u00A0')); + assertFalse(Character.isWhitespace('\u202F')); + + assertFalse(Character.isWhitespace('\uFEFF')); } /** @@ -1570,6 +1693,13 @@ */ public void test_toLowerCaseC() { assertEquals("Failed to change case", 't', Character.toLowerCase('T')); + assertEquals('a', Character.toLowerCase('A')); + assertEquals('a', Character.toLowerCase('a')); + assertEquals('z', Character.toLowerCase('Z')); + assertEquals('z', Character.toLowerCase('z')); + + assertEquals('\u0000', Character.toLowerCase('\u0000')); + assertEquals('\uFFFF', Character.toLowerCase('\uFFFF')); } /** @@ -1603,6 +1733,8 @@ 'A', Character.toTitleCase('A')); assertEquals("Incorrect title case for 1", '1', Character.toTitleCase('1')); + assertEquals('Z', Character.toTitleCase('z')); + assertEquals('Z', Character.toTitleCase('Z')); } /** @@ -1631,6 +1763,11 @@ 'A', Character.toUpperCase('A')); assertEquals("Incorrect upper case for 1", '1', Character.toUpperCase('1')); + assertEquals('\u0000', Character.toUpperCase('\u0000')); + assertEquals('\u0041', Character.toUpperCase('\u0061')); + assertEquals('\u005A', Character.toUpperCase('\u007A')); + assertEquals('\u0041', Character.toUpperCase('\u0041')); + assertEquals('\uFFFF', Character.toUpperCase('\uFFFF')); } /** @@ -1747,6 +1884,94 @@ .getDirectionality(0x202E)); assertEquals(Character.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT, Character - .getDirectionality(0x202C)); + .getDirectionality(0x202C)); + // Fails on the RI; this is a non-bug difference between Unicode 4.0 and 4.1 + assertEquals(Character.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR, + Character.getDirectionality(0x002B)); + assertEquals(Character.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR, + Character.getDirectionality(0xFF0B)); + } + + /** + * @tests java.lang.Character#getDirectionality(char) + */ + public void test_getDirectionality_C(){ + assertEquals(Character.DIRECTIONALITY_UNDEFINED, Character + .getDirectionality('\uFFFE')); + + 
assertEquals(Character.DIRECTIONALITY_LEFT_TO_RIGHT, Character + .getDirectionality('\u0041')); + + assertEquals(Character.DIRECTIONALITY_RIGHT_TO_LEFT, Character + .getDirectionality('\uFB4F')); + + assertEquals(Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC, Character + .getDirectionality('\u0600')); + assertEquals(Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC, Character + .getDirectionality('\uFEFC')); + + assertEquals(Character.DIRECTIONALITY_EUROPEAN_NUMBER, Character + .getDirectionality('\u2070')); + + assertEquals(Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR, Character + .getDirectionality('\u0023')); + assertEquals(Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR, Character + .getDirectionality('\u17DB')); + + assertEquals(Character.DIRECTIONALITY_ARABIC_NUMBER, Character + .getDirectionality('\u0660')); + assertEquals(Character.DIRECTIONALITY_ARABIC_NUMBER, Character + .getDirectionality('\u066C')); + + assertEquals(Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR, Character + .getDirectionality('\u002C')); + assertEquals(Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR, Character + .getDirectionality('\uFF1A')); + + assertEquals(Character.DIRECTIONALITY_NONSPACING_MARK, Character + .getDirectionality('\u17CE')); + + assertEquals(Character.DIRECTIONALITY_BOUNDARY_NEUTRAL, Character + .getDirectionality('\u0000')); + + assertEquals(Character.DIRECTIONALITY_PARAGRAPH_SEPARATOR, Character + .getDirectionality('\u2029')); + + assertEquals(Character.DIRECTIONALITY_SEGMENT_SEPARATOR, Character + .getDirectionality('\u0009')); + assertEquals(Character.DIRECTIONALITY_SEGMENT_SEPARATOR, Character + .getDirectionality('\u001F')); + + assertEquals(Character.DIRECTIONALITY_WHITESPACE, Character + .getDirectionality('\u0020')); + assertEquals(Character.DIRECTIONALITY_WHITESPACE, Character + .getDirectionality('\u3000')); + + assertEquals(Character.DIRECTIONALITY_OTHER_NEUTRALS, Character + .getDirectionality('\u2FF0')); + + 
assertEquals(Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING, Character + .getDirectionality('\u202A')); + + assertEquals(Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE, Character + .getDirectionality('\u202D')); + + assertEquals(Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING, Character + .getDirectionality('\u202B')); + + assertEquals(Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE, Character + .getDirectionality('\u202E')); + + assertEquals(Character.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT, Character + .getDirectionality('\u202C')); + + // Fails on the RI; this is a non-bug difference between Unicode 4.0 and 4.1 + assertEquals(Character.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR, + Character.getDirectionality('\u002B')); + assertEquals(Character.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR, + Character.getDirectionality('\uFF0B')); + } + + } Index: src/main/java/java/lang/Character.java =================================================================== --- src/main/java/java/lang/Character.java (revision 490914) +++ src/main/java/java/lang/Character.java (working copy) @@ -21,8 +21,6 @@ import java.util.SortedMap; import java.util.TreeMap; -import org.apache.harmony.luni.util.BinarySearch; - import com.ibm.icu.lang.UCharacter; /** @@ -2433,29 +2431,7 @@ * otherwise -1. */ public static int digit(char c, int radix) { - if (radix >= MIN_RADIX && radix <= MAX_RADIX) { - if (c < 128) { - // Optimized for ASCII - int result = -1; - if ('0' <= c && c <= '9') { - result = c - '0'; - } else if ('a' <= c && c <= 'z') { - result = c - ('a' - 10); - } else if ('A' <= c && c <= 'Z') { - result = c - ('A' - 10); - } - return result < radix ? 
result : -1; - } - int result = BinarySearch.binarySearchRange(digitKeys, c); - if (result >= 0 && c <= digitValues[result * 2]) { - int value = (char) (c - digitValues[result * 2 + 1]); - if (value >= radix) { - return -1; - } - return value; - } - } - return -1; + return UCharacter.digit(c, radix); } /** @@ -2523,32 +2499,7 @@ * the numeric value is not an int >= 0 */ public static int getNumericValue(char c) { - if (c < 128) { - // Optimized for ASCII - if (c >= '0' && c <= '9') { - return c - '0'; - } - if (c >= 'a' && c <= 'z') { - return c - ('a' - 10); - } - if (c >= 'A' && c <= 'Z') { - return c - ('A' - 10); - } - return -1; - } - int result = BinarySearch.binarySearchRange(numericKeys, c); - if (result >= 0 && c <= numericValues[result * 2]) { - char difference = numericValues[result * 2 + 1]; - if (difference == 0) { - return -2; - } - // Value is always positive, must be negative value - if (difference > c) { - return c - (short) difference; - } - return c - difference; - } - return -1; + return UCharacter.getNumericValue(c); } /** @@ -2571,16 +2522,7 @@ * @return the Unicode category */ public static int getType(char c) { - int result = BinarySearch.binarySearchRange(typeKeys, c); - int high = typeValues[result * 2]; - if (c <= high) { - int code = typeValues[result * 2 + 1]; - if (code < 0x100) { - return code; - } - return (c & 1) == 1 ? 
code >> 8 : code & 0xff; - } - return UNASSIGNED; + return type(c); } /** @@ -2591,6 +2533,10 @@ * @return the Unicode category */ public static int getType(int codePoint) { + return type(codePoint); + } + + private static int type(int codePoint){ int type = UCharacter.getType(codePoint); // the type values returned by UCharacter are not compatible with what @@ -2609,16 +2555,7 @@ * @return the Unicode directionality */ public static byte getDirectionality(char c) { - int result = BinarySearch.binarySearchRange(bidiKeys, c); - int high = bidiValues[result * 2]; - if (c <= high) { - int code = bidiValues[result * 2 + 1]; - if (code < 0x100) { - return (byte) (code - 1); - } - return (byte) (((c & 1) == 1 ? code >> 8 : code & 0xff) - 1); - } - return DIRECTIONALITY_UNDEFINED; + return directionality(c); } /** @@ -2629,6 +2566,10 @@ * @return the Unicode directionality */ public static byte getDirectionality(int codePoint) { + return directionality(codePoint); + } + + private static byte directionality(int codePoint) { if (getType(codePoint) == Character.UNASSIGNED) { return Character.DIRECTIONALITY_UNDEFINED; } @@ -2648,12 +2589,7 @@ * @return true if the character is mirrored, false otherwise */ public static boolean isMirrored(char c) { - int value = c / 16; - if (value >= mirrored.length) { - return false; - } - int bit = 1 << (c % 16); - return (mirrored[value] & bit) != 0; + return UCharacter.isMirrored(c); } /** @@ -2941,15 +2877,7 @@ * @return true when the character is a lower case letter, false otherwise */ public static boolean isLowerCase(char c) { - // Optimized case for ASCII - if ('a' <= c && c <= 'z') { - return true; - } - if (c < 128) { - return false; - } - - return getType(c) == LOWERCASE_LETTER; + return UCharacter.isLowerCase(c); } /** @@ -2984,14 +2912,7 @@ * otherwise */ public static boolean isSpaceChar(char c) { - if (c == 0x20 || c == 0xa0 || c == 0x1680) { - return true; - } - if (c < 0x2000) { - return false; - } - return c <= 0x200b || c 
== 0x2028 || c == 0x2029 || c == 0x202f - || c == 0x3000; + return UCharacter.isSpaceChar(c); } /** @@ -3017,18 +2938,7 @@ * otherwise */ public static boolean isTitleCase(char c) { - if (c == '\u01c5' || c == '\u01c8' || c == '\u01cb' || c == '\u01f2') { - return true; - } - if (c >= '\u1f88' && c <= '\u1ffc') { - // 0x1f88 - 0x1f8f, 0x1f98 - 0x1f9f, 0x1fa8 - 0x1faf - if (c > '\u1faf') { - return c == '\u1fbc' || c == '\u1fcc' || c == '\u1ffc'; - } - int last = c & 0xf; - return last >= 8 && last <= 0xf; - } - return false; + return UCharacter.isTitleCase(c); } /** @@ -3053,12 +2963,7 @@ * false otherwise */ public static boolean isUnicodeIdentifierPart(char c) { - int type = getType(c); - return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) - || type == CONNECTOR_PUNCTUATION - || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER) - || type == NON_SPACING_MARK || type == COMBINING_SPACING_MARK - || isIdentifierIgnorable(c); + return UCharacter.isUnicodeIdentifierPart(c); } /** @@ -3084,9 +2989,7 @@ * false otherwise */ public static boolean isUnicodeIdentifierStart(char c) { - int type = getType(c); - return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) - || type == LETTER_NUMBER; + return UCharacter.isUnicodeIdentifierStart(c); } /** @@ -3110,15 +3013,7 @@ * @return true when the character is a upper case letter, false otherwise */ public static boolean isUpperCase(char c) { - // Optimized case for ASCII - if ('A' <= c && c <= 'Z') { - return true; - } - if (c < 128) { - return false; - } - - return getType(c) == UPPERCASE_LETTER; + return UCharacter.isUpperCase(c); } /** @@ -3141,17 +3036,7 @@ * in Java, otherwise false. 
*/ public static boolean isWhitespace(char c) { - // Optimized case for ASCII - if ((c >= 0x1c && c <= 0x20) || (c >= 0x9 && c <= 0xd)) { - return true; - } - if (c == 0x1680) { - return true; - } - if (c < 0x2000 || c == 0x2007) { - return false; - } - return c <= 0x200b || c == 0x2028 || c == 0x2029 || c == 0x3000; + return UCharacter.isWhitespace(c); } /** @@ -3188,32 +3073,7 @@ * counterpart, otherwise just c */ public static char toLowerCase(char c) { - // Optimized case for ASCII - if ('A' <= c && c <= 'Z') { - return (char) (c + ('a' - 'A')); - } - if (c < 128) { - return c; - } - - int result = BinarySearch.binarySearchRange(lowercaseKeys, c); - if (result >= 0) { - boolean by2 = false; - char start = lowercaseKeys.charAt(result); - char end = lowercaseValues[result * 2]; - if ((start & 0x8000) != (end & 0x8000)) { - end ^= 0x8000; - by2 = true; - } - if (c <= end) { - if (by2 && (c & 1) != (start & 1)) { - return c; - } - char mapping = lowercaseValues[result * 2 + 1]; - return (char) (c + mapping); - } - } - return c; + return (char)UCharacter.toLowerCase(c); } /** @@ -3260,14 +3120,7 @@ * @return the title case equivalent of the character */ public static char toTitleCase(char c) { - if (isTitleCase(c)) { - return c; - } - int result = BinarySearch.binarySearch(titlecaseKeys, c); - if (result >= 0) { - return titlecaseValues[result]; - } - return toUpperCase(c); + return (char)UCharacter.toTitleCase(c); } /** @@ -3292,32 +3145,7 @@ * counterpart, otherwise just c */ public static char toUpperCase(char c) { - // Optimized case for ASCII - if ('a' <= c && c <= 'z') { - return (char) (c - ('a' - 'A')); - } - if (c < 128) { - return c; - } - - int result = BinarySearch.binarySearchRange(uppercaseKeys, c); - if (result >= 0) { - boolean by2 = false; - char start = uppercaseKeys.charAt(result); - char end = uppercaseValues[result * 2]; - if ((start & 0x8000) != (end & 0x8000)) { - end ^= 0x8000; - by2 = true; - } - if (c <= end) { - if (by2 && (c & 1) != 
(start & 1)) { - return c; - } - char mapping = uppercaseValues[result * 2 + 1]; - return (char) (c + mapping); - } - } - return c; + return (char)UCharacter.toUpperCase(c); } /**