Index: working_classlib/modules/regex/src/main/java/java/util/regex/Matcher.java =================================================================== --- working_classlib/modules/regex/src/main/java/java/util/regex/Matcher.java (revision 766087) +++ working_classlib/modules/regex/src/main/java/java/util/regex/Matcher.java Fri Apr 17 17:15:25 PDT 2009 @@ -22,7 +22,31 @@ import org.apache.harmony.regex.internal.nls.Messages; /** - * Note: main functionality of this class is hidden into nodes match methods. + * Provides a means of matching regular expressions against a given input, + * finding occurrences of regular expressions in a given input, or replacing + * parts of a given input. A {@code Matcher} instance has an associated {@link + * Pattern} instance and an input text. A typical use case is to + * iteratively find all occurrences of the {@code Pattern}, until the end of + * the input is reached, as the following example illustrates: + * + *

+ * + *

+ * Pattern p = Pattern.compile("[A-Za-z]+");
+ *
+ * Matcher m = p.matcher("Hello, Android!");
+ * while (m.find()) {
+ *     System.out.println(m.group()); // prints "Hello" and "Android"
+ * }
+ * 
+ * + *

+ * + * The {@code Matcher} has a state that results from the previous operations. + * For example, it knows whether the most recent attempt to find the + * {@code Pattern} was successful and at which position the next attempt would + * resume the search. Depending on the application's needs, it may become + * necessary to explicitly {@link #reset()} this state from time to time. */ public final class Matcher implements MatchResult { @@ -53,12 +77,25 @@ private ArrayList replacementParts = null; /** - * @com.intel.drl.spec_ref + * Appends a literal part of the input plus a replacement for the current + * match to a given {@link StringBuffer}. The literal part is exactly the + * part of the input between the previous match and the current match. The + * method can be used in conjunction with {@link #find()} and + * {@link #appendTail(StringBuffer)} to walk through the input and replace + * all occurrences of the {@code Pattern} with something else. + * + * @param buffer + * the {@code StringBuffer} to append to. + * @param replacement + * the replacement text. + * @return the {@code Matcher} itself. + * @throws IllegalStateException + * if no successful match has been made. */ - public Matcher appendReplacement(StringBuffer sb, String replacement) { + public Matcher appendReplacement(StringBuffer buffer, String replacement) { processedRepl = processReplacement(replacement); - sb.append(string.subSequence(appendPos, start())); - sb.append(processedRepl); + buffer.append(string.subSequence(appendPos, start())); + buffer.append(processedRepl); appendPos = end(); return this; } @@ -148,18 +185,31 @@ } /** - * @com.intel.drl.spec_ref + * Provides a new input and resets the {@code Matcher}. This results in the + * region being set to the whole input. Results of a previous find get lost. + * The next attempt to find an occurrence of the {@link Pattern} in the + * string will start at the beginning of the input. + * + * @param input + * the new input sequence. 
+ * + * @return the {@code Matcher} itself. */ - public Matcher reset(CharSequence newSequence) { - if (newSequence == null) { + public Matcher reset(CharSequence input) { + if (input == null) { throw new NullPointerException(Messages.getString("regex.01")); //$NON-NLS-1$ } - this.string = newSequence; + this.string = input; return reset(); } /** - * @com.intel.drl.spec_ref + * Resets the {@code Matcher}. This results in the region being set to the + * whole input. Results of a previous find get lost. The next attempt to + * find an occurrence of the {@link Pattern} in the string will start at the + * beginning of the input. + * + * @return the {@code Matcher} itself. */ public Matcher reset() { this.leftBound = 0; @@ -172,44 +222,57 @@ } /** - * @com.intel.drl.spec_ref + * Resets this matcher and sets a region. Only characters inside the region + * are considered for a match. + * + * @param start + * the first character of the region. + * @param end + * the first character after the end of the region. + * @return the {@code Matcher} itself. */ - public Matcher region(int leftBound, int rightBound) { + public Matcher region(int start, int end) { - if (leftBound > rightBound || leftBound < 0 || rightBound < 0 - || leftBound > string.length() || rightBound > string.length()) { + if (start > end || start < 0 || end < 0 + || start > string.length() || end > string.length()) { throw new IndexOutOfBoundsException( Messages.getString("regex.02", //$NON-NLS-1$ - Integer.toString(leftBound), Integer.toString(rightBound))); + Integer.toString(start), Integer.toString(end))); } - this.leftBound = leftBound; - this.rightBound = rightBound; - matchResult.reset(null, leftBound, rightBound); + this.leftBound = start; + this.rightBound = end; + matchResult.reset(null, start, end); appendPos = 0; replacement = null; return this; } - /** + /** - * TODO: appendTail(StringBuffer) javadoc + * Appends the (unmatched) remainder of the input to the given + * {@link StringBuffer}. 
The method can be used in conjunction with + * {@link #find()} and {@link #appendReplacement(StringBuffer, String)} to + * walk through the input and replace all matches of the {@code Pattern} + * with something else. - * + * - * @param sb - * @return + * @param buffer + * the {@code StringBuffer} to append to. + * @return the {@code StringBuffer}. + * @throws IllegalStateException + * if no successful match has been made. - */ + */ - public StringBuffer appendTail(StringBuffer sb) { - return sb.append(string.subSequence(appendPos, string.length())); + public StringBuffer appendTail(StringBuffer buffer) { + return buffer.append(string.subSequence(appendPos, string.length())); } - /** + /** - * This is very similar to replaceAll except only the first occurrence of a - * sequence matching the pattern is replaced. + * Replaces the first occurrence of this matcher's pattern in the input with + * a given string. - * - * @param replacement + * + * @param replacement - * A string to replace occurrences of character sequences - * matching the pattern. - * @return A new string with replacements inserted + * the replacement text. + * @return the modified input string. - */ + */ public String replaceFirst(String replacement) { reset(); if (find()) { @@ -222,16 +285,14 @@ } - /** + /** - * Replace all occurrences of character sequences which match the pattern - * with the given replacement string. The replacement string may refer to - * capturing groups using the syntax "$". + * Replaces all occurrences of this matcher's pattern in the input with a + * given string. - * - * @param replacement + * + * @param replacement - * A string to replace occurrences of character sequences - * matching the pattern. - * @return A new string with replacements inserted + * the replacement text. + * @return the modified input string. 
- */ + */ public String replaceAll(String replacement) { StringBuffer sb = new StringBuffer(); reset(); @@ -242,40 +303,59 @@ return appendTail(sb).toString(); } - /** + /** - * Return a reference to the pattern used by this Matcher. + * Returns the {@link Pattern} instance used inside this matcher. - * + * - * @return A reference to the pattern used by this Matcher. + * @return the {@code Pattern} instance. - */ + */ public Pattern pattern() { return pat; } /** - * @com.intel.drl.spec_ref + * Returns the text that matched a given group of the regular expression. + * + * @param group + * the group, ranging from 0 to groupCount(), with 0 + * representing the whole pattern. + * @return the text that matched the group. + * @throws IllegalStateException + * if no successful match has been made. */ - public String group(int groupIndex) { - return matchResult.group(groupIndex); + public String group(int group) { + return matchResult.group(group); } /** - * @com.intel.drl.spec_ref + * Returns the text that matched the whole regular expression. + * + * @return the text. + * @throws IllegalStateException + * if no successful match has been made. */ public String group() { return group(0); } /** - * @com.intel.drl.spec_ref + * Returns the next occurrence of the {@link Pattern} in the input. The + * method starts the search from the given character in the input. + * + * @param start + * The index in the input at which the find operation is to + * begin. If this is less than the start of the region, it is + * automatically adjusted to that value. If it is beyond the end + * of the region, the method will fail. + * @return true if (and only if) a match has been found.
*/ - public boolean find(int startIndex) { + public boolean find(int start) { int stringLength = string.length(); - if (startIndex < 0 || startIndex > stringLength) + if (start < 0 || start > stringLength) throw new IndexOutOfBoundsException(Messages.getString("regex.03", //$NON-NLS-1$ - new Integer(startIndex))); + new Integer(start))); - startIndex = findAt(startIndex); - if (startIndex >= 0 && matchResult.isValid()) { + start = findAt(start); + if (start >= 0 && matchResult.isValid()) { matchResult.finalizeMatch(); return true; } @@ -294,14 +374,14 @@ return foundIndex; } - /** + /** - * The find() method matches the pattern against the character sequence - * beginning at the character after the last match or at the beginning of - * the sequence if called immediately after reset(). The method returns true - * if and only if a match is found. + * Returns the next occurrence of the {@link Pattern} in the input. If a + * previous match was successful, the method continues the search from the + * first character following that match in the input. Otherwise it searches + * either from the region start (if one has been set), or from position 0. - * + * - * @return A boolean indicating if the pattern was matched. + * @return true if (and only if) a match has been found. - */ + */ public boolean find() { int length = string.length(); if (!hasTransparentBounds()) @@ -321,45 +401,66 @@ } /** - * @com.intel.drl.spec_ref + * Returns the index of the first character of the text that matched a given + * group. + * + * @param group + * the group, ranging from 0 to groupCount(), with 0 + * representing the whole pattern. + * @return the character index. + * @throws IllegalStateException + * if no successful match has been made.
*/ - public int start(int groupIndex) { - return matchResult.start(groupIndex); + public int start(int group) { + return matchResult.start(group); } /** - * @com.intel.drl.spec_ref + * Returns the index of the first character following the text that matched + * a given group. + * + * @param group + * the group, ranging from 0 to groupCount(), with 0 + * representing the whole pattern. + * @return the character index. + * @throws IllegalStateException + * if no successful match has been made. */ - public int end(int groupIndex) { - return matchResult.end(groupIndex); + public int end(int group) { + return matchResult.end(group); } - /** + /** - * This method is identical in function to the Pattern.matches() method. It - * returns true if and only if the regular expression pattern matches the - * entire input character sequence. + * Tries to match the {@link Pattern} against the entire region (or the + * entire input, if no region has been set). - * + * - * @return A boolean indicating if the pattern matches the entire input - * character sequence. + * @return true if (and only if) the {@code Pattern} matches the entire + * region. - */ + */ public boolean matches() { return lookingAt(leftBound, Matcher.MODE_MATCH); } /** - * @com.intel.drl.spec_ref + * Returns a replacement string for the given one that has all backslashes + * and dollar signs escaped. + * + * @param s + * the input string. + * @return the input string, with all backslashes and dollar signs having + * been escaped.
*/ - public static String quoteReplacement(String string) { + public static String quoteReplacement(String s) { // first check whether we have smth to quote - if (string.indexOf('\\') < 0 && string.indexOf('$') < 0) - return string; - StringBuffer res = new StringBuffer(string.length() * 2); + if (s.indexOf('\\') < 0 && s.indexOf('$') < 0) + return s; + StringBuffer res = new StringBuffer(s.length() * 2); char ch; - int len = string.length(); + int len = s.length(); for (int i = 0; i < len; i++) { - switch (ch = string.charAt(i)) { + switch (ch = s.charAt(i)) { case '$': res.append('\\'); res.append('$'); @@ -392,15 +493,13 @@ return false; } - /** + /** - * This method attempts to match the pattern against the character sequence - * starting at the beginning. If the pattern matches even a prefix of the - * input character sequence, lookingAt() will return true. Otherwise it will - * return false. + * Tries to match the {@link Pattern}, starting from the beginning of the + * region (or the beginning of the input, if no region has been set). + * Doesn't require the {@code Pattern} to match against the whole region. - * + * - * @return A boolean indicating if the pattern matches a prefix of the input - * character sequence. + * @return true if (and only if) the {@code Pattern} matches. - */ + */ public boolean lookingAt() { return lookingAt(leftBound, Matcher.MODE_FIND); } @@ -413,37 +512,61 @@ } /** - * @com.intel.drl.spec_ref + * Returns the index of the first character of the text that matched the + * whole regular expression. + * + * @return the character index. + * @throws IllegalStateException + * if no successful match has been made. */ public int start() { return start(0); } - /** + /** - * Return the number of capturing groups in the pattern. + * Returns the number of groups in the results, which is always equal to + * the number of groups in the original regular expression. - * + * - * @return The number of capturing groups in the pattern. 
+ * @return the number of groups. - */ + */ public int groupCount() { return matchResult.groupCount(); } /** - * @com.intel.drl.spec_ref + * Returns the index of the first character following the text that matched + * the whole regular expression. + * + * @return the character index. + * @throws IllegalStateException + * if no successful match has been made. */ public int end() { return end(0); } /** - * @com.intel.drl.spec_ref + * Converts the current match into a separate {@link MatchResult} instance + * that is independent from this matcher. The new object is unaffected when + * the state of this matcher changes. + * + * @return the new {@code MatchResult}. + * @throws IllegalStateException + * if no successful match has been made. */ public MatchResult toMatchResult() { return this.matchResult.cloneImpl(); } /** - * @com.intel.drl.spec_ref + * Determines whether this matcher has anchoring bounds enabled or not. When + * anchoring bounds are enabled, the start and end of the input match the + * '^' and '$' meta-characters, otherwise not. Anchoring bounds are enabled + * by default. + * + * @param value + * the new value for anchoring bounds. + * @return the {@code Matcher} itself. */ public Matcher useAnchoringBounds(boolean value) { matchResult.useAnchoringBounds(value); @@ -451,14 +574,26 @@ } /** - * @com.intel.drl.spec_ref + * Indicates whether this matcher has anchoring bounds enabled. When + * anchoring bounds are enabled, the start and end of the input match the + * '^' and '$' meta-characters, otherwise not. Anchoring bounds are enabled + * by default. + * + * @return true if (and only if) the {@code Matcher} uses anchoring bounds. */ public boolean hasAnchoringBounds() { return matchResult.hasAnchoringBounds(); } /** - * @com.intel.drl.spec_ref + * Determines whether this matcher has transparent bounds enabled or not. 
+ * When transparent bounds are enabled, the parts of the input outside the + * region are subject to lookahead and lookbehind, otherwise they are not. + * Transparent bounds are disabled by default. + * + * @param value + * the new value for transparent bounds. + * @return the {@code Matcher} itself. */ public Matcher useTransparentBounds(boolean value) { matchResult.useTransparentBounds(value); @@ -466,53 +601,77 @@ } /** - * @com.intel.drl.spec_ref + * Indicates whether this matcher has transparent bounds enabled. When + * transparent bounds are enabled, the parts of the input outside the region + * are subject to lookahead and lookbehind, otherwise they are not. + * Transparent bounds are disabled by default. + * + * @return true if (and only if) the {@code Matcher} uses transparent bounds. */ public boolean hasTransparentBounds() { return matchResult.hasTransparentBounds(); } /** - * @com.intel.drl.spec_ref + * Returns this matcher's region start, that is, the first character that is + * considered for a match. + * + * @return the start of the region. */ public int regionStart() { return matchResult.getLeftBound(); } /** - * @com.intel.drl.spec_ref + * Returns this matcher's region end, that is, the first character that is + * not considered for a match. + * + * @return the end of the region. */ public int regionEnd() { return matchResult.getRightBound(); } /** - * @com.intel.drl.spec_ref + * Indicates whether more input might change a successful match into an + * unsuccessful one. + * + * @return true if (and only if) more input might change a successful match + * into an unsuccessful one. */ public boolean requireEnd() { return matchResult.requireEnd; } /** - * @com.intel.drl.spec_ref + * Indicates whether the last match hit the end of the input. + * + * @return true if (and only if) the last match hit the end of the input. */ public boolean hitEnd() { return matchResult.hitEnd; } /** - * @com.intel.drl.spec_ref + * Sets a new pattern for the {@code Matcher}.
Results of a previous find + * get lost. The next attempt to find an occurrence of the {@link Pattern} + * in the string will start at the beginning of the input. + * + * @param pattern + * the new {@code Pattern}. + * + * @return the {@code Matcher} itself. */ - public Matcher usePattern(Pattern pat) { - if (pat == null) { + public Matcher usePattern(Pattern pattern) { + if (pattern == null) { throw new IllegalArgumentException(Messages.getString("regex.1B")); } int startIndex = matchResult.getPreviousMatchEnd(); int mode = matchResult.mode(); - this.pat = pat; - this.start = pat.start; + this.pat = pattern; + this.start = pattern.start; matchResult = new MatchResultImpl(this.string, leftBound, rightBound, - pat.groupCount(), pat.compCount(), pat.consCount()); + pattern.groupCount(), pattern.compCount(), pattern.consCount()); matchResult.setStartIndex(startIndex); matchResult.setMode(mode); return this; Index: working_classlib/modules/regex/src/main/java/java/util/regex/MatchResult.java =================================================================== --- working_classlib/modules/regex/src/main/java/java/util/regex/MatchResult.java (revision 766087) +++ working_classlib/modules/regex/src/main/java/java/util/regex/MatchResult.java Fri Apr 17 17:15:25 PDT 2009 @@ -22,45 +22,83 @@ package java.util.regex; /** - * @com.intel.drl.spec_ref + * Holds the results of a successful match of a {@link Pattern} against a + * given string. The result is divided into groups, with one group for each + * pair of parentheses in the regular expression and an additional group for + * the whole regular expression. The start, end, and contents of each group + * can be queried. - * + * + * @see Matcher + * @see Matcher#toMatchResult() + * * @author Nikolay A. Kuznetsov * @version $Revision: 1.6.2.2 $ */ public interface MatchResult { /** - * @com.intel.drl.spec_ref + * Returns the index of the first character following the text that matched + * the whole regular expression. 
+ * + * @return the character index. */ int end(); /** - * @com.intel.drl.spec_ref + * Returns the index of the first character following the text that matched + * a given group. + * + * @param group + * the group, ranging from 0 to groupCount(), with 0 + * representing the whole pattern. + * + * @return the character index. */ int end(int group); /** - * @com.intel.drl.spec_ref + * Returns the text that matched the whole regular expression. + * + * @return the text. */ String group(); /** - * @com.intel.drl.spec_ref + * Returns the text that matched a given group of the regular expression. + * + * @param group + * the group, ranging from 0 to groupCount(), with 0 + * representing the whole pattern. + * + * @return the text that matched the group. */ String group(int group); /** - * @com.intel.drl.spec_ref + * Returns the number of groups in the result, which is always equal to + * the number of groups in the original regular expression. + * + * @return the number of groups. */ int groupCount(); /** - * @com.intel.drl.spec_ref + * Returns the index of the first character of the text that matched + * the whole regular expression. + * + * @return the character index. */ int start(); /** - * @com.intel.drl.spec_ref + * Returns the index of the first character of the text that matched a given + * group. + * + * @param group + * the group, ranging from 0 to groupCount(), with 0 + * representing the whole pattern. + * + * @return the character index.
*/ int start(int group); } Index: working_classlib/modules/regex/src/main/java/java/util/regex/PatternSyntaxException.java =================================================================== --- working_classlib/modules/regex/src/main/java/java/util/regex/PatternSyntaxException.java (revision 766087) +++ working_classlib/modules/regex/src/main/java/java/util/regex/PatternSyntaxException.java Fri Apr 17 17:15:25 PDT 2009 @@ -26,39 +26,74 @@ import org.apache.harmony.regex.internal.nls.Messages; /** - * @com.intel.drl.spec_ref + * Encapsulates a syntax error that occurred during the compilation of a + * {@link Pattern}. Might include a detailed description, the original regular + * expression, and the index at which the error occurred. - * + * + * @see Pattern#compile(String) + * @see Pattern#compile(java.lang.String,int) + * * @author Nikolay A. Kuznetsov * @version $Revision: 1.7.2.2 $ */ public class PatternSyntaxException extends IllegalArgumentException { private static final long serialVersionUID = -3864639126226059218L; - + + /** + * Holds the description of the syntax error, or null if the description is + * not known. + */ private String desc; - + + /** + * Holds the syntactically incorrect regular expression, or null if the + * regular expression is not known. + */ private String pattern; - + + /** + * Holds the index around which the error occurred, or -1, in case it is + * unknown. + */ private int index = -1; /** - * @com.intel.drl.spec_ref + * Creates a new PatternSyntaxException for a given message, pattern, and + * error index. + * + * @param description + * the description of the syntax error, or {@code null} if the + * description is not known. + * @param pattern + * the syntactically incorrect regular expression, or + * {@code null} if the regular expression is not known. + * @param index + * the character index around which the error occurred, or -1 if + * the index is not known.
*/ - public PatternSyntaxException(String desc, String pattern, int index) { - this.desc = desc; + public PatternSyntaxException(String description, String pattern, int index) { + this.desc = description; this.pattern = pattern; this.index = index; } /** - * @com.intel.drl.spec_ref + * Returns the syntactically incorrect regular expression. + * + * @return the regular expression. + * */ public String getPattern() { return pattern; } /** - * @com.intel.drl.spec_ref + * Returns a detailed error message for the exception. The message is + * potentially multi-line, and it might include a detailed description, the + * original regular expression, and the index at which the error occurred. + * + * @return the error message. */ public String getMessage() { String filler = ""; //$NON-NLS-1$ @@ -73,14 +108,21 @@ } /** - * @com.intel.drl.spec_ref + * Returns the description of the syntax error, or {@code null} if the + * description is not known. + * + * @return the description. */ public String getDescription() { return desc; } /** - * @com.intel.drl.spec_ref + * Returns the character index around which the error occurred, or -1 if the + * index is not known. + * + * @return the index. + * */ public int getIndex() { return index; Index: working_classlib/modules/regex/src/main/java/java/util/regex/Pattern.java =================================================================== --- working_classlib/modules/regex/src/main/java/java/util/regex/Pattern.java (revision 766087) +++ working_classlib/modules/regex/src/main/java/java/util/regex/Pattern.java Fri Apr 17 17:15:25 PDT 2009 @@ -29,12 +29,33 @@ /** - * Pattern implements a compiler for regular expressions as defined by the J2SE - * specification. The regular expression syntax is largely similar to the syntax - * defined by Perl 5 but has both omissions and extensions.
A formal and - * complete definition of the regular expression syntax is not provided by the - * J2SE speTBD (TODO) + * Represents a pattern used for matching, searching, or replacing strings. + * {@code Pattern}s are specified in terms of regular expressions and compiled + * using an instance of this class. They are then used in conjunction with a + * {@link Matcher} to perform the actual search. + *

+ * A typical use case looks like this: + *

+ *

+ * Pattern p = Pattern.compile("Hello, A[a-z]*!");
- * 
+ *
+ * Matcher m = p.matcher("Hello, Android!");
+ * boolean b1 = m.matches(); // true
+ *
+ * m.reset("Hello, Robot!");
+ * boolean b2 = m.matches(); // false
+ * 
+ *

+ * The above code could also be written in a more compact fashion, though this + * variant is less efficient, since {@code Pattern} and {@code Matcher} objects + * are created on the fly instead of + * being reused: + *

+ *     boolean b1 = Pattern.matches("Hello, A[a-z]*!", "Hello, Android!"); // true
+ *     boolean b2 = Pattern.matches("Hello, A[a-z]*!", "Hello, Robot!");   // false
+ * 
+ * + * @see Matcher */ public final class Pattern implements Serializable { @@ -43,42 +64,56 @@ static final boolean _DEBUG_ = false; /** - * @com.intel.drl.spec_ref + * This constant specifies that a pattern matches Unix line endings ('\n') + * only against the '.', '^', and '$' meta characters. */ public static final int UNIX_LINES = 1 << 0; /** - * @com.intel.drl.spec_ref + * This constant specifies that a {@code Pattern} is matched + * case-insensitively. That is, the patterns "a+" and "A+" would both match + * the string "aAaAaA". */ public static final int CASE_INSENSITIVE = 1 << 1; /** - * @com.intel.drl.spec_ref + * This constant specifies that a {@code Pattern} may contain whitespace or + * comments. Otherwise comments and whitespace are taken as literal + * characters. */ public static final int COMMENTS = 1 << 2; /** - * @com.intel.drl.spec_ref + * This constant specifies that the meta characters '^' and '$' match only + * the beginning and end of an input line, respectively. Normally, they + * match the beginning and the end of the complete input. */ public static final int MULTILINE = 1 << 3; /** - * @com.intel.drl.spec_ref + * This constant specifies that the whole {@code Pattern} is to be taken + * literally, that is, all meta characters lose their meanings. */ public static final int LITERAL = 1 << 4; /** - * @com.intel.drl.spec_ref + * This constant specifies that the '.' meta character matches arbitrary + * characters, including line endings, which is normally not the case. */ public static final int DOTALL = 1 << 5; /** - * @com.intel.drl.spec_ref + * This constant specifies that a {@code Pattern} is matched + * case-insensitively with regard to all Unicode characters. It is used in + * conjunction with the {@link #CASE_INSENSITIVE} constant to extend its + * meaning to all Unicode characters.
*/ public static final int UNICODE_CASE = 1 << 6; /** - * @com.intel.drl.spec_ref + * This constant specifies that a character in a {@code Pattern} and a + * character in the input string only match if they are canonically + * equivalent. */ public static final int CANON_EQ = 1 << 7; @@ -128,45 +163,64 @@ transient AbstractSet start = null; - /** + /** - * Create a matcher for this pattern and a given input character sequence + * Returns a {@link Matcher} for the {@code Pattern} and a given input. The + * {@code Matcher} can be used to match the {@code Pattern} against the + * whole input, find occurrences of the {@code Pattern} in the input, or + * replace parts of the input. - * + * - * @param cs - * The input character sequence - * @return A new matcher + * @param input + * the input to process. + * + * @return the resulting {@code Matcher}. - */ + */ - public Matcher matcher(CharSequence cs) { - return new Matcher(this, cs); + public Matcher matcher(CharSequence input) { + return new Matcher(this, input); } - /** + /** - * Split an input string using the pattern as a token separator. + * Splits the given input sequence around occurrences of the {@code Pattern}. + * The function first determines all occurrences of the {@code Pattern} + * inside the input sequence. It then builds an array of the + * "remaining" strings before, in-between, and after these + * occurrences. An additional parameter determines the maximal number of + * entries in the resulting array and the handling of trailing empty + * strings. - * + * - * @param input - * Input sequence to tokenize + * @param inputSeq + * the input sequence. - * @param limit + * @param limit - * If positive, the maximum number of tokens to return. If - * negative, an indefinite number of tokens are returned. If - * zero, an indefinite number of tokens are returned but trailing - * empty tokens are excluded. - * @return A sequence of tokens split out of the input string. 
+ * Determines the maximal number of entries in the resulting + * array. + * + * + * @return the resulting array. - */ + */ - public String[] split(CharSequence input, int limit) { + public String[] split(CharSequence inputSeq, int limit) { ArrayList res = new ArrayList(); - Matcher mat = matcher(input); + Matcher mat = matcher(inputSeq); int index = 0; int curPos = 0; - if (input.length() == 0) { + if (inputSeq.length() == 0) { return new String [] {""}; //$NON-NLS-1$ } else { while (mat.find() && (index + 1 < limit || limit <= 0)) { - res.add(input.subSequence(curPos, mat.start()).toString()); + res.add(inputSeq.subSequence(curPos, mat.start()).toString()); curPos = mat.end(); index++; } - res.add(input.subSequence(curPos, input.length()).toString()); + res.add(inputSeq.subSequence(curPos, inputSeq.length()).toString()); index++; /* @@ -182,95 +236,79 @@ } /** - * @com.intel.drl.spec_ref + * Splits a given input around occurrences of a regular expression. This is + * a convenience method that is equivalent to calling the method + * {@link #split(java.lang.CharSequence, int)} with a limit of 0. + * + * @param input + * the input sequence. + * + * @return the resulting array. */ public String[] split(CharSequence input) { return split(input, 0); } - /** + /** - * Returns the pattern string passed to the compile method + * Returns the regular expression that was compiled into this + * {@code Pattern}. - * + * - * @return A string representation of the pattern + * @return the regular expression. - */ + */ public String pattern() { return lexemes.toString(); } - /** - * Return a textual representation of the pattern. - * - * @return The regular expression string - */ + @Override public String toString() { return this.pattern(); } - /** + /** - * Return the mask of flags used to compile the pattern + * Returns the flags that have been set for this {@code Pattern}. - * + * - * @return A mask of flags used to compile the pattern. + * @return the flags that have been set. 
A combination of the constants + * defined in this class. + * + * @see #CANON_EQ + * @see #CASE_INSENSITIVE + * @see #COMMENTS + * @see #DOTALL + * @see #LITERAL + * @see #MULTILINE + * @see #UNICODE_CASE + * @see #UNIX_LINES - */ + */ public int flags() { return this.flags; } - /** + /** - * Return a compiled pattern corresponding to the input regular expression - * string. + * Compiles a regular expression, creating a new {@code Pattern} instance in + * the process. Allows to set some flags that modify the behavior of the + * {@code Pattern}. - * + * - * The input flags is a mask of the following flags: - *
- *
UNIX_LINES (0x0001) - *
Enables UNIX lines mode where only \n is recognized as a line - * terminator. The default setting of this flag is off indicating - * that all of the following character sequences are recognized as line - * terminators: \n, \r, \r\n, NEL (\u0085), \u2028 and \u2029. - *
CASE_INSENSITIVE (0x0002) - *
Directs matching to be done in a way that ignores differences in - * case. If input character sequences are encoded in character sets other - * than ASCII, then the UNICODE_CASE must also be set to enable Unicode case - * detection. - *
UNICODE_CASE (0x0040) - *
Enables Unicode case folding if used in conjunction with the - * CASE_INSENSITIVE flag. If CASE_INSENSITIVE - * is not set, then this flag has no effect. - *
COMMENTS (0x0004) - *
Directs the pattern compiler to ignore whitespace and comments in - * the pattern. Whitespace consists of sequences including only these - * characters: SP (\u0020), HT (\t or \u0009), LF (\n or ), VT - * (\u000b), FF (\f or \u000c), and CR (\r or ). A comment is any - * sequence of characters beginning with the "#" (\u0023) character and - * ending in a LF character. - *
MULTILINE (0x0008) - *
Turns on multiple line mode for matching of character sequences. By - * default, this mode is off so that the character "^" (\u005e) matches - * the beginning of the entire input sequence and the character "$" - * (\u0024) matches the end of the input character sequence. In multiple - * line mode, the character "^" matches any character in the input sequence - * which immediately follows a line terminator and the character "$" matches - * any character in the input sequence which immediately precedes a line - * terminator. - *
DOTALL (0x0020) - *
Enables the DOT (".") character in regular expressions to match line - * terminators. By default, line terminators are not matched by DOT. - *
CANON_EQ (0x0080) - *
Enables matching of character sequences which are canonically - * equivalent according to the Unicode standard. Canonical equivalence is - * described here: http://www.unicode.org/reports/tr15/. By default, - * canonical equivalence is not detected while matching. - *
- * - * @param regex - * A regular expression string. + * @param pattern + * the regular expression. * @param flags - * A set of flags to control the compilation of the pattern. - * @return A compiled pattern + * the flags to set. Any combination of the constants + * defined in this class is valid. + * + * @return the new {@code Pattern} instance. + * * @throws PatternSyntaxException - * If the input regular expression does not match the required - * grammar. + * if the regular expression is syntactically incorrect. + * + * @see #CANON_EQ + * @see #CASE_INSENSITIVE + * @see #COMMENTS + * @see #DOTALL + * @see #LITERAL + * @see #MULTILINE + * @see #UNICODE_CASE + * @see #UNIX_LINES */ - public static Pattern compile(String regex, int flags) + public static Pattern compile(String pattern, int flags) throws PatternSyntaxException { if ((flags != 0) && @@ -281,7 +319,7 @@ AbstractSet.counter = 1; - return new Pattern().compileImpl(regex, flags); + return new Pattern().compileImpl(pattern, flags); } /** @@ -294,11 +332,11 @@ * * @return Compiled pattern */ - private Pattern compileImpl(String regex, int flags) + private Pattern compileImpl(String pattern, int flags) throws PatternSyntaxException { - this.lexemes = new Lexer(regex, flags); + this.lexemes = new Lexer(pattern, flags); this.flags = flags; - this.pattern = regex; + this.pattern = pattern; start = processExpression(-1, this.flags, null); if (!lexemes.isEmpty()) { @@ -1276,9 +1314,19 @@ return new UCIRangeSet(charClass); } } - + /** - * @com.intel.drl.spec_ref + * Compiles a regular expression, creating a new {@code Pattern} instance in the + * process. This is a convenience method that calls {@link + * #compile(String, int)} with a {@code flags} value of zero. + * + * @param pattern + * the regular expression. + * + * @return the new {@code Pattern} instance. + * + * @throws PatternSyntaxException + * if the regular expression is syntactically incorrect. 
*/ public static Pattern compile(String pattern) { return compile(pattern, 0); @@ -1298,14 +1346,39 @@ } } - + /** - * @com.intel.drl.spec_ref + * Tries to match a given regular expression against a given input. This is + * a convenience method that compiles the regular + * expression into a {@code Pattern}, builds a {@link Matcher} for it, and + * then does the match. If the same regular expression is used for multiple + * operations, it is recommended to compile it into a {@code Pattern} + * explicitly and request a reusable {@code Matcher}. + * + * @param regex + * the regular expression. + * @param input + * the input to process. + * + * @return {@code true} if and only if the {@code Pattern} matches the input. + * + * @see Pattern#compile(java.lang.String) + * @see Matcher#matches() */ public static boolean matches(String regex, CharSequence input) { return Pattern.compile(regex).matcher(input).matches(); } + /** + * Quotes a given string using "\Q" and "\E", so that all other + * meta-characters lose their special meaning. If the string is used for a + * {@code Pattern} afterwards, it can only be matched literally. + * + * @param s + * the string to quote. + * + * @return the quoted string. + */ public static String quote(String s) { StringBuffer sb = new StringBuffer().append("\\Q"); //$NON-NLS-1$ int apos = 0;