Index: modules/regex/src/main/java/java/util/regex/Pattern.java =================================================================== --- modules/regex/src/main/java/java/util/regex/Pattern.java (revision 394942) +++ modules/regex/src/main/java/java/util/regex/Pattern.java (working copy) @@ -24,11 +24,14 @@ import java.util.ArrayList; + /** - * @com.intel.drl.spec_ref + * Pattern implements a compiler for regular expressions as defined by the J2SE + * specification. The regular expression syntax is largely similar to the syntax + * defined by Perl 5 but has both omissions and extensions. A formal and + * complete definition of the regular expression syntax is not provided by the + * J2SE specification (TODO: describe the supported syntax here). * - * @author Nikolay A. Kuznetsov - * @version $Revision: 1.36.2.2 $ */ public final class Pattern implements Serializable { @@ -98,16 +101,29 @@ transient AbstractSet start = null; - /** - * @com.intel.drl.spec_ref - */ + /** + * Create a matcher for this pattern and a given input character sequence + * + * @param cs + * The input character sequence + * @return A new matcher + */ public Matcher matcher(CharSequence cs) { return new Matcher(this, cs); } - /** - * @com.intel.drl.spec_ref - */ + /** + * Split an input string using the pattern as a token separator. + * + * @param input + * Input sequence to tokenize + * @param limit + * If positive, the maximum number of tokens to return. If + * negative, an indefinite number of tokens are returned. If + * zero, an indefinite number of tokens are returned but trailing + * empty tokens are excluded. + * @return A sequence of tokens split out of the input string. 
+ */ public String[] split(CharSequence input, int limit) { ArrayList res = new ArrayList(); Matcher mat = matcher(input); @@ -145,30 +161,87 @@ return split(input, 0); } - /** - * @com.intel.drl.spec_ref - */ + /** + * Returns the pattern string passed to the compile method + * + * @return A string representation of the pattern + */ public String pattern() { return lexemes.toString(); } - /** - * @com.intel.drl.spec_ref - */ + /** + * Return a textual representation of the pattern. + * + * @return The regular expression string + */ public String toString() { return this.pattern(); } - /** - * @com.intel.drl.spec_ref - */ + /** + * Return the mask of flags used to compile the pattern + * + * @return A mask of flags used to compile the pattern. + */ public int flags() { return this.flags; } - /** - * @com.intel.drl.spec_ref - */ + /** + * Return a compiled pattern corresponding to the input regular expression + * string. + * + * The input flags is a mask of the following flags: + *
+ *
UNIX_LINES (0x0001) + *
Enables UNIX lines mode where only \n is recognized as a line + * terminator. The default setting of this flag is off indicating + * that all of the following character sequences are recognized as line + * terminators: \n, \r, \r\n, NEL (\u0085), \u2028 and \u2029. + *
CASE_INSENSITIVE (0x0002) + *
Directs matching to be done in a way that ignores differences in + * case. If input character sequences are encoded in character sets other + * than ASCII, then the UNICODE_CASE flag must also be set to enable Unicode + * case detection. + *
UNICODE_CASE (0x0040) + *
Enables Unicode case folding if used in conjunction with the + * CASE_INSENSITIVE flag. If CASE_INSENSITIVE + * is not set, then this flag has no effect. + *
COMMENTS (0x0004) + *
Directs the pattern compiler to ignore whitespace and comments in + * the pattern. Whitespace consists of sequences including only these + * characters: SP (\u0020), HT (\t or \u0009), LF (\n or U+000A), VT (\u000b), FF + * (\f or \u000c), and CR (\r or U+000D). A comment is any sequence of characters + * beginning with the "#" (\u0023) character and ending in a LF character. + *
MULTILINE (0x0008) + *
Turns on multiple line mode for matching of character sequences. By + * default, this mode is off so that the character "^" (\u005e) matches the + * beginning of the entire input sequence and the character "$" (\u0024) + * matches the end of the input character sequence. In multiple line mode, + * the character "^" also matches at any position in the input sequence which + * immediately follows a line terminator and the character "$" also matches at + * any position in the input sequence which immediately precedes a line + * terminator. + *
DOTALL (0x0020) + *
Enables the DOT (".") character in regular expressions to match line + * terminators. By default, line terminators are not matched by DOT. + *
CANON_EQ (0x0080) + *
Enables matching of character sequences which are canonically + * equivalent according to the Unicode standard. Canonical equivalence is + * described here: http://www.unicode.org/reports/tr15/. By default, + * canonical equivalence is not detected while matching. + *
+ * + * @param regex + * A regular expression string. + * @param flags + * A set of flags to control the compilation of the pattern. + * @return A compiled pattern + * @throws PatternSyntaxException + * If the input regular expression does not match the required + * grammar. + */ public static Pattern compile(String regex, int flags) throws PatternSyntaxException { AbstractSet.counter = 1; Index: modules/regex/src/main/java/java/util/regex/Matcher.java =================================================================== --- modules/regex/src/main/java/java/util/regex/Matcher.java (revision 394942) +++ modules/regex/src/main/java/java/util/regex/Matcher.java (working copy) @@ -216,17 +216,26 @@ return this; } - - /** - * @com.intel.drl.spec_ref - */ + + /** + * Append the input from the current append position to its end to sb. + * + * @param sb The buffer to append the remaining input to. + * @return The buffer sb, after the append. + */ public StringBuffer appendTail(StringBuffer sb) { return sb.append(string.subSequence(appendPos, string.length())); } - - /** - * @com.intel.drl.spec_ref - */ + + /** + * This is very similar to replaceAll except only the first occurrence of a + * sequence matching the pattern is replaced. + * + * @param replacement + * A string to replace occurrences of character sequences + * matching the pattern. + * @return A new string with replacements inserted + */ public String replaceFirst(String replacement) { reset(); if (find()) { @@ -238,10 +247,17 @@ return string.toString(); } - - /** - * @com.intel.drl.spec_ref - */ + + /** + * Replace all occurrences of character sequences which match the pattern + * with the given replacement string. The replacement string may refer to + * capturing groups using the syntax "$n", where n is the group number. + * + * @param replacement + * A string to replace occurrences of character sequences + * matching the pattern. 
+ * @return A new string with replacements inserted + */ public String replaceAll(String replacement) { StringBuffer sb = new StringBuffer(); reset(); @@ -251,10 +267,12 @@ return appendTail(sb).toString(); } - - /** - * @com.intel.drl.spec_ref - */ + + /** + * Return a reference to the pattern used by this Matcher. + * + * @return A reference to the pattern used by this Matcher. + */ public Pattern pattern() { return pat; } @@ -298,10 +316,15 @@ matchResult.setStartIndex(startIndex); return start.find(startIndex, string, matchResult); } - - /** - * @com.intel.drl.spec_ref - */ + + /** + * The find() method matches the pattern against the character sequence + * beginning at the character after the last match or at the beginning of + * the sequence if called immediately after reset(). The method returns true + * if and only if a match is found. + * + * @return A boolean indicating if the pattern was matched. + */ public boolean find() { int length = string.length(); if (!hasTransparentBounds()) @@ -333,10 +356,15 @@ public int end(int groupIndex) { return matchResult.end(groupIndex); } - - /** - * @com.intel.drl.spec_ref - */ + + /** + * This method is identical in function to the Pattern.matches() method. It + * returns true if and only if the regular expression pattern matches the + * entire input character sequence. + * + * @return A boolean indicating if the pattern matches the entire input + * character sequence. + */ public boolean matches() { return lookingAt(leftBound, Matcher.MODE_MATCH); } @@ -386,10 +414,16 @@ return false; } - - /** - * @com.intel.drl.spec_ref - */ + + /** + * This method attempts to match the pattern against the character sequence + * starting at the beginning. If the pattern matches even a prefix of the + * input character sequence, lookingAt() will return true. Otherwise it will + * return false. + * + * @return A boolean indicating if the pattern matches a prefix of the input + * character sequence. 
+ */ public boolean lookingAt() { return lookingAt(leftBound, Matcher.MODE_FIND); } @@ -407,10 +441,12 @@ public int start() { return start(0); } - - /** - * @com.intel.drl.spec_ref - */ + + /** + * Return the number of capturing groups in the pattern. + * + * @return The number of capturing groups in the pattern. + */ public int groupCount() { return matchResult.groupCount(); }