Index: CHANGES.txt
===================================================================
--- CHANGES.txt (revision 785621)
+++ CHANGES.txt (working copy)
@@ -410,7 +410,12 @@
 26. LUCENE-1550: Added new n-gram based String distance measure for spell checking.
     See the Javadocs for NGramDistance.java for a reference paper on why this is helpful
     (Tom Morton via Grant Ingersoll)
- 
+27. LUCENE-1745: Added constructors to JakartaRegexpCapabilities and JavaUtilRegexCapabilities
+    as well as static flags to support configuring a RegexCapabilities implementation
+    with the implementation-specific modifier flags. Allows for callers to customize the
+    RegexQuery using the implementation-specific options and fine tune how
+    regular expressions are compiled and matched. (Marc Zampetti zampettim@aim.com)
+
 Optimizations
 
  1. LUCENE-1427: Fixed QueryWrapperFilter to not waste time computing
Index: contrib/regex/src/test/org/apache/lucene/search/regex/TestRegexQuery.java
===================================================================
--- contrib/regex/src/test/org/apache/lucene/search/regex/TestRegexQuery.java (revision 785621)
+++ contrib/regex/src/test/org/apache/lucene/search/regex/TestRegexQuery.java (working copy)
@@ -33,6 +33,7 @@
   private IndexSearcher searcher;
   private final String FN = "field";
 
+
   public void setUp() {
     RAMDirectory directory = new RAMDirectory();
     try {
@@ -59,8 +60,12 @@
 
   private Term newTerm(String value) { return new Term(FN, value); }
 
-  private int regexQueryNrHits(String regex) throws Exception {
+  private int regexQueryNrHits(String regex, RegexCapabilities capability) throws Exception {
     RegexQuery query = new RegexQuery( newTerm(regex));
+
+    if (capability != null) {
+      query.setRegexImplementation(capability);
+    }
     return searcher.search(query).length();
   }
 
@@ -68,19 +73,20 @@
     SpanRegexQuery srq1 = new SpanRegexQuery( newTerm(regex1));
     SpanRegexQuery srq2 = new SpanRegexQuery( newTerm(regex2));
     SpanNearQuery query = new SpanNearQuery( new SpanQuery[]{srq1, srq2}, slop, ordered);
+
     return searcher.search(query).length();
   }
 
   public void testRegex1() throws Exception {
-    assertEquals(1, regexQueryNrHits("^q.[aeiou]c.*$"));
+    assertEquals(1, regexQueryNrHits("^q.[aeiou]c.*$", null));
   }
 
   public void testRegex2() throws Exception {
-    assertEquals(0, regexQueryNrHits("^.[aeiou]c.*$"));
+    assertEquals(0, regexQueryNrHits("^.[aeiou]c.*$", null));
   }
 
   public void testRegex3() throws Exception {
-    assertEquals(0, regexQueryNrHits("^q.[aeiou]c$"));
+    assertEquals(0, regexQueryNrHits("^q.[aeiou]c$", null));
   }
 
   public void testSpanRegex1() throws Exception {
@@ -98,6 +104,22 @@
 
     RegexQuery query2 = new RegexQuery( newTerm("foo.*"));
     assertFalse(query1.equals(query2));
   }
+
+  public void testJakartaCaseSensativeFail() throws Exception {
+    assertEquals(0, regexQueryNrHits("^.*DOG.*$", new JakartaRegexpCapabilities()));
+  }
+
+  public void testJavaUtilCaseSensativeFail() throws Exception {
+    assertEquals(0, regexQueryNrHits("^.*DOG.*$", new JavaUtilRegexCapabilities()));
+  }
+
+  public void testJakartaCaseInsensative() throws Exception {
+    assertEquals(1, regexQueryNrHits("^.*DOG.*$", new JakartaRegexpCapabilities(JakartaRegexpCapabilities.FLAG_MATCH_CASEINDEPENDENT)));
+  }
+
+  public void testJavaUtilCaseInsensative() throws Exception {
+    assertEquals(1, regexQueryNrHits("^.*DOG.*$", new JavaUtilRegexCapabilities(JavaUtilRegexCapabilities.FLAG_CASE_INSENSITIVE)));
+  }
 
 }
Index: contrib/regex/src/java/org/apache/lucene/search/regex/JakartaRegexpCapabilities.java
===================================================================
--- contrib/regex/src/java/org/apache/lucene/search/regex/JakartaRegexpCapabilities.java (revision 785621)
+++ contrib/regex/src/java/org/apache/lucene/search/regex/JakartaRegexpCapabilities.java (working copy)
@@ -27,9 +27,40 @@
  */
 public class JakartaRegexpCapabilities implements RegexCapabilities {
   private RE regexp;
+
+  private final int flags;
+
+  // Define the flags that are possible. Redefine them here
+  // to avoid exposing the RE class to the caller.
+
+  /**
+   * Flag to specify normal, case-sensitive matching behaviour. This is the default.
+   */
+  public static final int FLAG_MATCH_NORMAL = RE.MATCH_NORMAL;
+
+  /**
+   * Flag to specify that matching should be case-independent (folded)
+   */
+  public static final int FLAG_MATCH_CASEINDEPENDENT = RE.MATCH_CASEINDEPENDENT;
+
+  /**
+   * Constructs a RegexCapabilities with the default MATCH_NORMAL match style.
+   */
+  public JakartaRegexpCapabilities() { this(FLAG_MATCH_NORMAL); }
+
+  /**
+   * Constructs a RegexCapabilities with the provided match flags.
+   * Multiple flags should be ORed together.
+   *
+   * @param flags The matching style
+   */
+  public JakartaRegexpCapabilities(int flags) {
+    this.flags = flags;
+  }
+
   public void compile(String pattern) {
-    regexp = new RE(pattern);
+    regexp = new RE(pattern, this.flags);
   }
 
   public boolean match(String string) {
Index: contrib/regex/src/java/org/apache/lucene/search/regex/JavaUtilRegexCapabilities.java
===================================================================
--- contrib/regex/src/java/org/apache/lucene/search/regex/JavaUtilRegexCapabilities.java (revision 785621)
+++ contrib/regex/src/java/org/apache/lucene/search/regex/JavaUtilRegexCapabilities.java (working copy)
@@ -28,9 +28,46 @@
  */
 public class JavaUtilRegexCapabilities implements RegexCapabilities {
   private Pattern pattern;
-  
+  private final int flags;
+
+  // Define the optional flags from Pattern that can be used.
+  // Do this here to keep Pattern contained within this class.
+
+  public static final int FLAG_CANON_EQ = Pattern.CANON_EQ;
+  public static final int FLAG_CASE_INSENSITIVE = Pattern.CASE_INSENSITIVE;
+  public static final int FLAG_COMMENTS = Pattern.COMMENTS;
+  public static final int FLAG_DOTALL = Pattern.DOTALL;
+  public static final int FLAG_LITERAL = Pattern.LITERAL;
+  public static final int FLAG_MULTILINE = Pattern.MULTILINE;
+  public static final int FLAG_UNICODE_CASE = Pattern.UNICODE_CASE;
+  public static final int FLAG_UNIX_LINES = Pattern.UNIX_LINES;
+
+  /**
+   * Default constructor that uses java.util.regex.Pattern
+   * with its default flags.
+   */
+  public JavaUtilRegexCapabilities() {
+    this(0);
+  }
+
+  /**
+   * Constructor that allows for the modification of the flags that
+   * the java.util.regex.Pattern will use to compile the regular expression.
+   * This gives the user the ability to fine-tune how the regular expression
+   * is compiled and matched, to get the functionality that they need.
+   * The {@link java.util.regex.Pattern Pattern} class supports specifying
+   * these flags via the regular expression text itself, but this gives the caller
+   * another option to modify the behavior. Useful in cases where the regular expression text
+   * cannot be modified, or if doing so is undesired.
+   *
+   * @param flags The flags that are ORed together.
+   */
+  public JavaUtilRegexCapabilities(int flags) {
+    this.flags = flags;
+  }
+
   public void compile(String pattern) {
-    this.pattern = Pattern.compile(pattern);
+    this.pattern = Pattern.compile(pattern, this.flags);
   }
 
   public boolean match(String string) {