Index: lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestJakartaRegexpCapabilities.java =================================================================== --- lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestJakartaRegexpCapabilities.java (revision 986633) +++ lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestJakartaRegexpCapabilities.java (working copy) @@ -17,6 +17,7 @@ * limitations under the License. */ +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; /** @@ -26,21 +27,21 @@ public void testGetPrefix(){ JakartaRegexpCapabilities cap = new JakartaRegexpCapabilities(); - cap.compile("luc[e]?"); - assertTrue(cap.match("luce")); - assertEquals("luc", cap.prefix()); + RegexCapabilities.RegexMatcher matcher = cap.compile("luc[e]?"); + assertTrue(matcher.match(new BytesRef("luce"))); + assertEquals("luc", matcher.prefix()); - cap.compile("lucene"); - assertTrue(cap.match("lucene")); - assertEquals("lucene", cap.prefix()); + matcher = cap.compile("lucene"); + assertTrue(matcher.match(new BytesRef("lucene"))); + assertEquals("lucene", matcher.prefix()); } public void testShakyPrefix(){ JakartaRegexpCapabilities cap = new JakartaRegexpCapabilities(); - cap.compile("(ab|ac)"); - assertTrue(cap.match("ab")); - assertTrue(cap.match("ac")); + RegexCapabilities.RegexMatcher matcher = cap.compile("(ab|ac)"); + assertTrue(matcher.match(new BytesRef("ab"))); + assertTrue(matcher.match(new BytesRef("ac"))); // why is it not a??? - assertNull(cap.prefix()); + assertNull(matcher.prefix()); } } Index: lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexQuery.java =================================================================== --- lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexQuery.java (revision 986633) +++ lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexQuery.java (working copy) @@ -76,23 +76,27 @@ return buffer.toString(); } - /* generated by IntelliJ IDEA */ @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - if (!super.equals(o)) return false; - - final RegexQuery that = (RegexQuery) o; - - return regexImpl.equals(that.regexImpl); - } - - /* generated by IntelliJ IDEA */ - @Override public int hashCode() { + final int prime = 31; int result = super.hashCode(); - result = 29 * result + regexImpl.hashCode(); + result = prime * result + ((regexImpl == null) ? 0 : regexImpl.hashCode()); + result = prime * result + ((term == null) ? 0 : term.hashCode()); return result; } + + @Override + public boolean equals(Object obj) { + if (this == obj) return true; + if (!super.equals(obj)) return false; + if (getClass() != obj.getClass()) return false; + RegexQuery other = (RegexQuery) obj; + if (regexImpl == null) { + if (other.regexImpl != null) return false; + } else if (!regexImpl.equals(other.regexImpl)) return false; + if (term == null) { + if (other.term != null) return false; + } else if (!term.equals(other.term)) return false; + return true; + } } Index: lucene/contrib/queries/src/java/org/apache/lucene/search/regex/JakartaRegexpCapabilities.java =================================================================== --- lucene/contrib/queries/src/java/org/apache/lucene/search/regex/JakartaRegexpCapabilities.java (revision 986633) +++ lucene/contrib/queries/src/java/org/apache/lucene/search/regex/JakartaRegexpCapabilities.java (working copy) @@ -17,6 +17,9 @@ * limitations under the License. */ +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.UnicodeUtil; +import org.apache.regexp.CharacterIterator; import org.apache.regexp.RE; import org.apache.regexp.REProgram; import java.lang.reflect.Field; @@ -30,8 +33,6 @@ * it doesn't always provide a prefix even if one would exist. */ public class JakartaRegexpCapabilities implements RegexCapabilities { - private RE regexp; - private static Field prefixField; private static Method getPrefixMethod; static { @@ -79,45 +80,75 @@ this.flags = flags; } - public void compile(String pattern) { - regexp = new RE(pattern, this.flags); + public RegexCapabilities.RegexMatcher compile(String regex) { + return new JakartaRegexMatcher(regex, flags); } - public boolean match(String string) { - return regexp.match(string); + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + flags; + return result; } - public String prefix() { - try { - final char[] prefix; - if (getPrefixMethod != null) { - prefix = (char[]) getPrefixMethod.invoke(regexp.getProgram()); - } else if (prefixField != null) { - prefix = (char[]) prefixField.get(regexp.getProgram()); - } else { - return null; - } - return prefix == null ? null : new String(prefix); - } catch (Exception e) { - // if we cannot get the prefix, return none - return null; - } + @Override + public boolean equals(Object obj) { + if (this == obj) return true; + if (obj == null) return false; + if (getClass() != obj.getClass()) return false; + JakartaRegexpCapabilities other = (JakartaRegexpCapabilities) obj; + if (flags != other.flags) return false; + return true; } - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; + class JakartaRegexMatcher implements RegexCapabilities.RegexMatcher { + private RE regexp; + private final UnicodeUtil.UTF16Result utf16 = new UnicodeUtil.UTF16Result(); + private final CharacterIterator utf16wrapper = new CharacterIterator() { - final JakartaRegexpCapabilities that = (JakartaRegexpCapabilities) o; + public char charAt(int pos) { + return utf16.result[pos]; + } - if (regexp != null ? !regexp.equals(that.regexp) : that.regexp != null) return false; + public boolean isEnd(int pos) { + return pos >= utf16.length; + } - return true; - } + public String substring(int beginIndex) { + return substring(beginIndex, utf16.length); + } - @Override - public int hashCode() { - return (regexp != null ? regexp.hashCode() : 0); + public String substring(int beginIndex, int endIndex) { + return new String(utf16.result, beginIndex, endIndex - beginIndex); + } + + }; + + public JakartaRegexMatcher(String regex, int flags) { + regexp = new RE(regex, flags); + } + + public boolean match(BytesRef term) { + UnicodeUtil.UTF8toUTF16(term.bytes, term.offset, term.length, utf16); + return regexp.match(utf16wrapper, 0); + } + + public String prefix() { + try { + final char[] prefix; + if (getPrefixMethod != null) { + prefix = (char[]) getPrefixMethod.invoke(regexp.getProgram()); + } else if (prefixField != null) { + prefix = (char[]) prefixField.get(regexp.getProgram()); + } else { + return null; + } + return prefix == null ? null : new String(prefix); + } catch (Exception e) { + // if we cannot get the prefix, return none + return null; + } + } } } Index: lucene/contrib/queries/src/java/org/apache/lucene/search/regex/JavaUtilRegexCapabilities.java =================================================================== --- lucene/contrib/queries/src/java/org/apache/lucene/search/regex/JavaUtilRegexCapabilities.java (revision 986633) +++ lucene/contrib/queries/src/java/org/apache/lucene/search/regex/JavaUtilRegexCapabilities.java (working copy) @@ -17,8 +17,12 @@ * limitations under the License. */ +import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.UnicodeUtil; + /** * An implementation tying Java's built-in java.util.regex to RegexQuery. * @@ -27,9 +31,8 @@ * attempt to {@link #match} each term for the specified field in the index. */ public class JavaUtilRegexCapabilities implements RegexCapabilities { - private Pattern pattern; private int flags = 0; - + // Define the optional flags from Pattern that can be used. // Do this here to keep Pattern contained within this class. @@ -66,32 +69,59 @@ this.flags = flags; } - public void compile(String pattern) { - this.pattern = Pattern.compile(pattern, this.flags); + public RegexCapabilities.RegexMatcher compile(String regex) { + return new JavaUtilRegexMatcher(regex, flags); } - - public boolean match(String string) { - return pattern.matcher(string).matches(); + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + flags; + return result; } - public String prefix() { - return null; + @Override + public boolean equals(Object obj) { + if (this == obj) return true; + if (obj == null) return false; + if (getClass() != obj.getClass()) return false; + JavaUtilRegexCapabilities other = (JavaUtilRegexCapabilities) obj; + if (flags != other.flags) return false; + return true; } - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; + class JavaUtilRegexMatcher implements RegexCapabilities.RegexMatcher { + private final Pattern pattern; + private final Matcher matcher; + private final UnicodeUtil.UTF16Result utf16 = new UnicodeUtil.UTF16Result(); + private final CharSequence utf16wrapper = new CharSequence() { - final JavaUtilRegexCapabilities that = (JavaUtilRegexCapabilities) o; + public int length() { + return utf16.length; + } - if (pattern != null ? !pattern.equals(that.pattern) : that.pattern != null) return false; + public char charAt(int index) { + return utf16.result[index]; + } - return true; - } + public CharSequence subSequence(int start, int end) { + return new String(utf16.result, start, end - start); + } + }; + + public JavaUtilRegexMatcher(String regex, int flags) { + this.pattern = Pattern.compile(regex, flags); + this.matcher = this.pattern.matcher(utf16wrapper); + } + + public boolean match(BytesRef term) { + UnicodeUtil.UTF8toUTF16(term.bytes, term.offset, term.length, utf16); + return matcher.reset().matches(); + } - @Override - public int hashCode() { - return (pattern != null ? pattern.hashCode() : 0); + public String prefix() { + return null; + } } } Index: lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexCapabilities.java =================================================================== --- lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexCapabilities.java (revision 986633) +++ lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexCapabilities.java (working copy) @@ -1,5 +1,9 @@ package org.apache.lucene.search.regex; +import java.io.Serializable; + +import org.apache.lucene.util.BytesRef; + /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -21,7 +25,7 @@ * Defines basic operations needed by {@link RegexQuery} for a regular * expression implementation. */ -public interface RegexCapabilities { +public interface RegexCapabilities extends Serializable { /** * Called by the constructor of {@link RegexTermEnum} allowing * implementations to cache a compiled version of the regular @@ -29,20 +33,22 @@ * * @param pattern regular expression pattern */ - void compile(String pattern); + public RegexMatcher compile(String pattern); - /** - * - * @param string - * @return true if string matches the pattern last passed to {@link #compile}. - */ - boolean match(String string); + public interface RegexMatcher { + /** + * + * @param string + * @return true if string matches the pattern last passed to {@link #compile}. + */ + public boolean match(BytesRef term); - /** - * A wise prefix implementation can reduce the term enumeration (and thus increase performance) - * of RegexQuery dramatically! - * - * @return static non-regex prefix of the pattern last passed to {@link #compile}. May return null. - */ - String prefix(); + /** + * A wise prefix implementation can reduce the term enumeration (and thus increase performance) + * of RegexQuery dramatically! + * + * @return static non-regex prefix of the pattern last passed to {@link #compile}. May return null. + */ + public String prefix(); + } } Index: lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexTermsEnum.java =================================================================== --- lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexTermsEnum.java (revision 986633) +++ lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexTermsEnum.java (working copy) @@ -34,16 +34,14 @@ */ public class RegexTermsEnum extends FilteredTermsEnum { - private RegexCapabilities regexImpl; + private RegexCapabilities.RegexMatcher regexImpl; private final BytesRef prefixRef; - public RegexTermsEnum(IndexReader reader, Term term, RegexCapabilities regexImpl) throws IOException { + public RegexTermsEnum(IndexReader reader, Term term, RegexCapabilities regexCap) throws IOException { super(reader, term.field()); String text = term.text(); - this.regexImpl = regexImpl; + this.regexImpl = regexCap.compile(text); - regexImpl.compile(text); - String pre = regexImpl.prefix(); if (pre == null) pre = ""; @@ -55,8 +53,7 @@ if (term.startsWith(prefixRef)) { // TODO: set BoostAttr based on distance of // searchTerm.text() and term().text() - String text = term.utf8ToString(); - return regexImpl.match(text) ? AcceptStatus.YES : AcceptStatus.NO; + return regexImpl.match(term) ? AcceptStatus.YES : AcceptStatus.NO; } else { return AcceptStatus.NO; }