Index: lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestJakartaRegexpCapabilities.java =================================================================== --- lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestJakartaRegexpCapabilities.java (revision 984862) +++ lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestJakartaRegexpCapabilities.java (working copy) @@ -17,6 +17,7 @@ * limitations under the License. */ +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; /** @@ -27,19 +28,19 @@ public void testGetPrefix(){ JakartaRegexpCapabilities cap = new JakartaRegexpCapabilities(); cap.compile("luc[e]?"); - assertTrue(cap.match("luce")); + assertTrue(cap.match(new BytesRef("luce"))); assertEquals("luc", cap.prefix()); cap.compile("lucene"); - assertTrue(cap.match("lucene")); + assertTrue(cap.match(new BytesRef("lucene"))); assertEquals("lucene", cap.prefix()); } public void testShakyPrefix(){ JakartaRegexpCapabilities cap = new JakartaRegexpCapabilities(); cap.compile("(ab|ac)"); - assertTrue(cap.match("ab")); - assertTrue(cap.match("ac")); + assertTrue(cap.match(new BytesRef("ab"))); + assertTrue(cap.match(new BytesRef("ac"))); // why is it not a??? assertNull(cap.prefix()); } Index: lucene/contrib/queries/src/java/org/apache/lucene/search/regex/JakartaRegexpCapabilities.java =================================================================== --- lucene/contrib/queries/src/java/org/apache/lucene/search/regex/JakartaRegexpCapabilities.java (revision 984862) +++ lucene/contrib/queries/src/java/org/apache/lucene/search/regex/JakartaRegexpCapabilities.java (working copy) @@ -17,6 +17,9 @@ * limitations under the License. */ +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.UnicodeUtil; +import org.apache.regexp.CharacterIterator; import org.apache.regexp.RE; import org.apache.regexp.REProgram; import java.lang.reflect.Field; @@ -31,7 +34,27 @@ */ public class JakartaRegexpCapabilities implements RegexCapabilities { private RE regexp; + private final UnicodeUtil.UTF16Result utf16 = new UnicodeUtil.UTF16Result(); + private final CharacterIterator utf16wrapper = new CharacterIterator() { + public char charAt(int pos) { + return utf16.result[pos]; + } + + public boolean isEnd(int pos) { + return pos >= utf16.length; + } + + public String substring(int beginIndex) { + return substring(beginIndex, utf16.length); + } + + public String substring(int beginIndex, int endIndex) { + return new String(utf16.result, beginIndex, endIndex - beginIndex); + } + + }; + private static Field prefixField; private static Method getPrefixMethod; static { @@ -83,8 +106,9 @@ regexp = new RE(pattern, this.flags); } - public boolean match(String string) { - return regexp.match(string); + public boolean match(BytesRef term) { + UnicodeUtil.UTF8toUTF16(term.bytes, term.offset, term.length, utf16); + return regexp.match(utf16wrapper, 0); } public String prefix() { Index: lucene/contrib/queries/src/java/org/apache/lucene/search/regex/JavaUtilRegexCapabilities.java =================================================================== --- lucene/contrib/queries/src/java/org/apache/lucene/search/regex/JavaUtilRegexCapabilities.java (revision 984862) +++ lucene/contrib/queries/src/java/org/apache/lucene/search/regex/JavaUtilRegexCapabilities.java (working copy) @@ -17,8 +17,12 @@ * limitations under the License. */ +import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.UnicodeUtil; + /** * An implementation tying Java's built-in java.util.regex to RegexQuery. * @@ -28,8 +32,24 @@ */ public class JavaUtilRegexCapabilities implements RegexCapabilities { private Pattern pattern; + private Matcher matcher; private int flags = 0; - + private final UnicodeUtil.UTF16Result utf16 = new UnicodeUtil.UTF16Result(); + private final CharSequence utf16wrapper = new CharSequence() { + + public int length() { + return utf16.length; + } + + public char charAt(int index) { + return utf16.result[index]; + } + + public CharSequence subSequence(int start, int end) { + return new String(utf16.result, start, end - start); + } + + }; // Define the optional flags from Pattern that can be used. // Do this here to keep Pattern contained within this class. @@ -68,10 +88,12 @@ public void compile(String pattern) { this.pattern = Pattern.compile(pattern, this.flags); + this.matcher = this.pattern.matcher(utf16wrapper); } - public boolean match(String string) { - return pattern.matcher(string).matches(); + public boolean match(BytesRef term) { + UnicodeUtil.UTF8toUTF16(term.bytes, term.offset, term.length, utf16); + return matcher.reset().matches(); } public String prefix() { Index: lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexCapabilities.java =================================================================== --- lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexCapabilities.java (revision 984862) +++ lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexCapabilities.java (working copy) @@ -1,5 +1,7 @@ package org.apache.lucene.search.regex; +import org.apache.lucene.util.BytesRef; + /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -36,7 +38,7 @@ * @param string * @return true if string matches the pattern last passed to {@link #compile}. */ - boolean match(String string); + boolean match(BytesRef term); /** * A wise prefix implementation can reduce the term enumeration (and thus increase performance) Index: lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexTermsEnum.java =================================================================== --- lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexTermsEnum.java (revision 984862) +++ lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexTermsEnum.java (working copy) @@ -55,8 +55,7 @@ if (term.startsWith(prefixRef)) { // TODO: set BoostAttr based on distance of // searchTerm.text() and term().text() - String text = term.utf8ToString(); - return regexImpl.match(text) ? AcceptStatus.YES : AcceptStatus.NO; + return regexImpl.match(term) ? AcceptStatus.YES : AcceptStatus.NO; } else { return AcceptStatus.NO; }