Index: src/java/org/apache/lucene/analysis/CharArraySet.java
===================================================================
--- src/java/org/apache/lucene/analysis/CharArraySet.java	(revision 892588)
+++ src/java/org/apache/lucene/analysis/CharArraySet.java	(working copy)
@@ -361,6 +361,25 @@
      */
     return new UnmodifiableCharArraySet(set.matchVersion, set.entries, set.ignoreCase, set.count);
   }
+
+  /**
+   * Returns a copy of the given {@link CharArraySet}. The ignoreCase
+   * and matchVersion properties will be preserved.
+   *
+   * @param set
+   *          a {@link CharArraySet} to copy
+   * @return a copy of the given set. The ignoreCase and
+   *         matchVersion properties will be preserved.
+   */
+  public static CharArraySet copy(final CharArraySet set) {
+    if(set == EMPTY_SET)
+      return EMPTY_SET;
+    // use the fast path instead of iterating all values:
+    // this is even on very small sets ~10 times faster than iterating
+    final char[][] entries = new char[set.entries.length][];
+    System.arraycopy(set.entries, 0, entries, 0, entries.length);
+    return new CharArraySet(set.matchVersion, entries, set.ignoreCase, set.count);
+  }
 
   /**
    * Returns a copy of the given set as a {@link CharArraySet}. If the given set
@@ -369,12 +388,14 @@
    * @param set
    *          a set to copy
    * @return a copy of the given set as a {@link CharArraySet}. If the given set
-   *         is a {@link CharArraySet} the ignoreCase property will be
+   *         is a {@link CharArraySet} the ignoreCase and matchVersion properties will be
    *         preserved.
-   * @deprecated use {@link #copy(Version, Set)} instead
+   * @deprecated use {@link #copy(Version, Set)} instead.
    */
-  public static CharArraySet copy(Set set) {
-    return copy(Version.LUCENE_30, set);
+  public static CharArraySet copy(final Set set) {
+    if(set == EMPTY_SET)
+      return EMPTY_SET;
+    return (set instanceof CharArraySet) ? copy((CharArraySet) set) : copy(Version.LUCENE_30, set);
   }
 
   /**
@@ -388,16 +409,21 @@
    *          a set to copy
    * @return a copy of the given set as a {@link CharArraySet}. If the given set
    *         is a {@link CharArraySet} the ignoreCase property will be
-   *         preserved.
+   *         preserved. If the matchVersion is identical, an optimized copy
+   *         algorithm will be used.
+   * @throws NullPointerException if the given set is null
    */
-  public static CharArraySet copy(Version matchVersion, Set set) {
+  public static CharArraySet copy(final Version matchVersion, final Set set) {
     if (set == null)
       throw new NullPointerException("Given set is null");
     if(set == EMPTY_SET)
       return EMPTY_SET;
-    final boolean ignoreCase = set instanceof CharArraySet ? ((CharArraySet) set).ignoreCase
-        : false;
-    return new CharArraySet(matchVersion, set, ignoreCase);
+    if(set instanceof CharArraySet) {
+      final CharArraySet source = (CharArraySet) set;
+      return (source.matchVersion == matchVersion) ?
+          copy(source) : new CharArraySet(matchVersion, source, source.ignoreCase);
+    }
+    return new CharArraySet(matchVersion, set, false);
   }
 
 
Index: src/test/org/apache/lucene/analysis/TestCharArraySet.java
===================================================================
--- src/test/org/apache/lucene/analysis/TestCharArraySet.java	(revision 892588)
+++ src/test/org/apache/lucene/analysis/TestCharArraySet.java	(working copy)
@@ -17,11 +17,16 @@
  * limitations under the License.
  */
 
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
 
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.Version;
 
+
 public class TestCharArraySet extends LuceneTestCase {
 
   static final String[] TEST_STOP_WORDS = {
@@ -61,11 +66,15 @@
     Integer val = Integer.valueOf(1);
     set.add(val);
     assertTrue(set.contains(val));
-    assertTrue(set.contains(Integer.valueOf(1)));
+    assertTrue(set.contains(new Integer(1))); // another integer
+    assertTrue(set.contains("1"));
+    assertTrue(set.contains(new char[]{'1'}));
     // test unmodifiable
     set = CharArraySet.unmodifiableSet(set);
     assertTrue(set.contains(val));
-    assertTrue(set.contains(Integer.valueOf(1)));
+    assertTrue(set.contains(new Integer(1))); // another integer
+    assertTrue(set.contains("1"));
+    assertTrue(set.contains(new char[]{'1'}));
   }
 
   public void testClear(){
@@ -165,9 +174,16 @@
   public void testUnmodifiableSet(){
     CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 10,true);
     set.addAll(Arrays.asList(TEST_STOP_WORDS));
+    set.add(Integer.valueOf(1));
     final int size = set.size();
     set = CharArraySet.unmodifiableSet(set);
     assertEquals("Set size changed due to unmodifiableSet call" , size, set.size());
+    for (String stopword : TEST_STOP_WORDS) {
+      assertTrue(set.contains(stopword));
+    }
+    assertTrue(set.contains(Integer.valueOf(1)));
+    assertTrue(set.contains("1"));
+    assertTrue(set.contains(new char[]{'1'}));
 
     try{
       CharArraySet.unmodifiableSet(null);
@@ -301,4 +317,87 @@
           .contains(lowerArr[i]));
     }
   }
+
+  /**
+   * Test the static #copy() function with a CharArraySet as a source
+   */
+  public void testCopyCharArraySet() {
+    CharArraySet setIngoreCase = new CharArraySet(Version.LUCENE_CURRENT, 10, true);
+    CharArraySet setCaseSensitive = new CharArraySet(Version.LUCENE_CURRENT, 10, false);
+
+    List stopwords = Arrays.asList(TEST_STOP_WORDS);
+    List stopwordsUpper = new ArrayList();
+    for (String string : stopwords) {
+      stopwordsUpper.add(string.toUpperCase());
+    }
+    setIngoreCase.addAll(Arrays.asList(TEST_STOP_WORDS));
+    setIngoreCase.add(Integer.valueOf(1));
+    setCaseSensitive.addAll(Arrays.asList(TEST_STOP_WORDS));
+    setCaseSensitive.add(Integer.valueOf(1));
+
+    CharArraySet copy = CharArraySet.copy(Version.LUCENE_CURRENT, setIngoreCase);
+    CharArraySet copyCaseSens = CharArraySet.copy(Version.LUCENE_CURRENT, setCaseSensitive);
+
+    assertEquals(setIngoreCase.size(), copy.size());
+    assertEquals(setCaseSensitive.size(), copyCaseSens.size());
+
+    assertTrue(copy.containsAll(stopwords));
+    assertTrue(copy.containsAll(stopwordsUpper));
+    assertTrue(copyCaseSens.containsAll(stopwords));
+    for (String string : stopwordsUpper) {
+      assertFalse(copyCaseSens.contains(string));
+    }
+    // test adding terms to the copy
+    List newWords = new ArrayList();
+    for (String string : stopwords) {
+      newWords.add(string+"_1");
+    }
+    copy.addAll(newWords);
+
+    assertTrue(copy.containsAll(stopwords));
+    assertTrue(copy.containsAll(stopwordsUpper));
+    assertTrue(copy.containsAll(newWords));
+    // newly added terms are not in the source set
+    for (String string : newWords) {
+      assertFalse(setIngoreCase.contains(string));
+      assertFalse(setCaseSensitive.contains(string));
+
+    }
+  }
+
+  /**
+   * Test the static #copy() function with a JDK {@link Set} as a source
+   */
+  public void testCopyJDKSet() {
+    Set set = new HashSet();
+
+    List stopwords = Arrays.asList(TEST_STOP_WORDS);
+    List stopwordsUpper = new ArrayList();
+    for (String string : stopwords) {
+      stopwordsUpper.add(string.toUpperCase());
+    }
+    set.addAll(Arrays.asList(TEST_STOP_WORDS));
+
+    CharArraySet copy = CharArraySet.copy(Version.LUCENE_CURRENT, set);
+
+    assertEquals(set.size(), copy.size());
+
+    assertTrue(copy.containsAll(stopwords));
+    for (String string : stopwordsUpper) {
+      assertFalse(copy.contains(string));
+    }
+
+    List newWords = new ArrayList();
+    for (String string : stopwords) {
+      newWords.add(string+"_1");
+    }
+    copy.addAll(newWords);
+
+    assertTrue(copy.containsAll(stopwords));
+    assertTrue(copy.containsAll(newWords));
+    // newly added terms are not in the source set
+    for (String string : newWords) {
+      assertFalse(set.contains(string));
+    }
+  }
 }