Index: src/java/org/apache/lucene/analysis/CharArraySet.java
===================================================================
--- src/java/org/apache/lucene/analysis/CharArraySet.java (revision 892588)
+++ src/java/org/apache/lucene/analysis/CharArraySet.java (working copy)
@@ -369,18 +369,31 @@
* @param set
* a set to copy
* @return a copy of the given set as a {@link CharArraySet}. If the given set
- * is a {@link CharArraySet} the ignoreCase property will be
+ * is a {@link CharArraySet} the ignoreCase and matchVersion properties will be
* preserved.
- * @deprecated use {@link #copy(Version, Set)} instead
+ * @deprecated use {@link #copy(Version, Set)} instead.
*/
- public static CharArraySet copy(Set<?> set) {
- return copy(Version.LUCENE_30, set);
+ public static CharArraySet copy(final Set<?> set) { // overloads bind at compile time, so the CharArraySet case is dispatched by hand
+ return (set instanceof CharArraySet) ? copy((CharArraySet) set) : copy(Version.LUCENE_30, set);
}
/**
* Returns a copy of the given set as a {@link CharArraySet}. If the given set
* is a {@link CharArraySet} the ignoreCase property will be preserved.
*
+ * @param set
+ * a set to copy
+ * @return a copy of the given set as a {@link CharArraySet}. The ignoreCase and
+ * matchVersion properties will be preserved.
+ */
+ public static CharArraySet copy(final CharArraySet set) {
+ return copy(set.matchVersion, set); // delegate with the source's own matchVersion so it carries over to the copy
+ }
+
+ /**
+ * Returns a copy of the given set as a {@link CharArraySet}. If the given set
+ * is a {@link CharArraySet} the ignoreCase property will be preserved.
+ *
* @param matchVersion
* compatibility match version see Version
* note above for details.
@@ -388,16 +401,26 @@
* a set to copy
* @return a copy of the given set as a {@link CharArraySet}. If the given set
* is a {@link CharArraySet} the ignoreCase property will be
- * preserved.
+ * preserved. If the matchVersion is identical, an optimized copy
+ * algorithm will be used.
*/
- public static CharArraySet copy(Version matchVersion, Set<?> set) {
+ public static CharArraySet copy(final Version matchVersion, final Set<?> set) {
if (set == null)
throw new NullPointerException("Given set is null");
if(set == EMPTY_SET)
return EMPTY_SET;
- final boolean ignoreCase = set instanceof CharArraySet ? ((CharArraySet) set).ignoreCase
- : false;
- return new CharArraySet(matchVersion, set, ignoreCase);
+ if(set instanceof CharArraySet) {
+ // if the set is a CharArraySet use the fast path instead of iterating all values;
+ // even on very small sets this is ~10 times faster than iterating
+ final CharArraySet source = (CharArraySet) set;
+ if (source.matchVersion != matchVersion) { // different version: no fast path, build through the regular constructor
+ return new CharArraySet(matchVersion, source, source.ignoreCase);
+ }
+ final char[][] entries = new char[source.entries.length][];
+ System.arraycopy(source.entries, 0, entries, 0, entries.length); // copies only the char[] references, not their contents
+ return new CharArraySet(matchVersion, entries, source.ignoreCase, source.count);
+ }
+ return new CharArraySet(matchVersion, set, false); // any other Set: ignoreCase is false
}
Index: src/test/org/apache/lucene/analysis/TestCharArraySet.java
===================================================================
--- src/test/org/apache/lucene/analysis/TestCharArraySet.java (revision 892588)
+++ src/test/org/apache/lucene/analysis/TestCharArraySet.java (working copy)
@@ -17,11 +17,16 @@
* limitations under the License.
*/
+import java.util.ArrayList;
import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Version;
+
public class TestCharArraySet extends LuceneTestCase {
static final String[] TEST_STOP_WORDS = {
@@ -61,11 +66,15 @@
Integer val = Integer.valueOf(1);
set.add(val);
assertTrue(set.contains(val));
- assertTrue(set.contains(Integer.valueOf(1)));
+ assertTrue(set.contains(new Integer(1))); // another integer
+ assertTrue(set.contains("1"));
+ assertTrue(set.contains(new char[]{'1'}));
// test unmodifiable
set = CharArraySet.unmodifiableSet(set);
assertTrue(set.contains(val));
- assertTrue(set.contains(Integer.valueOf(1)));
+ assertTrue(set.contains(new Integer(1))); // another integer
+ assertTrue(set.contains("1"));
+ assertTrue(set.contains(new char[]{'1'}));
}
public void testClear(){
@@ -165,9 +174,16 @@
public void testUnmodifiableSet(){
CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 10,true);
set.addAll(Arrays.asList(TEST_STOP_WORDS));
+ set.add(Integer.valueOf(1));
final int size = set.size();
set = CharArraySet.unmodifiableSet(set);
assertEquals("Set size changed due to unmodifiableSet call" , size, set.size());
+ for (String stopword : TEST_STOP_WORDS) {
+ assertTrue(set.contains(stopword));
+ }
+ assertTrue(set.contains(Integer.valueOf(1)));
+ assertTrue(set.contains("1"));
+ assertTrue(set.contains(new char[]{'1'}));
try{
CharArraySet.unmodifiableSet(null);
@@ -301,4 +317,88 @@
.contains(lowerArr[i]));
}
}
+
+ /**
+ * Test the static #copy() function with a CharArraySet as a source
+ */
+ public void testCopyCharArraySet() {
+ CharArraySet setIgnoreCase = new CharArraySet(Version.LUCENE_CURRENT, 10, true);
+ CharArraySet setCaseSensitive = new CharArraySet(Version.LUCENE_CURRENT, 10, false);
+
+ List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
+ List<String> stopwordsUpper = new ArrayList<String>();
+ for (String string : stopwords) {
+ stopwordsUpper.add(string.toUpperCase());
+ }
+ setIgnoreCase.addAll(Arrays.asList(TEST_STOP_WORDS));
+ setIgnoreCase.add(Integer.valueOf(1));
+ setCaseSensitive.addAll(Arrays.asList(TEST_STOP_WORDS));
+ setCaseSensitive.add(Integer.valueOf(1));
+
+ CharArraySet copy = CharArraySet.copy(Version.LUCENE_CURRENT, setIgnoreCase);
+ CharArraySet copyCaseSens = CharArraySet.copy(Version.LUCENE_CURRENT, setCaseSensitive);
+
+ assertEquals(setIgnoreCase.size(), copy.size());
+ assertEquals(setCaseSensitive.size(), copyCaseSens.size());
+
+ assertTrue(copy.containsAll(stopwords));
+ assertTrue(copy.containsAll(stopwordsUpper)); // the copy of the ignoreCase set must match upper-cased words too
+ assertTrue(copyCaseSens.containsAll(stopwords));
+ for (String string : stopwordsUpper) {
+ assertFalse(copyCaseSens.contains(string));
+ }
+ // test adding terms to the copy
+ List<String> newWords = new ArrayList<String>();
+ for (String string : stopwords) {
+ newWords.add(string+"_1");
+ }
+ copy.addAll(newWords);
+
+ assertTrue(copy.containsAll(stopwords));
+ assertTrue(copy.containsAll(stopwordsUpper));
+ assertTrue(copy.containsAll(newWords));
+ // new added terms are not in the source set
+ for (String string : newWords) {
+ assertFalse(setIgnoreCase.contains(string));
+ assertFalse(setCaseSensitive.contains(string));
+
+ }
+ }
+
+ /**
+ * Test the static #copy() function with a JDK {@link Set} as a source
+ */
+ public void testCopyJDKSet() {
+ Set<String> set = new HashSet<String>();
+
+ List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
+ List<String> stopwordsUpper = new ArrayList<String>();
+ for (String string : stopwords) {
+ stopwordsUpper.add(string.toUpperCase());
+ }
+ set.addAll(Arrays.asList(TEST_STOP_WORDS));
+
+ CharArraySet copy = CharArraySet.copy(Version.LUCENE_CURRENT, set);
+
+ assertEquals(set.size(), copy.size());
+ assertTrue(copy.containsAll(set));
+
+ assertTrue(copy.containsAll(stopwords));
+ for (String string : stopwordsUpper) {
+ assertFalse(copy.contains(string));
+ }
+
+ List<String> newWords = new ArrayList<String>();
+ for (String string : stopwords) {
+ newWords.add(string+"_1");
+ }
+ copy.addAll(newWords); // the copy of a plain Set must stay modifiable
+
+ assertTrue(copy.containsAll(stopwords));
+ assertTrue(copy.containsAll(newWords));
+ // new added terms are not in the source set
+ for (String string : newWords) {
+ assertFalse(set.contains(string));
+ }
+ }
}