Index: src/java/org/apache/lucene/analysis/CharArrayMap.java
===================================================================
--- src/java/org/apache/lucene/analysis/CharArrayMap.java (revision 905065)
+++ src/java/org/apache/lucene/analysis/CharArrayMap.java (working copy)
@@ -1,15 +1,5 @@
package org.apache.lucene.analysis;
-import java.util.Arrays;
-import java.util.AbstractSet;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.Set;
-
-import org.apache.lucene.util.CharacterUtils;
-import org.apache.lucene.util.Version;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -27,14 +17,22 @@
* limitations under the License.
*/
+import java.util.Arrays;
+import java.util.AbstractMap;
+import java.util.AbstractSet;
+import java.util.Iterator;
+import java.util.Map;
+import org.apache.lucene.util.CharacterUtils;
+import org.apache.lucene.util.Version;
+
/**
- * A simple class that stores Strings as char[]'s in a
- * hash table. Note that this is not a general purpose
+ * A simple class that stores key Strings as char[]'s in a
+ * hash table. Note that this is not a general purpose
* class. For example, it cannot remove items from the
- * set, nor does it resize its hash table to be smaller,
- * etc. It is designed to be quick to test if a char[]
- * is in the set without the necessity of converting it
+ * map, nor does it resize its hash table to be smaller,
+ * etc. It is designed to be quick to retrieve items
+ * by char[] keys without the necessity of converting
* to a String first.
*
You must specify the required {@link Version}
* compatibility when creating {@link CharArraySet}:
@@ -46,29 +44,22 @@
* lowercased correctly due to the lack of Unicode 4
* support in JDK 1.4. To use instances of
* {@link CharArraySet} with the behavior before Lucene
- * 3.1 pass a {@link Version} < 3.1 to the constructors.
- *
- * Please note: This class implements {@link java.util.Set Set} but
- * does not behave like it should in all cases. The generic type is
- * {@code Set}, because you can add any object to it,
- * that has a string representation. The add methods will use
- * {@link Object#toString} and store the result using a {@code char[]}
- * buffer. The same behavior have the {@code contains()} methods.
- * The {@link #iterator()} returns an {@code Iterator}.
- * For type safety also {@link #stringIterator()} is provided.
+ * 3.1 pass a {@link Version} &lt; 3.1 to the constructors.
*/
-public class CharArraySet extends AbstractSet {
+public class CharArrayMap extends AbstractMap {
+ // private only because missing generics
+ private static final CharArrayMap> EMPTY_MAP = new EmptyCharArrayMap();
+
private final static int INIT_SIZE = 8;
- private char[][] entries;
+ private final CharacterUtils charUtils;
+ private boolean ignoreCase;
private int count;
- private final boolean ignoreCase;
- public static final CharArraySet EMPTY_SET = new EmptyCharArraySet();
-
- private final CharacterUtils charUtils;
- private final Version matchVersion;
+ final Version matchVersion; // package private because used in CharArraySet
+ char[][] keys; // package private because used in CharArraySet's non Set-conform CharArraySetIterator
+ V[] values; // package private because used in CharArraySet's non Set-conform CharArraySetIterator
/**
- * Create set with enough capacity to hold startSize terms
+ * Create map with enough capacity to hold startSize terms
*
* @param matchVersion
* compatibility match version see Version
@@ -79,101 +70,104 @@
* false if and only if the set should be case sensitive
* otherwise true.
*/
- public CharArraySet(Version matchVersion, int startSize, boolean ignoreCase) {
+ @SuppressWarnings("unchecked")
+ public CharArrayMap(Version matchVersion, int startSize, boolean ignoreCase) {
this.ignoreCase = ignoreCase;
int size = INIT_SIZE;
while(startSize + (startSize>>2) > size)
size <<= 1;
- entries = new char[size][];
+ keys = new char[size][];
+ values = (V[]) new Object[size];
this.charUtils = CharacterUtils.getInstance(matchVersion);
this.matchVersion = matchVersion;
}
/**
- * Creates a set from a Collection of objects.
+ * Creates a map from the mappings in another map.
*
* @param matchVersion
* compatibility match version see Version
* note above for details.
* @param c
- * a collection whose elements to be placed into the set
+ * a map whose mappings to be copied
* @param ignoreCase
* false if and only if the set should be case sensitive
* otherwise true.
*/
- public CharArraySet(Version matchVersion, Collection extends Object> c, boolean ignoreCase) {
+ public CharArrayMap(Version matchVersion, Map,? extends V> c, boolean ignoreCase) {
this(matchVersion, c.size(), ignoreCase);
- addAll(c);
+ putAll(c);
}
-
- /**
- * Creates a set with enough capacity to hold startSize terms
- *
- * @param startSize
- * the initial capacity
- * @param ignoreCase
- * false if and only if the set should be case sensitive
- * otherwise true.
- * @deprecated use {@link #CharArraySet(Version, int, boolean)} instead
- */
- @Deprecated
- public CharArraySet(int startSize, boolean ignoreCase) {
- this(Version.LUCENE_30, startSize, ignoreCase);
- }
- /**
- * Creates a set from a Collection of objects.
- *
- * @param c
- * a collection whose elements to be placed into the set
- * @param ignoreCase
- * false if and only if the set should be case sensitive
- * otherwise true.
- * @deprecated use {@link #CharArraySet(Version, Collection, boolean)} instead
- */
- @Deprecated
- public CharArraySet(Collection extends Object> c, boolean ignoreCase) {
- this(Version.LUCENE_30, c.size(), ignoreCase);
- addAll(c);
+ /** Creates a map that shares the internal state of {@code toCopy}: the keys/values arrays are referenced, not cloned. */
+ private CharArrayMap(CharArrayMap toCopy){
+ this.keys = toCopy.keys;
+ this.values = toCopy.values;
+ this.ignoreCase = toCopy.ignoreCase;
+ this.count = toCopy.count;
+ this.charUtils = toCopy.charUtils;
+ this.matchVersion = toCopy.matchVersion;
}
- /** Create set from entries */
- private CharArraySet(Version matchVersion, char[][] entries, boolean ignoreCase, int count){
- this.entries = entries;
- this.ignoreCase = ignoreCase;
- this.count = count;
- this.charUtils = CharacterUtils.getInstance(matchVersion);
- this.matchVersion = matchVersion;
- }
-
- /** Clears all entries in this set. This method is supported for reusing, but not {@link Set#remove}. */
+ /** Clears all entries in this map. This method is supported for reusing, but not {@link Map#remove}. */
@Override
public void clear() {
count = 0;
- Arrays.fill(entries, null);
+ Arrays.fill(keys, null);
+ Arrays.fill(values, null);
}
/** true if the len chars of text starting at off
- * are in the set */
- public boolean contains(char[] text, int off, int len) {
- return entries[getSlot(text, off, len)] != null;
+ * are in the {@link #keySet} */
+ public boolean containsKey(char[] text, int off, int len) {
+ return keys[getSlot(text, off, len)] != null;
}
- /** true if the CharSequence is in the set */
- public boolean contains(CharSequence cs) {
- return entries[getSlot(cs)] != null;
+ /** true if the CharSequence is in the {@link #keySet} */
+ public boolean containsKey(CharSequence cs) {
+ return keys[getSlot(cs)] != null;
}
+ @Override
+ public boolean containsKey(Object o) {
+ if (o instanceof char[]) {
+ final char[] text = (char[])o;
+ return containsKey(text, 0, text.length);
+ }
+ return containsKey(o.toString());
+ }
+
+
+ /** returns the value of the mapping of len chars of text
+ * starting at off */
+ public V get(char[] text, int off, int len) {
+ return values[getSlot(text, off, len)];
+ }
+
+ /** returns the value of the mapping of the chars inside this {@code CharSequence} */
+ public V get(CharSequence cs) {
+ return values[getSlot(cs)];
+ }
+
+ @Override
+ public V get(Object o) {
+ if (o instanceof char[]) {
+ final char[] text = (char[])o;
+ return get(text, 0, text.length);
+ }
+ return get(o.toString());
+ }
+
private int getSlot(char[] text, int off, int len) {
int code = getHashCode(text, off, len);
- int pos = code & (entries.length-1);
- char[] text2 = entries[pos];
+ int pos = code & (keys.length-1);
+ char[] text2 = keys[pos];
if (text2 != null && !equals(text, off, len, text2)) {
final int inc = ((code>>8)+code)|1;
do {
code += inc;
- pos = code & (entries.length-1);
- text2 = entries[pos];
+ pos = code & (keys.length-1);
+ text2 = keys[pos];
} while (text2 != null && !equals(text, off, len, text2));
}
return pos;
@@ -182,34 +176,42 @@
/** Returns true if the String is in the set */
private int getSlot(CharSequence text) {
int code = getHashCode(text);
- int pos = code & (entries.length-1);
- char[] text2 = entries[pos];
+ int pos = code & (keys.length-1);
+ char[] text2 = keys[pos];
if (text2 != null && !equals(text, text2)) {
final int inc = ((code>>8)+code)|1;
do {
code += inc;
- pos = code & (entries.length-1);
- text2 = entries[pos];
+ pos = code & (keys.length-1);
+ text2 = keys[pos];
} while (text2 != null && !equals(text, text2));
}
return pos;
}
- /** Add this CharSequence into the set */
- public boolean add(CharSequence text) {
- return add(text.toString()); // could be more efficient
+ /** Add the given mapping. */
+ public V put(CharSequence text, V value) {
+ return put(text.toString(), value); // could be more efficient
}
+
+ @Override
+ public V put(Object o, V value) {
+ if (o instanceof char[]) {
+ return put((char[])o, value);
+ }
+ return put(o.toString(), value);
+ }
- /** Add this String into the set */
- public boolean add(String text) {
- return add(text.toCharArray());
+ /** Add the given mapping. */
+ public V put(String text, V value) {
+ return put(text.toCharArray(), value);
}
- /** Add this char[] directly to the set.
+ /** Add the given mapping.
* If ignoreCase is true for this Set, the text array will be directly modified.
* The user should never modify this text array after calling this method.
*/
- public boolean add(char[] text) {
+ public V put(char[] text, V value) {
if (ignoreCase)
for(int i=0;i>2) > entries.length) {
+ if (count + (count>>2) > keys.length) {
rehash();
}
- return true;
+ return null;
}
+ @SuppressWarnings("unchecked")
+ private void rehash() {
+ assert keys.length == values.length;
+ final int newSize = 2*keys.length;
+ final char[][] oldkeys = keys;
+ final V[] oldvalues = values;
+ keys = new char[newSize][];
+ values = (V[]) new Object[newSize];
+
+ for(int i=0; i> {
+ private int pos=-1;
+ private int lastPos;
+ private final boolean allowModify;
+
+ private EntryIterator(boolean allowModify) {
+ this.allowModify = allowModify;
+ goNext();
}
- return add(o.toString());
- }
-
- /**
- * Returns an unmodifiable {@link CharArraySet}. This allows to provide
- * unmodifiable views of internal sets for "read-only" use.
- *
- * @param set
- * a set for which the unmodifiable set is returned.
- * @return an new unmodifiable {@link CharArraySet}.
- * @throws NullPointerException
- * if the given set is null.
- */
- public static CharArraySet unmodifiableSet(CharArraySet set) {
- if (set == null)
- throw new NullPointerException("Given set is null");
- if (set == EMPTY_SET)
- return EMPTY_SET;
- if (set instanceof UnmodifiableCharArraySet)
- return set;
- /*
- * Instead of delegating calls to the given set copy the low-level values to
- * the unmodifiable Subclass
- */
- return new UnmodifiableCharArraySet(set.matchVersion, set.entries, set.ignoreCase, set.count);
- }
+ private void goNext() {
+ lastPos = pos;
+ pos++;
+ while (pos < keys.length && keys[pos] == null) pos++;
+ }
- /**
- * Returns a copy of the given set as a {@link CharArraySet}. If the given set
- * is a {@link CharArraySet} the ignoreCase property will be preserved.
- *
- * @param set
- * a set to copy
- * @return a copy of the given set as a {@link CharArraySet}. If the given set
- * is a {@link CharArraySet} the ignoreCase and matchVersion property will be
- * preserved.
- * @deprecated use {@link #copy(Version, Set)} instead.
- */
- @Deprecated
- public static CharArraySet copy(final Set> set) {
- if(set == EMPTY_SET)
- return EMPTY_SET;
- return copy(Version.LUCENE_30, set);
- }
-
- /**
- * Returns a copy of the given set as a {@link CharArraySet}. If the given set
- * is a {@link CharArraySet} the ignoreCase property will be preserved.
- *
- * Note: If you intend to create a copy of another {@link CharArraySet} where
- * the {@link Version} of the source set differs from its copy
- * {@link #CharArraySet(Version, Collection, boolean)} should be used instead.
- * The {@link #copy(Version, Set)} will preserve the {@link Version} of the
- * source set it is an instance of {@link CharArraySet}.
- *
- *
- * @param matchVersion
- * compatibility match version see Version
- * note above for details. This argument will be ignored if the
- * given set is a {@link CharArraySet}.
- * @param set
- * a set to copy
- * @return a copy of the given set as a {@link CharArraySet}. If the given set
- * is a {@link CharArraySet} the ignoreCase property as well as the
- * matchVersion will be of the given set will be preserved.
- */
- public static CharArraySet copy(final Version matchVersion, final Set> set) {
- if(set == EMPTY_SET)
- return EMPTY_SET;
- if(set instanceof CharArraySet) {
- final CharArraySet source = (CharArraySet) set;
- // use fast path instead of iterating all values
- // this is even on very small sets ~10 times faster than iterating
- final char[][] entries = new char[source.entries.length][];
- System.arraycopy(source.entries, 0, entries, 0, entries.length);
- return new CharArraySet(source.matchVersion, entries, source.ignoreCase, source.count);
+ public boolean hasNext() {
+ return pos < keys.length;
}
- return new CharArraySet(matchVersion, set, false);
- }
-
- /** The Iterator for this set. Strings are constructed on the fly, so
- * use nextCharArray for more efficient access. */
- public class CharArraySetIterator implements Iterator {
- int pos=-1;
- char[] next;
- CharArraySetIterator() {
+ /** gets the next key... do not modify the returned char[] */
+ public char[] nextKey() {
goNext();
+ return keys[lastPos];
}
- private void goNext() {
- next = null;
- pos++;
- while (pos < entries.length && (next=entries[pos]) == null) pos++;
+ /** gets the next key as a newly created String object */
+ public String nextKeyString() {
+ return new String(nextKey());
}
- public boolean hasNext() {
- return next != null;
+ /** returns the value associated with the last key returned */
+ public V currentValue() {
+ return values[lastPos];
}
- /** do not modify the returned char[] */
- public char[] nextCharArray() {
- char[] ret = next;
- goNext();
- return ret;
+ /** sets the value associated with the last key returned */
+ public V setValue(V value) {
+ if (!allowModify)
+ throw new UnsupportedOperationException();
+ V old = values[lastPos];
+ values[lastPos] = value;
+ return old;
}
- /** Returns the next String, as a Set would...
- * use nextCharArray() for better efficiency. */
- public String next() {
- return new String(nextCharArray());
+ /** Returns the next entry as a newly allocated {@link Map.Entry}; use nextKey() + currentValue() for better efficiency. */
+ public Map.Entry next() {
+ goNext();
+ return new MapEntry(lastPos, allowModify);
}
public void remove() {
@@ -462,93 +410,241 @@
}
}
- /** returns an iterator of new allocated Strings */
- public Iterator stringIterator() {
- return new CharArraySetIterator();
+ private final class MapEntry implements Map.Entry {
+ private final int pos;
+ private final boolean allowModify;
+
+ private MapEntry(int pos, boolean allowModify) {
+ this.pos = pos;
+ this.allowModify = allowModify;
+ }
+
+ public Object getKey() {
+ // we must clone here, as putAll to another CharArrayMap
+ // with other case sensitivity flag would corrupt the keys
+ return keys[pos].clone();
+ }
+
+ public V getValue() {
+ return values[pos];
+ }
+
+ public V setValue(V value) {
+ if (!allowModify)
+ throw new UnsupportedOperationException();
+ final V old = values[pos];
+ values[pos] = value;
+ return old;
+ }
+
+ @Override
+ public String toString() {
+ return new StringBuilder().append(keys[pos]).append('=').append(values[pos]).toString();
+ }
}
- /** returns an iterator of new allocated Strings, this method violates the Set interface */
- @Override
- @SuppressWarnings("unchecked")
- public Iterator iterator() {
- return (Iterator) stringIterator();
+ /** public EntrySet class so efficient methods are exposed to users */
+ public final class EntrySet extends AbstractSet> {
+ private final boolean allowModify;
+
+ private EntrySet(boolean allowModify) {
+ this.allowModify = allowModify;
+ }
+
+ @Override
+ public EntryIterator iterator() {
+ return new EntryIterator(allowModify);
+ }
+
+ @Override
+ public boolean contains(Object o) {
+ if (!(o instanceof Map.Entry))
+ return false;
+ final Map.Entry e = (Map.Entry)o;
+ final Object key = e.getKey();
+ final Object val = e.getValue();
+ final Object v = get(key);
+ return v == null ? val == null : v.equals(val);
+ }
+
+ @Override
+ public boolean remove(Object o) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public int size() {
+ return count;
+ }
+
+ @Override
+ public void clear() {
+ if (!allowModify)
+ throw new UnsupportedOperationException();
+ CharArrayMap.this.clear();
+ }
}
/**
- * Efficient unmodifiable {@link CharArraySet}. This implementation does not
- * delegate calls to a give {@link CharArraySet} like
- * {@link Collections#unmodifiableSet(java.util.Set)} does. Instead is passes
- * the internal representation of a {@link CharArraySet} to a super
- * constructor and overrides all mutators.
+ * Returns an unmodifiable {@link CharArrayMap}. This allows providing
+ * unmodifiable views of an internal map for "read-only" use.
+ *
+ * @param map
+ * a map for which the unmodifiable map is returned.
+ * @return a new unmodifiable {@link CharArrayMap}.
+ * @throws NullPointerException
+ * if the given map is null.
*/
- private static class UnmodifiableCharArraySet extends CharArraySet {
+ public static CharArrayMap unmodifiableMap(CharArrayMap map) {
+ if (map == null)
+ throw new NullPointerException("Given map is null");
+ if (map == emptyMap() || map.isEmpty())
+ return emptyMap();
+ if (map instanceof UnmodifiableCharArrayMap)
+ return map;
+ return new UnmodifiableCharArrayMap(map);
+ }
- private UnmodifiableCharArraySet(Version matchVersion, char[][] entries, boolean ignoreCase,
- int count) {
- super(matchVersion, entries, ignoreCase, count);
+ /**
+ * Returns a copy of the given map as a {@link CharArrayMap}. If the given map
+ * is a {@link CharArrayMap} the ignoreCase property will be preserved.
+ *
+ * Note: If you intend to create a copy of another {@link CharArrayMap} where
+ * the {@link Version} of the source map differs from its copy
+ * {@link #CharArrayMap(Version, Map, boolean)} should be used instead.
+ * The {@link #copy(Version, Map)} will preserve the {@link Version} of the
+ * source map if it is an instance of {@link CharArrayMap}.
+ *
+ *
+ * @param matchVersion
+ * compatibility match version see Version
+ * note above for details. This argument will be ignored if the
+ * given map is a {@link CharArrayMap}.
+ * @param map
+ * a map to copy
+ * @return a copy of the given map as a {@link CharArrayMap}. If the given map
+ * is a {@link CharArrayMap} the ignoreCase property as well as the
+ * matchVersion of the given map will be preserved.
+ */
+ @SuppressWarnings("unchecked")
+ public static CharArrayMap copy(final Version matchVersion, final Map,? extends V> map) {
+ if(map == EMPTY_MAP)
+ return emptyMap();
+ if(map instanceof CharArrayMap) {
+ CharArrayMap m = (CharArrayMap) map;
+ // use fast path instead of iterating all values
+ // this is even on very small sets ~10 times faster than iterating
+ final char[][] keys = new char[m.keys.length][];
+ System.arraycopy(m.keys, 0, keys, 0, keys.length);
+ final V[] values = (V[]) new Object[m.values.length];
+ System.arraycopy(m.values, 0, values, 0, values.length);
+ m = new CharArrayMap(m);
+ m.keys = keys;
+ m.values = values;
+ return m;
}
+ return new CharArrayMap(matchVersion, map, false);
+ }
+
+ /** Returns an empty, unmodifiable map. */
+ @SuppressWarnings("unchecked")
+ public static CharArrayMap emptyMap() {
+ return (CharArrayMap) EMPTY_MAP;
+ }
+
+ // package private CharArraySet instanceof check in CharArraySet
+ static class UnmodifiableCharArrayMap extends CharArrayMap {
+ UnmodifiableCharArrayMap(CharArrayMap map) {
+ super(map);
+ }
+
@Override
public void clear() {
throw new UnsupportedOperationException();
}
@Override
- public boolean add(Object o){
+ public V put(Object o, V val){
throw new UnsupportedOperationException();
}
@Override
- public boolean addAll(Collection extends Object> coll) {
+ public V put(char[] text, V val) {
throw new UnsupportedOperationException();
}
-
+
@Override
- public boolean add(char[] text) {
+ public V put(CharSequence text, V val) {
throw new UnsupportedOperationException();
}
@Override
- public boolean add(CharSequence text) {
+ public V put(String text, V val) {
throw new UnsupportedOperationException();
}
-
+
@Override
- public boolean add(String text) {
+ public V remove(Object key) {
throw new UnsupportedOperationException();
}
+
+ @Override
+ public EntrySet entrySet() {
+ return new EntrySet(false);
+ }
}
/**
- * Empty {@link UnmodifiableCharArraySet} optimized for speed.
+ * Empty {@link UnmodifiableCharArrayMap} optimized for speed.
* Contains checks will always return false or throw
* NPE if necessary.
*/
- private static final class EmptyCharArraySet extends UnmodifiableCharArraySet {
-
- private EmptyCharArraySet() {
- super(Version.LUCENE_CURRENT, new char[0][], false, 0);
+ private static final class EmptyCharArrayMap extends UnmodifiableCharArrayMap {
+ EmptyCharArrayMap() {
+ super(new CharArrayMap(Version.LUCENE_CURRENT, 0, false));
}
@Override
- public boolean contains(char[] text, int off, int len) {
+ public boolean containsKey(char[] text, int off, int len) {
if(text == null)
throw new NullPointerException();
return false;
}
@Override
- public boolean contains(CharSequence cs) {
+ public boolean containsKey(CharSequence cs) {
if(cs == null)
throw new NullPointerException();
return false;
}
@Override
- public boolean contains(Object o) {
+ public boolean containsKey(Object o) {
if(o == null)
throw new NullPointerException();
return false;
}
+
+ @Override
+ public V get(char[] text, int off, int len) {
+ if(text == null)
+ throw new NullPointerException();
+ return null;
+ }
+
+ @Override
+ public V get(CharSequence cs) {
+ if(cs == null)
+ throw new NullPointerException();
+ return null;
+ }
+
+ @Override
+ public V get(Object o) {
+ if(o == null)
+ throw new NullPointerException();
+ return null;
+ }
}
}
Index: src/java/org/apache/lucene/analysis/CharArraySet.java
===================================================================
--- src/java/org/apache/lucene/analysis/CharArraySet.java (revision 905065)
+++ src/java/org/apache/lucene/analysis/CharArraySet.java (working copy)
@@ -1,15 +1,5 @@
package org.apache.lucene.analysis;
-import java.util.Arrays;
-import java.util.AbstractSet;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.Set;
-
-import org.apache.lucene.util.CharacterUtils;
-import org.apache.lucene.util.Version;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -27,7 +17,14 @@
* limitations under the License.
*/
+import java.util.Arrays;
+import java.util.AbstractSet;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.Set;
+import org.apache.lucene.util.Version;
+
/**
* A simple class that stores Strings as char[]'s in a
* hash table. Note that this is not a general purpose
@@ -58,15 +55,11 @@
* For type safety also {@link #stringIterator()} is provided.
*/
public class CharArraySet extends AbstractSet {
- private final static int INIT_SIZE = 8;
- private char[][] entries;
- private int count;
- private final boolean ignoreCase;
- public static final CharArraySet EMPTY_SET = new EmptyCharArraySet();
+ public static final CharArraySet EMPTY_SET = new CharArraySet(CharArrayMap.emptyMap());
+ private static final Object PLACEHOLDER = new Object();
- private final CharacterUtils charUtils;
- private final Version matchVersion;
-
+ private final CharArrayMap map;
+
/**
* Create set with enough capacity to hold startSize terms
*
@@ -80,13 +73,7 @@
* otherwise true.
*/
public CharArraySet(Version matchVersion, int startSize, boolean ignoreCase) {
- this.ignoreCase = ignoreCase;
- int size = INIT_SIZE;
- while(startSize + (startSize>>2) > size)
- size <<= 1;
- entries = new char[size][];
- this.charUtils = CharacterUtils.getInstance(matchVersion);
- this.matchVersion = matchVersion;
+ map = new CharArrayMap(matchVersion, startSize, ignoreCase);
}
/**
@@ -101,7 +88,7 @@
* false if and only if the set should be case sensitive
* otherwise true.
*/
- public CharArraySet(Version matchVersion, Collection extends Object> c, boolean ignoreCase) {
+ public CharArraySet(Version matchVersion, Collection> c, boolean ignoreCase) {
this(matchVersion, c.size(), ignoreCase);
addAll(c);
}
@@ -132,77 +119,51 @@
* @deprecated use {@link #CharArraySet(Version, Collection, boolean)} instead
*/
@Deprecated
- public CharArraySet(Collection extends Object> c, boolean ignoreCase) {
+ public CharArraySet(Collection> c, boolean ignoreCase) {
this(Version.LUCENE_30, c.size(), ignoreCase);
addAll(c);
}
/** Create set from entries */
- private CharArraySet(Version matchVersion, char[][] entries, boolean ignoreCase, int count){
- this.entries = entries;
- this.ignoreCase = ignoreCase;
- this.count = count;
- this.charUtils = CharacterUtils.getInstance(matchVersion);
- this.matchVersion = matchVersion;
+ private CharArraySet(final CharArrayMap map){
+ this.map = map;
}
/** Clears all entries in this set. This method is supported for reusing, but not {@link Set#remove}. */
@Override
public void clear() {
- count = 0;
- Arrays.fill(entries, null);
+ map.clear();
}
/** true if the len chars of text starting at off
* are in the set */
public boolean contains(char[] text, int off, int len) {
- return entries[getSlot(text, off, len)] != null;
+ return map.containsKey(text, off, len);
}
/** true if the CharSequence is in the set */
public boolean contains(CharSequence cs) {
- return entries[getSlot(cs)] != null;
+ return map.containsKey(cs);
}
- private int getSlot(char[] text, int off, int len) {
- int code = getHashCode(text, off, len);
- int pos = code & (entries.length-1);
- char[] text2 = entries[pos];
- if (text2 != null && !equals(text, off, len, text2)) {
- final int inc = ((code>>8)+code)|1;
- do {
- code += inc;
- pos = code & (entries.length-1);
- text2 = entries[pos];
- } while (text2 != null && !equals(text, off, len, text2));
- }
- return pos;
+ @Override
+ public boolean contains(Object o) {
+ return map.containsKey(o);
}
- /** Returns true if the String is in the set */
- private int getSlot(CharSequence text) {
- int code = getHashCode(text);
- int pos = code & (entries.length-1);
- char[] text2 = entries[pos];
- if (text2 != null && !equals(text, text2)) {
- final int inc = ((code>>8)+code)|1;
- do {
- code += inc;
- pos = code & (entries.length-1);
- text2 = entries[pos];
- } while (text2 != null && !equals(text, text2));
- }
- return pos;
+ @Override
+ public boolean add(Object o) {
+ return map.put(o, PLACEHOLDER) == null;
}
/** Add this CharSequence into the set */
public boolean add(CharSequence text) {
- return add(text.toString()); // could be more efficient
+ return map.put(text, PLACEHOLDER) == null;
}
/** Add this String into the set */
public boolean add(String text) {
- return add(text.toCharArray());
+ return map.put(text, PLACEHOLDER) == null;
}
/** Add this char[] directly to the set.
@@ -210,141 +171,18 @@
* The user should never modify this text array after calling this method.
*/
public boolean add(char[] text) {
- if (ignoreCase)
- for(int i=0;i>2) > entries.length) {
- rehash();
- }
-
- return true;
+ return map.put(text, PLACEHOLDER) == null;
}
- private boolean equals(char[] text1, int off, int len, char[] text2) {
- if (len != text2.length)
- return false;
- final int limit = off+len;
- if (ignoreCase) {
- for(int i=0;i for this set. Strings are constructed on the fly, so
- * use nextCharArray for more efficient access. */
+ * use nextCharArray for more efficient access.
+ * @deprecated Use the standard iterator, which returns {@code char[]} instances.
+ */
+ @Deprecated
public class CharArraySetIterator implements Iterator {
int pos=-1;
char[] next;
- CharArraySetIterator() {
+ private CharArraySetIterator() {
goNext();
}
private void goNext() {
next = null;
pos++;
- while (pos < entries.length && (next=entries[pos]) == null) pos++;
+ while (pos < map.keys.length && (next=map.keys[pos]) == null) pos++;
}
public boolean hasNext() {
@@ -462,93 +293,26 @@
}
}
- /** returns an iterator of new allocated Strings */
+ /** returns an iterator of new allocated Strings (an instance of {@link CharArraySetIterator}).
+ * @deprecated Use {@link #iterator}, which returns {@code char[]} instances.
+ */
+ @Deprecated
public Iterator stringIterator() {
return new CharArraySetIterator();
}
- /** returns an iterator of new allocated Strings, this method violates the Set interface */
- @Override
- @SuppressWarnings("unchecked")
+ /** Returns an {@link Iterator} depending on the version used:
+ * <ul>
+ * <li>if {@code matchVersion} &ge; 3.1, it returns {@code char[]} instances in this set.</li>
+ * <li>if {@code matchVersion} is 3.0 or older, it returns newly
+ * allocated Strings, so this method violates the Set interface.
+ * It is kept this way for backwards compatibility, normally it should
+ * return {@code char[]} on {@code next()}</li>
+ * </ul>
+ */
+ @Override @SuppressWarnings("unchecked")
public Iterator iterator() {
- return (Iterator) stringIterator();
+ return map.matchVersion.onOrAfter(Version.LUCENE_31) ?
+ map.keySet().iterator() : (Iterator) stringIterator();
}
-
- /**
- * Efficient unmodifiable {@link CharArraySet}. This implementation does not
- * delegate calls to a give {@link CharArraySet} like
- * {@link Collections#unmodifiableSet(java.util.Set)} does. Instead is passes
- * the internal representation of a {@link CharArraySet} to a super
- * constructor and overrides all mutators.
- */
- private static class UnmodifiableCharArraySet extends CharArraySet {
-
- private UnmodifiableCharArraySet(Version matchVersion, char[][] entries, boolean ignoreCase,
- int count) {
- super(matchVersion, entries, ignoreCase, count);
- }
-
- @Override
- public void clear() {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public boolean add(Object o){
- throw new UnsupportedOperationException();
- }
-
- @Override
- public boolean addAll(Collection extends Object> coll) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public boolean add(char[] text) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public boolean add(CharSequence text) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public boolean add(String text) {
- throw new UnsupportedOperationException();
- }
- }
-
- /**
- * Empty {@link UnmodifiableCharArraySet} optimized for speed.
- * Contains checks will always return false or throw
- * NPE if necessary.
- */
- private static final class EmptyCharArraySet extends UnmodifiableCharArraySet {
-
- private EmptyCharArraySet() {
- super(Version.LUCENE_CURRENT, new char[0][], false, 0);
- }
-
- @Override
- public boolean contains(char[] text, int off, int len) {
- if(text == null)
- throw new NullPointerException();
- return false;
- }
-
- @Override
- public boolean contains(CharSequence cs) {
- if(cs == null)
- throw new NullPointerException();
- return false;
- }
-
- @Override
- public boolean contains(Object o) {
- if(o == null)
- throw new NullPointerException();
- return false;
- }
- }
}
Index: src/test/org/apache/lucene/analysis/TestCharArrayMap.java
===================================================================
--- src/test/org/apache/lucene/analysis/TestCharArrayMap.java (revision 0)
+++ src/test/org/apache/lucene/analysis/TestCharArrayMap.java (revision 0)
@@ -0,0 +1,189 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis;
+
+import java.util.*;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.Version;
+
+public class TestCharArrayMap extends LuceneTestCase {
+ Random r = newRandom();
+
+ public void doRandom(int iter, boolean ignoreCase) {
+ CharArrayMap map = new CharArrayMap(Version.LUCENE_CURRENT, 1, ignoreCase);
+ HashMap hmap = new HashMap();
+
+ char[] key;
+ for (int i=0; i cm = new CharArrayMap(Version.LUCENE_CURRENT, 2, false);
+ HashMap hm = new HashMap();
+ hm.put("foo",1);
+ hm.put("bar",2);
+ cm.putAll(hm);
+ assertEquals(hm.size(), cm.size());
+ hm.put("baz", 3);
+ cm.putAll(hm);
+ assertEquals(hm.size(), cm.size());
+
+ Iterator<Map.Entry<Object,Integer>> iter1 = cm.entrySet().iterator();
+ int n=0;
+ while (iter1.hasNext()) {
+ Map.Entry<Object,Integer> entry = iter1.next();
+ Object key = entry.getKey();
+ Integer val = entry.getValue();
+ assertEquals(cm.get(key), val);
+ entry.setValue(val*100);
+ assertEquals(val*100, (int)cm.get(key));
+ n++;
+ }
+ assertEquals(hm.size(), n);
+ cm.clear();
+ cm.putAll(hm);
+ assertEquals(cm.size(), n);
+
+ CharArrayMap<Integer>.EntryIterator iter2 = cm.entrySet().iterator();
+ n=0;
+ while (iter2.hasNext()) {
+ char[] keyc = iter2.nextKey();
+ Integer val = iter2.currentValue();
+ assertEquals(hm.get(new String(keyc)), val);
+ iter2.setValue(val*100);
+ assertEquals(val*100, (int)cm.get(keyc));
+ n++;
+ }
+ assertEquals(hm.size(), n);
+
+ cm.clear();
+ assertEquals(0, cm.size());
+ assertTrue(cm.isEmpty());
+ }
+
+ public void testModifyOnUnmodifiable(){
+ CharArrayMap<Integer> map = new CharArrayMap<Integer>(Version.LUCENE_CURRENT, 2, false);
+ map.put("foo",1);
+ map.put("bar",2);
+ final int size = map.size();
+ assertEquals(2, size);
+ assertTrue(map.containsKey("foo"));
+ assertEquals(1, map.get("foo").intValue());
+ assertTrue(map.containsKey("bar"));
+ assertEquals(2, map.get("bar").intValue());
+
+ map = CharArrayMap.unmodifiableMap(map);
+ assertEquals("Map size changed due to unmodifiableMap call" , size, map.size());
+ String NOT_IN_MAP = "SirGallahad";
+ assertFalse("Test String already exists in map", map.containsKey(NOT_IN_MAP));
+ assertNull("Test String already exists in map", map.get(NOT_IN_MAP));
+
+ try{
+ map.put(NOT_IN_MAP.toCharArray(), 3);
+ fail("Modified unmodifiable map");
+ }catch (UnsupportedOperationException e) {
+ // expected
+ assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
+ assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
+ assertEquals("Size of unmodifiable map has changed", size, map.size());
+ }
+
+ try{
+ map.put(NOT_IN_MAP, 3);
+ fail("Modified unmodifiable map");
+ }catch (UnsupportedOperationException e) {
+ // expected
+ assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
+ assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
+ assertEquals("Size of unmodifiable map has changed", size, map.size());
+ }
+
+ try{
+ map.put(new StringBuilder(NOT_IN_MAP), 3);
+ fail("Modified unmodifiable map");
+ }catch (UnsupportedOperationException e) {
+ // expected
+ assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
+ assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
+ assertEquals("Size of unmodifiable map has changed", size, map.size());
+ }
+
+ try{
+ map.clear();
+ fail("Modified unmodifiable map");
+ }catch (UnsupportedOperationException e) {
+ // expected
+ assertEquals("Size of unmodifiable map has changed", size, map.size());
+ }
+ try{
+ map.put((Object) NOT_IN_MAP, 3);
+ fail("Modified unmodifiable map");
+ }catch (UnsupportedOperationException e) {
+ // expected
+ assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
+ assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
+ assertEquals("Size of unmodifiable map has changed", size, map.size());
+ }
+
+ try{
+ map.putAll(Collections.singletonMap(NOT_IN_MAP, 3));
+ fail("Modified unmodifiable map");
+ }catch (UnsupportedOperationException e) {
+ // expected
+ assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
+ assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
+ assertEquals("Size of unmodifiable map has changed", size, map.size());
+ }
+
+ assertTrue(map.containsKey("foo"));
+ assertEquals(1, map.get("foo").intValue());
+ assertTrue(map.containsKey("bar"));
+ assertEquals(2, map.get("bar").intValue());
+ }
+}
+
Property changes on: src\test\org\apache\lucene\analysis\TestCharArrayMap.java
___________________________________________________________________
Added: svn:keywords
+ Date Author Id Revision HeadURL
Added: svn:eol-style
+ native
Index: src/test/org/apache/lucene/analysis/TestCharArraySet.java
===================================================================
--- src/test/org/apache/lucene/analysis/TestCharArraySet.java (revision 905065)
+++ src/test/org/apache/lucene/analysis/TestCharArraySet.java (working copy)
@@ -92,7 +92,7 @@
}
public void testModifyOnUnmodifiable(){
- CharArraySet set=new CharArraySet(Version.LUCENE_CURRENT, 10,true);
+ CharArraySet set=new CharArraySet(Version.LUCENE_CURRENT, 10, true);
set.addAll(Arrays.asList(TEST_STOP_WORDS));
final int size = set.size();
set = CharArraySet.unmodifiableSet(set);
@@ -143,8 +143,12 @@
assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
assertEquals("Size of unmodifiable set has changed", size, set.size());
}
+
+ // This test was changed in 3.1, as a contains() call on the given Collection using the "correct" iterator's
+ // current key (now a char[]) on a Set would not hit any element of the CAS and therefore never call
+ // remove() on the iterator
try{
- set.removeAll(Arrays.asList(TEST_STOP_WORDS));
+ set.removeAll(new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(TEST_STOP_WORDS), true));
fail("Modified unmodifiable set");
}catch (UnsupportedOperationException e) {
// expected
@@ -152,7 +156,7 @@
}
try{
- set.retainAll(Arrays.asList(new String[]{NOT_IN_SET}));
+ set.retainAll(new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(NOT_IN_SET), true));
fail("Modified unmodifiable set");
}catch (UnsupportedOperationException e) {
// expected