Index: src/java/org/apache/lucene/analysis/CharArrayMap.java =================================================================== --- src/java/org/apache/lucene/analysis/CharArrayMap.java (revision 905065) +++ src/java/org/apache/lucene/analysis/CharArrayMap.java (working copy) @@ -1,15 +1,5 @@ package org.apache.lucene.analysis; -import java.util.Arrays; -import java.util.AbstractSet; -import java.util.Collection; -import java.util.Collections; -import java.util.Iterator; -import java.util.Set; - -import org.apache.lucene.util.CharacterUtils; -import org.apache.lucene.util.Version; - /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -27,14 +17,22 @@ * limitations under the License. */ +import java.util.Arrays; +import java.util.AbstractMap; +import java.util.AbstractSet; +import java.util.Iterator; +import java.util.Map; +import org.apache.lucene.util.CharacterUtils; +import org.apache.lucene.util.Version; + /** - * A simple class that stores Strings as char[]'s in a - * hash table. Note that this is not a general purpose + * A simple class that stores key Strings as char[]'s in a + * hash table. Note that this is not a general purpose * class. For example, it cannot remove items from the - * set, nor does it resize its hash table to be smaller, - * etc. It is designed to be quick to test if a char[] - * is in the set without the necessity of converting it + * map, nor does it resize its hash table to be smaller, + * etc. It is designed to be quick to retrieve items + * by char[] keys without the necessity of converting * to a String first. *

You must specify the required {@link Version} * compatibility when creating {@link CharArraySet}: @@ -46,29 +44,22 @@ * lowercased correctly due to the lack of Unicode 4 * support in JDK 1.4. To use instances of * {@link CharArraySet} with the behavior before Lucene - * 3.1 pass a {@link Version} < 3.1 to the constructors. - *

- * Please note: This class implements {@link java.util.Set Set} but - * does not behave like it should in all cases. The generic type is - * {@code Set}, because you can add any object to it, - * that has a string representation. The add methods will use - * {@link Object#toString} and store the result using a {@code char[]} - * buffer. The same behavior have the {@code contains()} methods. - * The {@link #iterator()} returns an {@code Iterator}. - * For type safety also {@link #stringIterator()} is provided. + * 3.1 pass a {@link Version} < 3.1 to the constructors. */ -public class CharArraySet extends AbstractSet { +public class CharArrayMap extends AbstractMap { + // private only because missing generics + private static final CharArrayMap EMPTY_MAP = new EmptyCharArrayMap(); + private final static int INIT_SIZE = 8; - private char[][] entries; + private final CharacterUtils charUtils; + private boolean ignoreCase; private int count; - private final boolean ignoreCase; - public static final CharArraySet EMPTY_SET = new EmptyCharArraySet(); - - private final CharacterUtils charUtils; - private final Version matchVersion; + final Version matchVersion; // package private because used in CharArraySet + char[][] keys; // package private because used in CharArraySet's non Set-conform CharArraySetIterator + V[] values; // package private because used in CharArraySet's non Set-conform CharArraySetIterator /** - * Create set with enough capacity to hold startSize terms + * Create map with enough capacity to hold startSize terms * * @param matchVersion * compatibility match version see Version @@ -79,101 +70,104 @@ * false if and only if the set should be case sensitive * otherwise true. 
*/ - public CharArraySet(Version matchVersion, int startSize, boolean ignoreCase) { + @SuppressWarnings("unchecked") + public CharArrayMap(Version matchVersion, int startSize, boolean ignoreCase) { this.ignoreCase = ignoreCase; int size = INIT_SIZE; while(startSize + (startSize>>2) > size) size <<= 1; - entries = new char[size][]; + keys = new char[size][]; + values = (V[]) new Object[size]; this.charUtils = CharacterUtils.getInstance(matchVersion); this.matchVersion = matchVersion; } /** - * Creates a set from a Collection of objects. + * Creates a map from the mappings in another map. * * @param matchVersion * compatibility match version see Version * note above for details. * @param c - * a collection whose elements to be placed into the set + * a map whose mappings to be copied * @param ignoreCase * false if and only if the set should be case sensitive * otherwise true. */ - public CharArraySet(Version matchVersion, Collection c, boolean ignoreCase) { + public CharArrayMap(Version matchVersion, Map c, boolean ignoreCase) { this(matchVersion, c.size(), ignoreCase); - addAll(c); + putAll(c); } - - /** - * Creates a set with enough capacity to hold startSize terms - * - * @param startSize - * the initial capacity - * @param ignoreCase - * false if and only if the set should be case sensitive - * otherwise true. - * @deprecated use {@link #CharArraySet(Version, int, boolean)} instead - */ - @Deprecated - public CharArraySet(int startSize, boolean ignoreCase) { - this(Version.LUCENE_30, startSize, ignoreCase); - } - /** - * Creates a set from a Collection of objects. - * - * @param c - * a collection whose elements to be placed into the set - * @param ignoreCase - * false if and only if the set should be case sensitive - * otherwise true. 
- * @deprecated use {@link #CharArraySet(Version, Collection, boolean)} instead - */ - @Deprecated - public CharArraySet(Collection c, boolean ignoreCase) { - this(Version.LUCENE_30, c.size(), ignoreCase); - addAll(c); + /** Create set from keys */ + private CharArrayMap(CharArrayMap toCopy){ + this.keys = toCopy.keys; + this.values = toCopy.values; + this.ignoreCase = toCopy.ignoreCase; + this.count = toCopy.count; + this.charUtils = toCopy.charUtils; + this.matchVersion = toCopy.matchVersion; } - /** Create set from entries */ - private CharArraySet(Version matchVersion, char[][] entries, boolean ignoreCase, int count){ - this.entries = entries; - this.ignoreCase = ignoreCase; - this.count = count; - this.charUtils = CharacterUtils.getInstance(matchVersion); - this.matchVersion = matchVersion; - } - - /** Clears all entries in this set. This method is supported for reusing, but not {@link Set#remove}. */ + /** Clears all entries in this map. This method is supported for reusing, but not {@link Map#remove}. 
*/ @Override public void clear() { count = 0; - Arrays.fill(entries, null); + Arrays.fill(keys, null); + Arrays.fill(values, null); } /** true if the len chars of text starting at off - * are in the set */ - public boolean contains(char[] text, int off, int len) { - return entries[getSlot(text, off, len)] != null; + * are in the {@link #keySet} */ + public boolean containsKey(char[] text, int off, int len) { + return keys[getSlot(text, off, len)] != null; } - /** true if the CharSequence is in the set */ - public boolean contains(CharSequence cs) { - return entries[getSlot(cs)] != null; + /** true if the CharSequence is in the {@link #keySet} */ + public boolean containsKey(CharSequence cs) { + return keys[getSlot(cs)] != null; } + @Override + public boolean containsKey(Object o) { + if (o instanceof char[]) { + final char[] text = (char[])o; + return containsKey(text, 0, text.length); + } + return containsKey(o.toString()); + } + + + /** returns the value of the mapping of len chars of text + * starting at off */ + public V get(char[] text, int off, int len) { + return values[getSlot(text, off, len)]; + } + + /** returns the value of the mapping of the chars inside this {@code CharSequence} */ + public V get(CharSequence cs) { + return values[getSlot(cs)]; + } + + @Override + public V get(Object o) { + if (o instanceof char[]) { + final char[] text = (char[])o; + return get(text, 0, text.length); + } + return get(o.toString()); + } + private int getSlot(char[] text, int off, int len) { int code = getHashCode(text, off, len); - int pos = code & (entries.length-1); - char[] text2 = entries[pos]; + int pos = code & (keys.length-1); + char[] text2 = keys[pos]; if (text2 != null && !equals(text, off, len, text2)) { final int inc = ((code>>8)+code)|1; do { code += inc; - pos = code & (entries.length-1); - text2 = entries[pos]; + pos = code & (keys.length-1); + text2 = keys[pos]; } while (text2 != null && !equals(text, off, len, text2)); } return pos; @@ -182,34 +176,42 
@@ /** Returns true if the String is in the set */ private int getSlot(CharSequence text) { int code = getHashCode(text); - int pos = code & (entries.length-1); - char[] text2 = entries[pos]; + int pos = code & (keys.length-1); + char[] text2 = keys[pos]; if (text2 != null && !equals(text, text2)) { final int inc = ((code>>8)+code)|1; do { code += inc; - pos = code & (entries.length-1); - text2 = entries[pos]; + pos = code & (keys.length-1); + text2 = keys[pos]; } while (text2 != null && !equals(text, text2)); } return pos; } - /** Add this CharSequence into the set */ - public boolean add(CharSequence text) { - return add(text.toString()); // could be more efficient + /** Add the given mapping. */ + public V put(CharSequence text, V value) { + return put(text.toString(), value); // could be more efficient } + + @Override + public V put(Object o, V value) { + if (o instanceof char[]) { + return put((char[])o, value); + } + return put(o.toString(), value); + } - /** Add this String into the set */ - public boolean add(String text) { - return add(text.toCharArray()); + /** Add the given mapping. */ + public V put(String text, V value) { + return put(text.toCharArray(), value); } - /** Add this char[] directly to the set. + /** Add the given mapping. * If ignoreCase is true for this Set, the text array will be directly modified. * The user should never modify this text array after calling this method. 
*/ - public boolean add(char[] text) { + public V put(char[] text, V value) { if (ignoreCase) for(int i=0;i>2) > entries.length) { + if (count + (count>>2) > keys.length) { rehash(); } - return true; + return null; } + @SuppressWarnings("unchecked") + private void rehash() { + assert keys.length == values.length; + final int newSize = 2*keys.length; + final char[][] oldkeys = keys; + final V[] oldvalues = values; + keys = new char[newSize][]; + values = (V[]) new Object[newSize]; + + for(int i=0; i> { + private int pos=-1; + private int lastPos; + private final boolean allowModify; + + private EntryIterator(boolean allowModify) { + this.allowModify = allowModify; + goNext(); } - return add(o.toString()); - } - - /** - * Returns an unmodifiable {@link CharArraySet}. This allows to provide - * unmodifiable views of internal sets for "read-only" use. - * - * @param set - * a set for which the unmodifiable set is returned. - * @return an new unmodifiable {@link CharArraySet}. - * @throws NullPointerException - * if the given set is null. - */ - public static CharArraySet unmodifiableSet(CharArraySet set) { - if (set == null) - throw new NullPointerException("Given set is null"); - if (set == EMPTY_SET) - return EMPTY_SET; - if (set instanceof UnmodifiableCharArraySet) - return set; - /* - * Instead of delegating calls to the given set copy the low-level values to - * the unmodifiable Subclass - */ - return new UnmodifiableCharArraySet(set.matchVersion, set.entries, set.ignoreCase, set.count); - } + private void goNext() { + lastPos = pos; + pos++; + while (pos < keys.length && keys[pos] == null) pos++; + } - /** - * Returns a copy of the given set as a {@link CharArraySet}. If the given set - * is a {@link CharArraySet} the ignoreCase property will be preserved. - * - * @param set - * a set to copy - * @return a copy of the given set as a {@link CharArraySet}. If the given set - * is a {@link CharArraySet} the ignoreCase and matchVersion property will be - * preserved. 
- * @deprecated use {@link #copy(Version, Set)} instead. - */ - @Deprecated - public static CharArraySet copy(final Set set) { - if(set == EMPTY_SET) - return EMPTY_SET; - return copy(Version.LUCENE_30, set); - } - - /** - * Returns a copy of the given set as a {@link CharArraySet}. If the given set - * is a {@link CharArraySet} the ignoreCase property will be preserved. - *

- * Note: If you intend to create a copy of another {@link CharArraySet} where - * the {@link Version} of the source set differs from its copy - * {@link #CharArraySet(Version, Collection, boolean)} should be used instead. - * The {@link #copy(Version, Set)} will preserve the {@link Version} of the - * source set it is an instance of {@link CharArraySet}. - *

- * - * @param matchVersion - * compatibility match version see Version - * note above for details. This argument will be ignored if the - * given set is a {@link CharArraySet}. - * @param set - * a set to copy - * @return a copy of the given set as a {@link CharArraySet}. If the given set - * is a {@link CharArraySet} the ignoreCase property as well as the - * matchVersion will be of the given set will be preserved. - */ - public static CharArraySet copy(final Version matchVersion, final Set set) { - if(set == EMPTY_SET) - return EMPTY_SET; - if(set instanceof CharArraySet) { - final CharArraySet source = (CharArraySet) set; - // use fast path instead of iterating all values - // this is even on very small sets ~10 times faster than iterating - final char[][] entries = new char[source.entries.length][]; - System.arraycopy(source.entries, 0, entries, 0, entries.length); - return new CharArraySet(source.matchVersion, entries, source.ignoreCase, source.count); + public boolean hasNext() { + return pos < keys.length; } - return new CharArraySet(matchVersion, set, false); - } - - /** The Iterator for this set. Strings are constructed on the fly, so - * use nextCharArray for more efficient access. */ - public class CharArraySetIterator implements Iterator { - int pos=-1; - char[] next; - CharArraySetIterator() { + /** gets the next key... 
do not modify the returned char[] */ + public char[] nextKey() { goNext(); + return keys[lastPos]; } - private void goNext() { - next = null; - pos++; - while (pos < entries.length && (next=entries[pos]) == null) pos++; + /** gets the next key as a newly created String object */ + public String nextKeyString() { + return new String(nextKey()); } - public boolean hasNext() { - return next != null; + /** returns the value associated with the last key returned */ + public V currentValue() { + return values[lastPos]; } - /** do not modify the returned char[] */ - public char[] nextCharArray() { - char[] ret = next; - goNext(); - return ret; + /** sets the value associated with the last key returned */ + public V setValue(V value) { + if (!allowModify) + throw new UnsupportedOperationException(); + V old = values[lastPos]; + values[lastPos] = value; + return old; } - /** Returns the next String, as a Set would... - * use nextCharArray() for better efficiency. */ - public String next() { - return new String(nextCharArray()); + /** use nextCharArray() + currentValue() for better efficiency. 
*/ + public Map.Entry next() { + goNext(); + return new MapEntry(lastPos, allowModify); } public void remove() { @@ -462,93 +410,241 @@ } } - /** returns an iterator of new allocated Strings */ - public Iterator stringIterator() { - return new CharArraySetIterator(); + private final class MapEntry implements Map.Entry { + private final int pos; + private final boolean allowModify; + + private MapEntry(int pos, boolean allowModify) { + this.pos = pos; + this.allowModify = allowModify; + } + + public Object getKey() { + // we must clone here, as putAll to another CharArrayMap + // with other case sensitivity flag would corrupt the keys + return keys[pos].clone(); + } + + public V getValue() { + return values[pos]; + } + + public V setValue(V value) { + if (!allowModify) + throw new UnsupportedOperationException(); + final V old = values[pos]; + values[pos] = value; + return old; + } + + @Override + public String toString() { + return new StringBuilder().append(keys[pos]).append('=').append(values[pos]).toString(); + } } - /** returns an iterator of new allocated Strings, this method violates the Set interface */ - @Override - @SuppressWarnings("unchecked") - public Iterator iterator() { - return (Iterator) stringIterator(); + /** public EntrySet class so efficient methods are exposed to users */ + public final class EntrySet extends AbstractSet> { + private final boolean allowModify; + + private EntrySet(boolean allowModify) { + this.allowModify = allowModify; + } + + @Override + public EntryIterator iterator() { + return new EntryIterator(allowModify); + } + + @Override + public boolean contains(Object o) { + if (!(o instanceof Map.Entry)) + return false; + final Map.Entry e = (Map.Entry)o; + final Object key = e.getKey(); + final Object val = e.getValue(); + final Object v = get(key); + return v == null ? 
val == null : v.equals(val); + } + + @Override + public boolean remove(Object o) { + throw new UnsupportedOperationException(); + } + + @Override + public int size() { + return count; + } + + @Override + public void clear() { + if (!allowModify) + throw new UnsupportedOperationException(); + CharArrayMap.this.clear(); + } } /** - * Efficient unmodifiable {@link CharArraySet}. This implementation does not - * delegate calls to a give {@link CharArraySet} like - * {@link Collections#unmodifiableSet(java.util.Set)} does. Instead is passes - * the internal representation of a {@link CharArraySet} to a super - * constructor and overrides all mutators. + * Returns an unmodifiable {@link CharArrayMap}. This allows to provide + * unmodifiable views of internal map for "read-only" use. + * + * @param map + * a map for which the unmodifiable map is returned. + * @return an new unmodifiable {@link CharArrayMap}. + * @throws NullPointerException + * if the given map is null. */ - private static class UnmodifiableCharArraySet extends CharArraySet { + public static CharArrayMap unmodifiableMap(CharArrayMap map) { + if (map == null) + throw new NullPointerException("Given map is null"); + if (map == emptyMap() || map.isEmpty()) + return emptyMap(); + if (map instanceof UnmodifiableCharArrayMap) + return map; + return new UnmodifiableCharArrayMap(map); + } - private UnmodifiableCharArraySet(Version matchVersion, char[][] entries, boolean ignoreCase, - int count) { - super(matchVersion, entries, ignoreCase, count); + /** + * Returns a copy of the given map as a {@link CharArrayMap}. If the given map + * is a {@link CharArrayMap} the ignoreCase property will be preserved. + *

+ * Note: If you intend to create a copy of another {@link CharArrayMap} where + * the {@link Version} of the source map differs from its copy + * {@link #CharArrayMap(Version, Map, boolean)} should be used instead. + * The {@link #copy(Version, Map)} will preserve the {@link Version} of the + * source map it is an instance of {@link CharArrayMap}. + *

+ * + * @param matchVersion + * compatibility match version see Version + * note above for details. This argument will be ignored if the + * given map is a {@link CharArrayMap}. + * @param map + * a map to copy + * @return a copy of the given map as a {@link CharArrayMap}. If the given map + * is a {@link CharArrayMap} the ignoreCase property as well as the + * matchVersion will be of the given map will be preserved. + */ + @SuppressWarnings("unchecked") + public static CharArrayMap copy(final Version matchVersion, final Map map) { + if(map == EMPTY_MAP) + return emptyMap(); + if(map instanceof CharArrayMap) { + CharArrayMap m = (CharArrayMap) map; + // use fast path instead of iterating all values + // this is even on very small sets ~10 times faster than iterating + final char[][] keys = new char[m.keys.length][]; + System.arraycopy(m.keys, 0, keys, 0, keys.length); + final V[] values = (V[]) new Object[m.values.length]; + System.arraycopy(m.values, 0, values, 0, values.length); + m = new CharArrayMap(m); + m.keys = keys; + m.values = values; + return m; } + return new CharArrayMap(matchVersion, map, false); + } + + /** Returns an empty, unmodifiable map. 
*/ + @SuppressWarnings("unchecked") + public static CharArrayMap emptyMap() { + return (CharArrayMap) EMPTY_MAP; + } + + // package private CharArraySet instanceof check in CharArraySet + static class UnmodifiableCharArrayMap extends CharArrayMap { + UnmodifiableCharArrayMap(CharArrayMap map) { + super(map); + } + @Override public void clear() { throw new UnsupportedOperationException(); } @Override - public boolean add(Object o){ + public V put(Object o, V val){ throw new UnsupportedOperationException(); } @Override - public boolean addAll(Collection coll) { + public V put(char[] text, V val) { throw new UnsupportedOperationException(); } - + @Override - public boolean add(char[] text) { + public V put(CharSequence text, V val) { throw new UnsupportedOperationException(); } @Override - public boolean add(CharSequence text) { + public V put(String text, V val) { throw new UnsupportedOperationException(); } - + @Override - public boolean add(String text) { + public V remove(Object key) { throw new UnsupportedOperationException(); } + + @Override + public EntrySet entrySet() { + return new EntrySet(false); + } } /** - * Empty {@link UnmodifiableCharArraySet} optimized for speed. + * Empty {@link UnmodifiableCharArrayMap} optimized for speed. * Contains checks will always return false or throw * NPE if necessary. 
*/ - private static final class EmptyCharArraySet extends UnmodifiableCharArraySet { - - private EmptyCharArraySet() { - super(Version.LUCENE_CURRENT, new char[0][], false, 0); + private static final class EmptyCharArrayMap extends UnmodifiableCharArrayMap { + EmptyCharArrayMap() { + super(new CharArrayMap(Version.LUCENE_CURRENT, 0, false)); } @Override - public boolean contains(char[] text, int off, int len) { + public boolean containsKey(char[] text, int off, int len) { if(text == null) throw new NullPointerException(); return false; } @Override - public boolean contains(CharSequence cs) { + public boolean containsKey(CharSequence cs) { if(cs == null) throw new NullPointerException(); return false; } @Override - public boolean contains(Object o) { + public boolean containsKey(Object o) { if(o == null) throw new NullPointerException(); return false; } + + @Override + public V get(char[] text, int off, int len) { + if(text == null) + throw new NullPointerException(); + return null; + } + + @Override + public V get(CharSequence cs) { + if(cs == null) + throw new NullPointerException(); + return null; + } + + @Override + public V get(Object o) { + if(o == null) + throw new NullPointerException(); + return null; + } } } Index: src/java/org/apache/lucene/analysis/CharArraySet.java =================================================================== --- src/java/org/apache/lucene/analysis/CharArraySet.java (revision 905065) +++ src/java/org/apache/lucene/analysis/CharArraySet.java (working copy) @@ -1,15 +1,5 @@ package org.apache.lucene.analysis; -import java.util.Arrays; -import java.util.AbstractSet; -import java.util.Collection; -import java.util.Collections; -import java.util.Iterator; -import java.util.Set; - -import org.apache.lucene.util.CharacterUtils; -import org.apache.lucene.util.Version; - /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. 
See the NOTICE file distributed with @@ -27,7 +17,14 @@ * limitations under the License. */ +import java.util.Arrays; +import java.util.AbstractSet; +import java.util.Collection; +import java.util.Iterator; +import java.util.Set; +import org.apache.lucene.util.Version; + /** * A simple class that stores Strings as char[]'s in a * hash table. Note that this is not a general purpose @@ -58,15 +55,11 @@ * For type safety also {@link #stringIterator()} is provided. */ public class CharArraySet extends AbstractSet { - private final static int INIT_SIZE = 8; - private char[][] entries; - private int count; - private final boolean ignoreCase; - public static final CharArraySet EMPTY_SET = new EmptyCharArraySet(); + public static final CharArraySet EMPTY_SET = new CharArraySet(CharArrayMap.emptyMap()); + private static final Object PLACEHOLDER = new Object(); - private final CharacterUtils charUtils; - private final Version matchVersion; - + private final CharArrayMap map; + /** * Create set with enough capacity to hold startSize terms * @@ -80,13 +73,7 @@ * otherwise true. */ public CharArraySet(Version matchVersion, int startSize, boolean ignoreCase) { - this.ignoreCase = ignoreCase; - int size = INIT_SIZE; - while(startSize + (startSize>>2) > size) - size <<= 1; - entries = new char[size][]; - this.charUtils = CharacterUtils.getInstance(matchVersion); - this.matchVersion = matchVersion; + map = new CharArrayMap(matchVersion, startSize, ignoreCase); } /** @@ -101,7 +88,7 @@ * false if and only if the set should be case sensitive * otherwise true. 
*/ - public CharArraySet(Version matchVersion, Collection c, boolean ignoreCase) { + public CharArraySet(Version matchVersion, Collection c, boolean ignoreCase) { this(matchVersion, c.size(), ignoreCase); addAll(c); } @@ -132,77 +119,51 @@ * @deprecated use {@link #CharArraySet(Version, Collection, boolean)} instead */ @Deprecated - public CharArraySet(Collection c, boolean ignoreCase) { + public CharArraySet(Collection c, boolean ignoreCase) { this(Version.LUCENE_30, c.size(), ignoreCase); addAll(c); } /** Create set from entries */ - private CharArraySet(Version matchVersion, char[][] entries, boolean ignoreCase, int count){ - this.entries = entries; - this.ignoreCase = ignoreCase; - this.count = count; - this.charUtils = CharacterUtils.getInstance(matchVersion); - this.matchVersion = matchVersion; + private CharArraySet(final CharArrayMap map){ + this.map = map; } /** Clears all entries in this set. This method is supported for reusing, but not {@link Set#remove}. */ @Override public void clear() { - count = 0; - Arrays.fill(entries, null); + map.clear(); } /** true if the len chars of text starting at off * are in the set */ public boolean contains(char[] text, int off, int len) { - return entries[getSlot(text, off, len)] != null; + return map.containsKey(text, off, len); } /** true if the CharSequence is in the set */ public boolean contains(CharSequence cs) { - return entries[getSlot(cs)] != null; + return map.containsKey(cs); } - private int getSlot(char[] text, int off, int len) { - int code = getHashCode(text, off, len); - int pos = code & (entries.length-1); - char[] text2 = entries[pos]; - if (text2 != null && !equals(text, off, len, text2)) { - final int inc = ((code>>8)+code)|1; - do { - code += inc; - pos = code & (entries.length-1); - text2 = entries[pos]; - } while (text2 != null && !equals(text, off, len, text2)); - } - return pos; + @Override + public boolean contains(Object o) { + return map.containsKey(o); } - /** Returns true if the String is 
in the set */ - private int getSlot(CharSequence text) { - int code = getHashCode(text); - int pos = code & (entries.length-1); - char[] text2 = entries[pos]; - if (text2 != null && !equals(text, text2)) { - final int inc = ((code>>8)+code)|1; - do { - code += inc; - pos = code & (entries.length-1); - text2 = entries[pos]; - } while (text2 != null && !equals(text, text2)); - } - return pos; + @Override + public boolean add(Object o) { + return map.put(o, PLACEHOLDER) == null; } /** Add this CharSequence into the set */ public boolean add(CharSequence text) { - return add(text.toString()); // could be more efficient + return map.put(text, PLACEHOLDER) == null; } /** Add this String into the set */ public boolean add(String text) { - return add(text.toCharArray()); + return map.put(text, PLACEHOLDER) == null; } /** Add this char[] directly to the set. @@ -210,141 +171,18 @@ * The user should never modify this text array after calling this method. */ public boolean add(char[] text) { - if (ignoreCase) - for(int i=0;i>2) > entries.length) { - rehash(); - } - - return true; + return map.put(text, PLACEHOLDER) == null; } - private boolean equals(char[] text1, int off, int len, char[] text2) { - if (len != text2.length) - return false; - final int limit = off+len; - if (ignoreCase) { - for(int i=0;i for this set. Strings are constructed on the fly, so - * use nextCharArray for more efficient access. */ + * use nextCharArray for more efficient access. + * @deprecated Use the standard iterator, which returns {@code char[]} instances. 
+ */ + @Deprecated public class CharArraySetIterator implements Iterator { int pos=-1; char[] next; - CharArraySetIterator() { + private CharArraySetIterator() { goNext(); } private void goNext() { next = null; pos++; - while (pos < entries.length && (next=entries[pos]) == null) pos++; + while (pos < map.keys.length && (next=map.keys[pos]) == null) pos++; } public boolean hasNext() { @@ -462,93 +293,26 @@ } } - /** returns an iterator of new allocated Strings */ + /** returns an iterator of new allocated Strings (an instance of {@link CharArraySetIterator}). + * @deprecated Use {@link #iterator}, which returns {@code char[]} instances. + */ + @Deprecated public Iterator stringIterator() { return new CharArraySetIterator(); } - /** returns an iterator of new allocated Strings, this method violates the Set interface */ - @Override - @SuppressWarnings("unchecked") + /** Returns an {@link Iterator} depending on the version used: + *
    + *
  • if {@code matchVersion} ≥ 3.1, it returns {@code char[]} instances in this set.
  • + *
  • if {@code matchVersion} is 3.0 or older, it returns new + * allocated Strings, so this method violates the Set interface. + * It is kept this way for backwards compatibility, normally it should + * return {@code char[]} on {@code next()}
  • + *
+ */ + @Override @SuppressWarnings("unchecked") public Iterator iterator() { - return (Iterator) stringIterator(); + return map.matchVersion.onOrAfter(Version.LUCENE_31) ? + map.keySet().iterator() : (Iterator) stringIterator(); } - - /** - * Efficient unmodifiable {@link CharArraySet}. This implementation does not - * delegate calls to a give {@link CharArraySet} like - * {@link Collections#unmodifiableSet(java.util.Set)} does. Instead is passes - * the internal representation of a {@link CharArraySet} to a super - * constructor and overrides all mutators. - */ - private static class UnmodifiableCharArraySet extends CharArraySet { - - private UnmodifiableCharArraySet(Version matchVersion, char[][] entries, boolean ignoreCase, - int count) { - super(matchVersion, entries, ignoreCase, count); - } - - @Override - public void clear() { - throw new UnsupportedOperationException(); - } - - @Override - public boolean add(Object o){ - throw new UnsupportedOperationException(); - } - - @Override - public boolean addAll(Collection coll) { - throw new UnsupportedOperationException(); - } - - @Override - public boolean add(char[] text) { - throw new UnsupportedOperationException(); - } - - @Override - public boolean add(CharSequence text) { - throw new UnsupportedOperationException(); - } - - @Override - public boolean add(String text) { - throw new UnsupportedOperationException(); - } - } - - /** - * Empty {@link UnmodifiableCharArraySet} optimized for speed. - * Contains checks will always return false or throw - * NPE if necessary. 
- */ - private static final class EmptyCharArraySet extends UnmodifiableCharArraySet { - - private EmptyCharArraySet() { - super(Version.LUCENE_CURRENT, new char[0][], false, 0); - } - - @Override - public boolean contains(char[] text, int off, int len) { - if(text == null) - throw new NullPointerException(); - return false; - } - - @Override - public boolean contains(CharSequence cs) { - if(cs == null) - throw new NullPointerException(); - return false; - } - - @Override - public boolean contains(Object o) { - if(o == null) - throw new NullPointerException(); - return false; - } - } } Index: src/test/org/apache/lucene/analysis/TestCharArrayMap.java =================================================================== --- src/test/org/apache/lucene/analysis/TestCharArrayMap.java (revision 0) +++ src/test/org/apache/lucene/analysis/TestCharArrayMap.java (revision 0) @@ -0,0 +1,189 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.analysis; + +import java.util.*; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.Version; + +public class TestCharArrayMap extends LuceneTestCase { + Random r = newRandom(); + + public void doRandom(int iter, boolean ignoreCase) { + CharArrayMap map = new CharArrayMap(Version.LUCENE_CURRENT, 1, ignoreCase); + HashMap hmap = new HashMap(); + + char[] key; + for (int i=0; i cm = new CharArrayMap(Version.LUCENE_CURRENT, 2, false); + HashMap hm = new HashMap(); + hm.put("foo",1); + hm.put("bar",2); + cm.putAll(hm); + assertEquals(hm.size(), cm.size()); + hm.put("baz", 3); + cm.putAll(hm); + assertEquals(hm.size(), cm.size()); + + Iterator> iter1 = cm.entrySet().iterator(); + int n=0; + while (iter1.hasNext()) { + Map.Entry entry = iter1.next(); + Object key = entry.getKey(); + Integer val = entry.getValue(); + assertEquals(cm.get(key), val); + entry.setValue(val*100); + assertEquals(val*100, (int)cm.get(key)); + n++; + } + assertEquals(hm.size(), n); + cm.clear(); + cm.putAll(hm); + assertEquals(cm.size(), n); + + CharArrayMap.EntryIterator iter2 = cm.entrySet().iterator(); + n=0; + while (iter2.hasNext()) { + char[] keyc = iter2.nextKey(); + Integer val = iter2.currentValue(); + assertEquals(hm.get(new String(keyc)), val); + iter2.setValue(val*100); + assertEquals(val*100, (int)cm.get(keyc)); + n++; + } + assertEquals(hm.size(), n); + + cm.clear(); + assertEquals(0, cm.size()); + assertTrue(cm.isEmpty()); + } + + public void testModifyOnUnmodifiable(){ + CharArrayMap map = new CharArrayMap(Version.LUCENE_CURRENT, 2, false); + map.put("foo",1); + map.put("bar",2); + final int size = map.size(); + assertEquals(2, size); + assertTrue(map.containsKey("foo")); + assertEquals(1, map.get("foo").intValue()); + assertTrue(map.containsKey("bar")); + assertEquals(2, map.get("bar").intValue()); + + map = CharArrayMap.unmodifiableMap(map); + assertEquals("Map size changed due to unmodifiableMap call" , size, 
map.size()); + String NOT_IN_MAP = "SirGallahad"; + assertFalse("Test String already exists in map", map.containsKey(NOT_IN_MAP)); + assertNull("Test String already exists in map", map.get(NOT_IN_MAP)); + + try{ + map.put(NOT_IN_MAP.toCharArray(), 3); + fail("Modified unmodifiable map"); + }catch (UnsupportedOperationException e) { + // expected + assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP)); + assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP)); + assertEquals("Size of unmodifiable map has changed", size, map.size()); + } + + try{ + map.put(NOT_IN_MAP, 3); + fail("Modified unmodifiable map"); + }catch (UnsupportedOperationException e) { + // expected + assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP)); + assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP)); + assertEquals("Size of unmodifiable map has changed", size, map.size()); + } + + try{ + map.put(new StringBuilder(NOT_IN_MAP), 3); + fail("Modified unmodifiable map"); + }catch (UnsupportedOperationException e) { + // expected + assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP)); + assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP)); + assertEquals("Size of unmodifiable map has changed", size, map.size()); + } + + try{ + map.clear(); + fail("Modified unmodifiable map"); + }catch (UnsupportedOperationException e) { + // expected + assertEquals("Size of unmodifiable map has changed", size, map.size()); + } + try{ + map.put((Object) NOT_IN_MAP, 3); + fail("Modified unmodifiable map"); + }catch (UnsupportedOperationException e) { + // expected + assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP)); + assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP)); + assertEquals("Size of unmodifiable map has changed", size, map.size()); + } + + 
try{ + map.putAll(Collections.singletonMap(NOT_IN_MAP, 3)); + fail("Modified unmodifiable map"); + }catch (UnsupportedOperationException e) { + // expected + assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP)); + assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP)); + assertEquals("Size of unmodifiable map has changed", size, map.size()); + } + + assertTrue(map.containsKey("foo")); + assertEquals(1, map.get("foo").intValue()); + assertTrue(map.containsKey("bar")); + assertEquals(2, map.get("bar").intValue()); + } +} + Property changes on: src\test\org\apache\lucene\analysis\TestCharArrayMap.java ___________________________________________________________________ Added: svn:keywords + Date Author Id Revision HeadURL Added: svn:eol-style + native Index: src/test/org/apache/lucene/analysis/TestCharArraySet.java =================================================================== --- src/test/org/apache/lucene/analysis/TestCharArraySet.java (revision 905065) +++ src/test/org/apache/lucene/analysis/TestCharArraySet.java (working copy) @@ -92,7 +92,7 @@ } public void testModifyOnUnmodifiable(){ - CharArraySet set=new CharArraySet(Version.LUCENE_CURRENT, 10,true); + CharArraySet set=new CharArraySet(Version.LUCENE_CURRENT, 10, true); set.addAll(Arrays.asList(TEST_STOP_WORDS)); final int size = set.size(); set = CharArraySet.unmodifiableSet(set); @@ -143,8 +143,12 @@ assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET)); assertEquals("Size of unmodifiable set has changed", size, set.size()); } + + // This test was changed in 3.1, as a contains() call on the given Collection using the "correct" iterator's + // current key (now a char[]) on a Set would not hit any element of the CAS and therefore never call + // remove() on the iterator try{ - set.removeAll(Arrays.asList(TEST_STOP_WORDS)); + set.removeAll(new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(TEST_STOP_WORDS),
true)); fail("Modified unmodifiable set"); }catch (UnsupportedOperationException e) { // expected @@ -152,7 +156,7 @@ } try{ - set.retainAll(Arrays.asList(new String[]{NOT_IN_SET})); + set.retainAll(new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(NOT_IN_SET), true)); fail("Modified unmodifiable set"); }catch (UnsupportedOperationException e) { // expected