Index: src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTable.java =================================================================== --- src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTable.java (revision 1672555) +++ src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTable.java (working copy) @@ -16,38 +16,52 @@ */ package org.apache.jackrabbit.oak.plugins.segment; -import static com.google.common.collect.Lists.newArrayList; import static com.google.common.collect.Maps.newHashMapWithExpectedSize; -import static java.util.Collections.nCopies; import java.lang.ref.WeakReference; -import java.util.ArrayList; import java.util.Collection; import java.util.Map; import org.apache.jackrabbit.oak.plugins.segment.compaction.CompactionStrategy; /** - * Hash table of weak references to segment identifiers. + * A collection of weak references to segment identifiers. */ public class SegmentIdTable { /** - * Hash table of weak references to segment identifiers that are currently - * being accessed. The size of the table is always a power of two, which - * optimizes the {@link #expand()} operation. The table is indexed by the - * random identifier bits, which guarantees uniform distribution of entries. - * Each table entry is either {@code null} (when there are no matching - * identifiers) or a list of weak references to the matching identifiers. + * The array of weak references to segment identifiers that are currently + * being accessed. This represents a hash table that uses open addressing + * with linear probing. It is not a hash map, to speed up read access. + *

+ * The size of the table is always a power of two, so that we can use + * bitwise "and" instead of modulo. *

- * Actually, this is a array. It's not a hash map, to conserve memory (maps - * need much more memory). + * The table is indexed by the random identifier bits, which guarantees + * uniform distribution of entries. *

- * The list is not sorted (we could; lookup would be faster, but adding and - * removing entries would be slower). + * Open addressing with linear probing is used. Each table entry is either + * null (when there are no matching identifiers), a weak references to the + * matching identifier, or a weak reference to another identifier. + * There are no tombstone entries as there is no explicit remove operation, + * but a referent can become null if the entry is garbage collected. + *

+ * The array is not sorted (we could; lookup might be faster, but adding + * entries would be slower). + */ + @SuppressWarnings("unchecked") + private WeakReference[] references = + (WeakReference[]) new WeakReference[1024]; + + /** + * The number of used entries in this table. + */ + private int entryCount; + + /** + * The resize count (for diagnostics and testing). */ - private final ArrayList> references = - newArrayList(nCopies(1024, (WeakReference) null)); + private int resizeCount; private final SegmentTracker tracker; @@ -65,9 +79,11 @@ synchronized SegmentId getSegmentId(long msb, long lsb) { int first = getIndex(lsb); int index = first; - boolean shouldRefresh = false; + boolean resizeNeeded = false; + + int size = references.length; - WeakReference reference = references.get(index); + WeakReference reference = references[index]; while (reference != null) { SegmentId id = reference.get(); if (id != null @@ -75,64 +91,110 @@ && id.getLeastSignificantBits() == lsb) { return id; } - shouldRefresh = shouldRefresh || id == null; - index = (index + 1) % references.size(); - reference = references.get(index); + if (id == null) { + // we have a garbage collected entry: resize + resizeNeeded = true; + } + // open addressing / linear probing + index = (index + 1) & (size - 1); + reference = references[index]; + } + + if (entryCount > size * 0.75) { + // more than 75% full + resizeNeeded = true; } SegmentId id = new SegmentId(tracker, msb, lsb); - references.set(index, new WeakReference(id)); - if (shouldRefresh) { - refresh(); + entryCount++; + references[index] = new WeakReference(id); + if (resizeNeeded) { + refreshAndResize(); } return id; } + + /** + * Get the internal map size (used for testing and diagnostics). + * + * @return the map size + */ + int getMapSize() { + return references.length; + } + + /** + * Get the entry count (used for testing and diagnostics). + * + * @return the entry count + */ + int getEntryCount() { + return entryCount; + } + + /** + * Get the number of resize operations (used for testing and diagnostics). + * + * @return the resize count + */ + int getResizeCount() { + return resizeCount; + } /** * Returns all segment identifiers that are currently referenced in memory. - * - * @return referenced segment identifiers */ void collectReferencedIds(Collection ids) { - ids.addAll(refresh()); + ids.addAll(refreshAndResize()); } - private synchronized Collection refresh() { - int size = references.size(); + @SuppressWarnings("unchecked") + private synchronized Collection refreshAndResize() { + resizeCount++; + int size = references.length; Map> ids = newHashMapWithExpectedSize(size); + // if there is a collision (that is, if there is an entry that is at the + // wrong location), then we recreate the map boolean hashCollisions = false; boolean emptyReferences = false; for (int i = 0; i < size; i++) { - WeakReference reference = references.get(i); + WeakReference reference = references[i]; if (reference != null) { SegmentId id = reference.get(); if (id != null) { ids.put(id, reference); - hashCollisions = hashCollisions || (i != getIndex(id)); + if (i != getIndex(id)) { + hashCollisions = true; + } } else { - references.set(i, null); + references[i] = null; emptyReferences = true; } } } - - while (2 * ids.size() > size) { + entryCount = ids.size(); + size = 2; + while (entryCount > size * 0.75) { + // more than 75% full size *= 2; } - if ((hashCollisions && emptyReferences) || size != references.size()) { - references.clear(); - references.addAll(nCopies(size, (WeakReference) null)); + // we need to re-build the table if the new size is different, + // but also if we removed some of the entries (because an entry was + // garbage collected) and there is at least one entry at the "wrong" + // location (due to open addressing) + if (size != references.length || (hashCollisions && emptyReferences)) { + references = (WeakReference[]) new WeakReference[size]; for (Map.Entry> entry : ids.entrySet()) { int index = getIndex(entry.getKey()); - while (references.get(index) != null) { - index = (index + 1) % size; + while (references[index] != null) { + index = (index + 1) & (size - 1); } - references.set(index, entry.getValue()); + references[index] = entry.getValue(); } } @@ -144,27 +206,27 @@ } private int getIndex(long lsb) { - return ((int) lsb) & (references.size() - 1); + return ((int) lsb) & (references.length - 1); } synchronized void clearSegmentIdTables(CompactionStrategy strategy) { - int size = references.size(); + int size = references.length; boolean dirty = false; for (int i = 0; i < size; i++) { - WeakReference reference = references.get(i); + WeakReference reference = references[i]; if (reference != null) { SegmentId id = reference.get(); if (id != null) { if (strategy.canRemove(id)) { reference.clear(); - references.set(i, null); + references[i] = null; dirty = true; } } } } if (dirty) { - refresh(); + refreshAndResize(); } } } Index: src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTableTestIT.java =================================================================== --- src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTableTestIT.java (revision 0) +++ src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTableTestIT.java (working copy) @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.oak.plugins.segment; + +import java.util.ArrayList; +import java.util.List; +import java.util.Random; +import java.util.concurrent.Callable; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; + +import junit.framework.Assert; + +import org.apache.jackrabbit.oak.plugins.segment.memory.MemoryStore; +import org.junit.Test; + +public class SegmentIdTableTestIT { + + /** + * OAK-2752 + */ + @Test + public void endlessSearchLoop() { + SegmentTracker tracker = new MemoryStore().getTracker(); + final SegmentIdTable tbl = new SegmentIdTable(tracker); + + List refs = new ArrayList(); + for (int i = 0; i < 1024; i++) { + refs.add(tbl.getSegmentId(i, i % 64)); + } + + Callable c = new Callable() { + + @Override + public SegmentId call() throws Exception { + // (2,1) doesn't exist + return tbl.getSegmentId(2, 1); + } + }; + Future f = Executors.newSingleThreadExecutor().submit(c); + SegmentId s = null; + try { + s = f.get(5, TimeUnit.SECONDS); + } catch (Exception e) { + Assert.fail(e.getMessage()); + } + Assert.assertNotNull(s); + Assert.assertEquals(2, s.getMostSignificantBits()); + Assert.assertEquals(1, s.getLeastSignificantBits()); + } + + @Test + public void randomizedTest() { + SegmentTracker tracker = new MemoryStore().getTracker(); + final SegmentIdTable tbl = new SegmentIdTable(tracker); + + List refs = new ArrayList(); + Random r = new Random(1); + for (int i = 0; i < 16 * 1024; i++) { + refs.add(tbl.getSegmentId(r.nextLong(), r.nextLong())); + } + Assert.assertEquals(16 * 1024, tbl.getEntryCount()); + Assert.assertEquals(16 * 2048, tbl.getMapSize()); + Assert.assertEquals(5, tbl.getResizeCount()); + + r = new Random(1); + for (int i = 0; i < 16 * 1024; i++) { + refs.add(tbl.getSegmentId(r.nextLong(), r.nextLong())); + Assert.assertEquals(16 * 1024, tbl.getEntryCount()); + Assert.assertEquals(16 * 2048, tbl.getMapSize()); + Assert.assertEquals(5, tbl.getResizeCount()); + } + + refs.clear(); + + } +} \ No newline at end of file